1 
2 /*--------------------------------------------------------------------*/
3 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
4 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
5 /*---                                                    mc_main.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of MemCheck, a heavyweight Valgrind tool for
10    detecting memory errors.
11 
12    Copyright (C) 2000-2013 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 
33 #include "pub_tool_basics.h"
34 #include "pub_tool_aspacemgr.h"
35 #include "pub_tool_gdbserver.h"
36 #include "pub_tool_poolalloc.h"
37 #include "pub_tool_hashtable.h"     // For mc_include.h
38 #include "pub_tool_libcbase.h"
39 #include "pub_tool_libcassert.h"
40 #include "pub_tool_libcprint.h"
41 #include "pub_tool_machine.h"
42 #include "pub_tool_mallocfree.h"
43 #include "pub_tool_options.h"
44 #include "pub_tool_oset.h"
45 #include "pub_tool_rangemap.h"
46 #include "pub_tool_replacemalloc.h"
47 #include "pub_tool_tooliface.h"
48 #include "pub_tool_threadstate.h"
49 
50 #include "mc_include.h"
51 #include "memcheck.h"   /* for client requests */
52 
53 
54 /* Set to 1 to do a little more sanity checking */
55 #define VG_DEBUG_MEMORY 0
56 
57 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
58 
59 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
60 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
61 
62 
63 /*------------------------------------------------------------*/
64 /*--- Fast-case knobs                                      ---*/
65 /*------------------------------------------------------------*/
66 
67 // Comment these out to disable the fast cases (don't just set them to zero).
68 
69 #define PERF_FAST_LOADV    1
70 #define PERF_FAST_STOREV   1
71 
72 #define PERF_FAST_SARP     1
73 
74 #define PERF_FAST_STACK    1
75 #define PERF_FAST_STACK2   1
76 
77 /* Change this to 1 to enable assertions on origin tracking cache fast
78    paths */
79 #define OC_ENABLE_ASSERTIONS 0
80 
81 
82 /*------------------------------------------------------------*/
83 /*--- Comments on the origin tracking implementation       ---*/
84 /*------------------------------------------------------------*/
85 
86 /* See detailed comment entitled
87    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
88    which is contained further on in this file. */
89 
90 
91 /*------------------------------------------------------------*/
92 /*--- V bits and A bits                                    ---*/
93 /*------------------------------------------------------------*/
94 
95 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
96    thinks the corresponding value bit is defined.  And every memory byte
97    has an A bit, which tracks whether Memcheck thinks the program can access
98    it safely (ie. it's mapped, and has at least one of the RWX permission bits
99    set).  So every N-bit register is shadowed with N V bits, and every memory
100    byte is shadowed with 8 V bits and one A bit.
101 
102    In the implementation, we use two forms of compression (compressed V bits
103    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
104    for memory.
105 
106    Memcheck also tracks extra information about each heap block that is
107    allocated, for detecting memory leaks and other purposes.
108 */
109 
110 /*------------------------------------------------------------*/
111 /*--- Basic A/V bitmap representation.                     ---*/
112 /*------------------------------------------------------------*/
113 
114 /* All reads and writes are checked against a memory map (a.k.a. shadow
115    memory), which records the state of all memory in the process.
116 
117    On 32-bit machines the memory map is organised as follows.
118    The top 16 bits of an address are used to index into a top-level
119    map table, containing 65536 entries.  Each entry is a pointer to a
120    second-level map, which records the accessibility and validity
121    permissions for the 65536 bytes indexed by the lower 16 bits of the
122    address.  Each byte is represented by two bits (details are below).  So
123    each second-level map contains 16384 bytes.  This two-level arrangement
124    conveniently divides the 4G address space into 64k lumps, each of size 64k
125    bytes.
126 
127    All entries in the primary (top-level) map must point to a valid
128    secondary (second-level) map.  Since many of the 64kB chunks will
129    have the same status for every bit -- ie. noaccess (for unused
130    address space) or entirely addressable and defined (for code segments) --
131    there are three distinguished secondary maps, which indicate 'noaccess',
132    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
133    map entry points to the relevant distinguished map.  In practice,
134    typically more than half of the addressable memory is represented with
135    the 'undefined' or 'defined' distinguished secondary map, so it gives a
136    good saving.  It also lets us set the V+A bits of large address regions
137    quickly in set_address_range_perms().
138 
139    On 64-bit machines it's more complicated.  If we followed the same basic
140    scheme we'd have a four-level table which would require too many memory
141    accesses.  So instead the top-level map table has 2^20 entries (indexed
142    using bits 16..35 of the address);  this covers the bottom 64GB.  Any
143    accesses above 64GB are handled with a slow, sparse auxiliary table.
144    Valgrind's address space manager tries very hard to keep things below
145    this 64GB barrier so that performance doesn't suffer too much.
146 
147    Note that this file has a lot of different functions for reading and
148    writing shadow memory.  Only a couple are strictly necessary (eg.
149    get_vabits2 and set_vabits2); most are just specialised for specific
150    common cases to improve performance.
151 
152    Aside: the V+A bits are less precise than they could be -- we have no way
153    of marking memory as read-only.  It would be great if we could add an
154    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
155    which requires 2.3 bits to hold, and there's no way to do that elegantly
156    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
157    seem worth it.
158 */
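
/* Editor's sketch (not part of the original source): how the two-level
   lookup described above decomposes an address on a 32-bit host.  The
   names mirror the real code further down, but this fragment is
   illustrative only.

      Addr    a       = 0x4F12ABCD;
      UWord   pm_off  = a >> 16;            // 0x4F12: index into primary_map[]
      SecMap* sm      = primary_map[pm_off];
      UWord   sm_off  = (a & 0xFFFF) >> 2;  // 0x2AF3: which vabits8 chunk
      UChar   vabits8 = sm->vabits8[sm_off];
      // the 2 V+A bits for byte a sit at bit offset ((a & 3) << 1) == 2
*/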
159 
160 /* --------------- Basic configuration --------------- */
161 
162 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
163 
164 #if VG_WORDSIZE == 4
165 
166 /* cover the entire address space */
167 #  define N_PRIMARY_BITS  16
168 
169 #else
170 
171 /* Just handle the first 64G fast and the rest via auxiliary
172    primaries.  If you change this, Memcheck will assert at startup.
173    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
174 #  define N_PRIMARY_BITS  20
175 
176 #endif
177 
178 
179 /* Do not change this. */
180 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
181 
182 /* Do not change this. */
183 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
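
/* Worked example (editor's note, not from the original source): with
   N_PRIMARY_BITS == 16 (32-bit hosts), N_PRIMARY_MAP == 65536 and
   MAX_PRIMARY_ADDRESS == 65536 * 65536 - 1 == 0xFFFFFFFF, i.e. the whole
   4GB address space is covered by the main primary map.  With
   N_PRIMARY_BITS == 20 (64-bit hosts), N_PRIMARY_MAP == 1048576 and
   MAX_PRIMARY_ADDRESS == 65536 * 1048576 - 1 == 0xFFFFFFFFF (64GB - 1);
   anything above that is handled via the auxiliary primary map below. */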
184 
185 
186 /* --------------- Secondary maps --------------- */
187 
188 // Each byte of memory conceptually has an A bit, which indicates its
189 // addressability, and 8 V bits, which indicate its definedness.
190 //
191 // But because very few bytes are partially defined, we can use a nice
192 // compression scheme to reduce the size of shadow memory.  Each byte of
193 // memory has 2 bits which indicate its state (ie. V+A bits):
194 //
195 //   00:  noaccess    (unaddressable but treated as fully defined)
196 //   01:  undefined   (addressable and fully undefined)
197 //   10:  defined     (addressable and fully defined)
198 //   11:  partdefined (addressable and partially defined)
199 //
200 // In the "partdefined" case, we use a secondary table to store the V bits.
201 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
202 // bits.
203 //
204 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
205 // four bytes (32 bits) of memory are in each chunk.  Hence the name
206 // "vabits8".  This lets us get the V+A bits for four bytes at a time
207 // easily (without having to do any shifting and/or masking), and that is a
208 // very common operation.  (Note that although each vabits8 chunk
209 // is 8 bits in size, it represents 32 bits of memory.)
210 //
211 // The representation is "inverse" little-endian... each 4 bytes of
212 // memory is represented by a 1 byte value, where:
213 //
214 // - the status of byte (a+0) is held in bits [1..0]
215 // - the status of byte (a+1) is held in bits [3..2]
216 // - the status of byte (a+2) is held in bits [5..4]
217 // - the status of byte (a+3) is held in bits [7..6]
218 //
219 // It's "inverse" because endianness normally describes a mapping from
220 // value bits to memory addresses;  in this case the mapping is inverted.
221 // Ie. instead of particular value bits being held in certain addresses, in
222 // this case certain addresses are represented by particular value bits.
223 // See insert_vabits2_into_vabits8() for an example.
224 //
225 // But note that we don't compress the V bits stored in registers;  they
226 // need to be explicit to make the shadow operations possible.  Therefore
227 // when moving values between registers and memory we need to convert
228 // between the expanded in-register format and the compressed in-memory
229 // format.  This isn't so difficult, it just requires careful attention in a
230 // few places.
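//
// Worked example (editor's addition, not in the original source): suppose
// the vabits8 chunk covering the four bytes at (4-aligned) address a holds
// 0x9B == 10_01_10_11b.  Reading two bits at a time from the bottom:
//
//   byte a+0: bits [1..0] == 11b -> VA_BITS2_PARTDEFINED
//   byte a+1: bits [3..2] == 10b -> VA_BITS2_DEFINED
//   byte a+2: bits [5..4] == 01b -> VA_BITS2_UNDEFINED
//   byte a+3: bits [7..6] == 10b -> VA_BITS2_DEFINED
//
// which is exactly what extract_vabits2_from_vabits8() computes as
// (vabits8 >> ((a & 3) << 1)) & 0x3.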
231 
232 // These represent eight bits of memory.
233 #define VA_BITS2_NOACCESS     0x0      // 00b
234 #define VA_BITS2_UNDEFINED    0x1      // 01b
235 #define VA_BITS2_DEFINED      0x2      // 10b
236 #define VA_BITS2_PARTDEFINED  0x3      // 11b
237 
238 // These represent 16 bits of memory.
239 #define VA_BITS4_NOACCESS     0x0      // 00_00b
240 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
241 #define VA_BITS4_DEFINED      0xa      // 10_10b
242 
243 // These represent 32 bits of memory.
244 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
245 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
246 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
247 
248 // These represent 64 bits of memory.
249 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
250 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
251 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
252 
253 
254 #define SM_CHUNKS             16384
255 #define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
256 #define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
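
// Editor's sketch (not in the original source): SM_OFF() picks the vabits8
// chunk holding a byte's 2 V+A bits, while SM_OFF_16() picks the 16-bit
// chunk used when reading the V+A bits for an aligned 8-byte word.
// E.g. for a == 0x1234ABCE:
//   SM_OFF(a)    == 0xABCE >> 2 == 0x2AF3   (chunk covering 0xABCC..0xABCF)
//   SM_OFF_16(a) == 0xABCE >> 3 == 0x1579   (chunk covering 0xABC8..0xABCF)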
257 
258 // Paranoia:  it's critical for performance that the requested inlining
259 // occurs.  So try extra hard.
260 #define INLINE    inline __attribute__((always_inline))
261 
262 static INLINE Addr start_of_this_sm ( Addr a ) {
263    return (a & (~SM_MASK));
264 }
265 static INLINE Bool is_start_of_sm ( Addr a ) {
266    return (start_of_this_sm(a) == a);
267 }
268 
269 typedef
270    struct {
271       UChar vabits8[SM_CHUNKS];
272    }
273    SecMap;
274 
275 // 3 distinguished secondary maps, one for no-access, one for
276 // accessible but undefined, and one for accessible and defined.
277 // Distinguished secondaries may never be modified.
278 #define SM_DIST_NOACCESS   0
279 #define SM_DIST_UNDEFINED  1
280 #define SM_DIST_DEFINED    2
281 
282 static SecMap sm_distinguished[3];
283 
284 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
285    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
286 }
287 
288 // Forward declaration
289 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
290 
291 /* dist_sm points to one of our three distinguished secondaries.  Make
292    a copy of it so that we can write to it.
293 */
294 static SecMap* copy_for_writing ( SecMap* dist_sm )
295 {
296    SecMap* new_sm;
297    tl_assert(dist_sm == &sm_distinguished[0]
298           || dist_sm == &sm_distinguished[1]
299           || dist_sm == &sm_distinguished[2]);
300 
301    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
302    if (new_sm == NULL)
303       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
304                                    sizeof(SecMap) );
305    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
306    update_SM_counts(dist_sm, new_sm);
307    return new_sm;
308 }
309 
310 /* --------------- Stats --------------- */
311 
312 static Int   n_issued_SMs      = 0;
313 static Int   n_deissued_SMs    = 0;
314 static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
315 static Int   n_undefined_SMs   = 0;
316 static Int   n_defined_SMs     = 0;
317 static Int   n_non_DSM_SMs     = 0;
318 static Int   max_noaccess_SMs  = 0;
319 static Int   max_undefined_SMs = 0;
320 static Int   max_defined_SMs   = 0;
321 static Int   max_non_DSM_SMs   = 0;
322 
323 /* # searches initiated in auxmap_L1, and # base cmps required */
324 static ULong n_auxmap_L1_searches  = 0;
325 static ULong n_auxmap_L1_cmps      = 0;
326 /* # of searches that missed in auxmap_L1 and therefore had to
327    be handed to auxmap_L2. And the number of nodes inserted. */
328 static ULong n_auxmap_L2_searches  = 0;
329 static ULong n_auxmap_L2_nodes     = 0;
330 
331 static Int   n_sanity_cheap     = 0;
332 static Int   n_sanity_expensive = 0;
333 
334 static Int   n_secVBit_nodes   = 0;
335 static Int   max_secVBit_nodes = 0;
336 
337 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
338 {
339    if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
340    else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
341    else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
342    else                                                  { n_non_DSM_SMs  --;
343                                                            n_deissued_SMs ++; }
344 
345    if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
346    else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
347    else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
348    else                                                  { n_non_DSM_SMs  ++;
349                                                            n_issued_SMs   ++; }
350 
351    if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
352    if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
353    if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
354    if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
355 }
356 
357 /* --------------- Primary maps --------------- */
358 
359 /* The main primary map.  This covers some initial part of the address
360    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
361    handled using the auxiliary primary map.
362 */
363 static SecMap* primary_map[N_PRIMARY_MAP];
364 
365 
366 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
367    value, and sm points at the relevant secondary map.  As with the
368    main primary map, the secondary may be either a real secondary, or
369    one of the three distinguished secondaries.  DO NOT CHANGE THIS
370    LAYOUT: the first word has to be the key for OSet fast lookups.
371 */
372 typedef
373    struct {
374       Addr    base;
375       SecMap* sm;
376    }
377    AuxMapEnt;
378 
379 /* Tunable parameter: How big is the L1 queue? */
380 #define N_AUXMAP_L1 24
381 
382 /* Tunable parameter: How far along the L1 queue to insert
383    entries resulting from L2 lookups? */
384 #define AUXMAP_L1_INSERT_IX 12
385 
386 static struct {
387           Addr       base;
388           AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
389        }
390        auxmap_L1[N_AUXMAP_L1];
391 
392 static OSet* auxmap_L2 = NULL;
393 
394 static void init_auxmap_L1_L2 ( void )
395 {
396    Int i;
397    for (i = 0; i < N_AUXMAP_L1; i++) {
398       auxmap_L1[i].base = 0;
399       auxmap_L1[i].ent  = NULL;
400    }
401 
402    tl_assert(0 == offsetof(AuxMapEnt,base));
403    tl_assert(sizeof(Addr) == sizeof(void*));
404    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
405                                     /*fastCmp*/ NULL,
406                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
407 }
408 
409 /* Check representation invariants; if OK return NULL; else a
410    descriptive bit of text.  Also return the number of
411    non-distinguished secondary maps referred to from the auxiliary
412    primary maps. */
413 
414 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
415 {
416    Word i, j;
417    /* On a 32-bit platform, the L2 and L1 tables should
418       both remain empty forever.
419 
420       On a 64-bit platform:
421       In the L2 table:
422        all .base & 0xFFFF == 0
423        all .base > MAX_PRIMARY_ADDRESS
424       In the L1 table:
425        all .base & 0xFFFF == 0
426        all (.base > MAX_PRIMARY_ADDRESS
427             .base & 0xFFFF == 0
428             and .ent points to an AuxMapEnt with the same .base)
429            or
430            (.base == 0 and .ent == NULL)
431    */
432    *n_secmaps_found = 0;
433    if (sizeof(void*) == 4) {
434       /* 32-bit platform */
435       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
436          return "32-bit: auxmap_L2 is non-empty";
437       for (i = 0; i < N_AUXMAP_L1; i++)
438          if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
439             return "32-bit: auxmap_L1 is non-empty";
440    } else {
441       /* 64-bit platform */
442       UWord elems_seen = 0;
443       AuxMapEnt *elem, *res;
444       AuxMapEnt key;
445       /* L2 table */
446       VG_(OSetGen_ResetIter)(auxmap_L2);
447       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
448          elems_seen++;
449          if (0 != (elem->base & (Addr)0xFFFF))
450             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
451          if (elem->base <= MAX_PRIMARY_ADDRESS)
452             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
453          if (elem->sm == NULL)
454             return "64-bit: .sm in _L2 is NULL";
455          if (!is_distinguished_sm(elem->sm))
456             (*n_secmaps_found)++;
457       }
458       if (elems_seen != n_auxmap_L2_nodes)
459          return "64-bit: disagreement on number of elems in _L2";
460       /* Check L1-L2 correspondence */
461       for (i = 0; i < N_AUXMAP_L1; i++) {
462          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
463             continue;
464          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
465             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
466          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
467             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
468          if (auxmap_L1[i].ent == NULL)
469             return "64-bit: .ent is NULL in auxmap_L1";
470          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
471             return "64-bit: _L1 and _L2 bases are inconsistent";
472          /* Look it up in auxmap_L2. */
473          key.base = auxmap_L1[i].base;
474          key.sm   = 0;
475          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
476          if (res == NULL)
477             return "64-bit: _L1 .base not found in _L2";
478          if (res != auxmap_L1[i].ent)
479             return "64-bit: _L1 .ent disagrees with _L2 entry";
480       }
481       /* Check L1 contains no duplicates */
482       for (i = 0; i < N_AUXMAP_L1; i++) {
483          if (auxmap_L1[i].base == 0)
484             continue;
485          for (j = i+1; j < N_AUXMAP_L1; j++) {
486             if (auxmap_L1[j].base == 0)
487                continue;
488             if (auxmap_L1[j].base == auxmap_L1[i].base)
489                return "64-bit: duplicate _L1 .base entries";
490          }
491       }
492    }
493    return NULL; /* ok */
494 }
495 
496 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
497 {
498    Word i;
499    tl_assert(ent);
500    tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
501    for (i = N_AUXMAP_L1-1; i > rank; i--)
502       auxmap_L1[i] = auxmap_L1[i-1];
503    auxmap_L1[rank].base = ent->base;
504    auxmap_L1[rank].ent  = ent;
505 }
506 
507 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
508 {
509    AuxMapEnt  key;
510    AuxMapEnt* res;
511    Word       i;
512 
513    tl_assert(a > MAX_PRIMARY_ADDRESS);
514    a &= ~(Addr)0xFFFF;
515 
516    /* First search the front-cache, which is a self-organising
517       list containing the most popular entries. */
518 
519    if (LIKELY(auxmap_L1[0].base == a))
520       return auxmap_L1[0].ent;
521    if (LIKELY(auxmap_L1[1].base == a)) {
522       Addr       t_base = auxmap_L1[0].base;
523       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
524       auxmap_L1[0].base = auxmap_L1[1].base;
525       auxmap_L1[0].ent  = auxmap_L1[1].ent;
526       auxmap_L1[1].base = t_base;
527       auxmap_L1[1].ent  = t_ent;
528       return auxmap_L1[0].ent;
529    }
530 
531    n_auxmap_L1_searches++;
532 
533    for (i = 0; i < N_AUXMAP_L1; i++) {
534       if (auxmap_L1[i].base == a) {
535          break;
536       }
537    }
538    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
539 
540    n_auxmap_L1_cmps += (ULong)(i+1);
541 
542    if (i < N_AUXMAP_L1) {
543       if (i > 0) {
544          Addr       t_base = auxmap_L1[i-1].base;
545          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
546          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
547          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
548          auxmap_L1[i-0].base = t_base;
549          auxmap_L1[i-0].ent  = t_ent;
550          i--;
551       }
552       return auxmap_L1[i].ent;
553    }
554 
555    n_auxmap_L2_searches++;
556 
557    /* First see if we already have it. */
558    key.base = a;
559    key.sm   = 0;
560 
561    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
562    if (res)
563       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
564    return res;
565 }
566 
567 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
568 {
569    AuxMapEnt *nyu, *res;
570 
571    /* First see if we already have it. */
572    res = maybe_find_in_auxmap( a );
573    if (LIKELY(res))
574       return res;
575 
576    /* Ok, there's no entry in the secondary map, so we'll have
577       to allocate one. */
578    a &= ~(Addr)0xFFFF;
579 
580    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
581    tl_assert(nyu);
582    nyu->base = a;
583    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
584    VG_(OSetGen_Insert)( auxmap_L2, nyu );
585    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
586    n_auxmap_L2_nodes++;
587    return nyu;
588 }
589 
590 /* --------------- SecMap fundamentals --------------- */
591 
592 // In all these, 'low' means it's definitely in the main primary map,
593 // 'high' means it's definitely in the auxiliary table.
594 
595 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
596 {
597    UWord pm_off = a >> 16;
598 #  if VG_DEBUG_MEMORY >= 1
599    tl_assert(pm_off < N_PRIMARY_MAP);
600 #  endif
601    return &primary_map[ pm_off ];
602 }
603 
604 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
605 {
606    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
607    return &am->sm;
608 }
609 
610 static INLINE SecMap** get_secmap_ptr ( Addr a )
611 {
612    return ( a <= MAX_PRIMARY_ADDRESS
613           ? get_secmap_low_ptr(a)
614           : get_secmap_high_ptr(a));
615 }
616 
617 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
618 {
619    return *get_secmap_low_ptr(a);
620 }
621 
622 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
623 {
624    return *get_secmap_high_ptr(a);
625 }
626 
627 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
628 {
629    SecMap** p = get_secmap_low_ptr(a);
630    if (UNLIKELY(is_distinguished_sm(*p)))
631       *p = copy_for_writing(*p);
632    return *p;
633 }
634 
635 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
636 {
637    SecMap** p = get_secmap_high_ptr(a);
638    if (UNLIKELY(is_distinguished_sm(*p)))
639       *p = copy_for_writing(*p);
640    return *p;
641 }
642 
643 /* Produce the secmap for 'a', either from the primary map or by
644    ensuring there is an entry for it in the aux primary map.  The
645    secmap may be a distinguished one as the caller will only want to
646    be able to read it.
647 */
648 static INLINE SecMap* get_secmap_for_reading ( Addr a )
649 {
650    return ( a <= MAX_PRIMARY_ADDRESS
651           ? get_secmap_for_reading_low (a)
652           : get_secmap_for_reading_high(a) );
653 }
654 
655 /* Produce the secmap for 'a', either from the primary map or by
656    ensuring there is an entry for it in the aux primary map.  The
657    secmap may not be a distinguished one, since the caller will want
658    to be able to write it.  If it is a distinguished secondary, make a
659    writable copy of it, install it, and return the copy instead.  (COW
660    semantics).
661 */
662 static INLINE SecMap* get_secmap_for_writing ( Addr a )
663 {
664    return ( a <= MAX_PRIMARY_ADDRESS
665           ? get_secmap_for_writing_low (a)
666           : get_secmap_for_writing_high(a) );
667 }
668 
669 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
670    allocate one if one doesn't already exist.  This is used by the
671    leak checker.
672 */
673 static SecMap* maybe_get_secmap_for ( Addr a )
674 {
675    if (a <= MAX_PRIMARY_ADDRESS) {
676       return get_secmap_for_reading_low(a);
677    } else {
678       AuxMapEnt* am = maybe_find_in_auxmap(a);
679       return am ? am->sm : NULL;
680    }
681 }
682 
683 /* --------------- Fundamental functions --------------- */
684 
685 static INLINE
686 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
687 {
688    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
689    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
690    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
691 }
692 
693 static INLINE
694 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
695 {
696    UInt shift;
697    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
698    shift     =  (a & 2)   << 1;        // shift by 0 or 4
699    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
700    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
701 }
702 
703 static INLINE
704 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
705 {
706    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
707    vabits8 >>= shift;                  // shift the two bits to the bottom
708    return 0x3 & vabits8;               // mask out the rest
709 }
710 
711 static INLINE
712 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
713 {
714    UInt shift;
715    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
716    shift = (a & 2) << 1;               // shift by 0 or 4
717    vabits8 >>= shift;                  // shift the four bits to the bottom
718    return 0xf & vabits8;               // mask out the rest
719 }
720 
721 // Note that these four are only used in slow cases.  The fast cases do
722 // clever things like combine the auxmap check (in
723 // get_secmap_{read,writ}able) with alignment checks.
724 
725 // *** WARNING! ***
726 // Any time this function is called, if it is possible that vabits2
727 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
728 // sec-V-bits table must also be set!
729 static INLINE
730 void set_vabits2 ( Addr a, UChar vabits2 )
731 {
732    SecMap* sm       = get_secmap_for_writing(a);
733    UWord   sm_off   = SM_OFF(a);
734    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
735 }
736 
737 static INLINE
738 UChar get_vabits2 ( Addr a )
739 {
740    SecMap* sm       = get_secmap_for_reading(a);
741    UWord   sm_off   = SM_OFF(a);
742    UChar   vabits8  = sm->vabits8[sm_off];
743    return extract_vabits2_from_vabits8(a, vabits8);
744 }
745 
746 // *** WARNING! ***
747 // Any time this function is called, if it is possible that any of the
748 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
749 // corresponding entry(s) in the sec-V-bits table must also be set!
750 static INLINE
751 UChar get_vabits8_for_aligned_word32 ( Addr a )
752 {
753    SecMap* sm       = get_secmap_for_reading(a);
754    UWord   sm_off   = SM_OFF(a);
755    UChar   vabits8  = sm->vabits8[sm_off];
756    return vabits8;
757 }
758 
759 static INLINE
760 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
761 {
762    SecMap* sm       = get_secmap_for_writing(a);
763    UWord   sm_off   = SM_OFF(a);
764    sm->vabits8[sm_off] = vabits8;
765 }
766 
767 
768 // Forward declarations
769 static UWord get_sec_vbits8(Addr a);
770 static void  set_sec_vbits8(Addr a, UWord vbits8);
771 
772 // Returns False if there was an addressability error.
773 static INLINE
774 Bool set_vbits8 ( Addr a, UChar vbits8 )
775 {
776    Bool  ok      = True;
777    UChar vabits2 = get_vabits2(a);
778    if ( VA_BITS2_NOACCESS != vabits2 ) {
779       // Addressable.  Convert in-register format to in-memory format.
780       // Also remove any existing sec V bit entry for the byte if no
781       // longer necessary.
782       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
783       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
784       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
785                                                 set_sec_vbits8(a, vbits8);  }
786       set_vabits2(a, vabits2);
787 
788    } else {
789       // Unaddressable!  Do nothing -- when writing to unaddressable
790       // memory it acts as a black hole, and the V bits can never be seen
791       // again.  So we don't have to write them at all.
792       ok = False;
793    }
794    return ok;
795 }
796 
797 // Returns False if there was an addressability error.  In that case, we put
798 // all defined bits into vbits8.
799 static INLINE
800 Bool get_vbits8 ( Addr a, UChar* vbits8 )
801 {
802    Bool  ok      = True;
803    UChar vabits2 = get_vabits2(a);
804 
805    // Convert the in-memory format to in-register format.
806    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
807    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
808    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
809       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
810       ok = False;
811    } else {
812       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
813       *vbits8 = get_sec_vbits8(a);
814    }
815    return ok;
816 }
817 
818 
819 /* --------------- Secondary V bit table ------------ */
820 
821 // This table holds the full V bit pattern for partially-defined bytes
822 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
823 // memory.
824 //
825 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
826 // then overwrite the same address with a fully defined byte, the sec-V-bit
827 // node will not necessarily be removed.  This is because checking for
828 // whether removal is necessary would slow down the fast paths.
829 //
830 // To avoid the stale nodes building up too much, we periodically (once the
831 // table reaches a certain size) garbage collect (GC) the table by
832 // traversing it and evicting any nodes that no longer hold a PDB.
833 // If more than a certain proportion of nodes survived, we increase the
834 // table size so that GCs occur less often.
835 //
836 // This policy is designed to avoid bad table bloat in the worst case where
837 // a program creates huge numbers of stale PDBs -- we would get this bloat
838 // if we had no GC -- while handling well the case where a node becomes
839 // stale but shortly afterwards is rewritten with a PDB and so becomes
840 // non-stale again (which happens quite often, eg. in perf/bz2).  If we just
841 // remove all stale nodes as soon as possible, we just end up re-adding a
842 // lot of them again later.  The "sufficiently stale" approach avoids
843 // this.  (If a program has many live PDBs, performance will just suck,
844 // there's no way around that.)
845 //
846 // Further comments, JRS 14 Feb 2012.  It turns out that the policy of
847 // holding on to stale entries for 2 GCs before discarding them can lead
848 // to massive space leaks.  So we're changing to an arrangement where
849 // lines are evicted as soon as they are observed to be stale during a
850 // GC.  This also has a side benefit of allowing the sufficiently_stale
851 // field to be removed from the SecVBitNode struct, reducing its size by
852 // 8 bytes, which is a substantial space saving considering that the
853 // struct was previously 32 or so bytes, on a 64 bit target.
854 //
855 // In order to try and mitigate the problem that the "sufficiently stale"
856 // heuristic was designed to avoid, the table size is allowed to drift
857 // up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
858 // means that nodes will exist in the table longer on average, and hopefully
859 // will be deleted and re-added less frequently.
860 //
861 // The previous scaling up mechanism (now called STEPUP) is retained:
862 // if residency exceeds 50%, the table is scaled up, although by a
863 // factor sqrt(2) rather than 2 as before.  This effectively doubles the
864 // frequency of GCs when there are many PDBs and reduces the tendency of
865 // stale PDBs to reside for long periods in the table.
866 
867 static OSet* secVBitTable;
868 
869 // Stats
870 static ULong sec_vbits_new_nodes = 0;
871 static ULong sec_vbits_updates   = 0;
872 
873 // This must be a power of two;  this is checked in mc_pre_clo_init().
874 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
875 // a larger address range) they take more space but we can get multiple
876 // partially-defined bytes in one if they are close to each other, reducing
877 // the number of total nodes.  In practice sometimes they are clustered (eg.
878 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
879 // row), but often not.  So we choose something intermediate.
880 #define BYTES_PER_SEC_VBIT_NODE     16
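
// Editor's sketch (not in the original source): with
// BYTES_PER_SEC_VBIT_NODE == 16, a partially-defined byte at address
// 0x5003A lives in the node whose key is VG_ROUNDDN(0x5003A, 16) ==
// 0x50030, in slot 0x5003A % 16 == 0xA of that node's vbits8[] array.
// Any other PDBs in 0x50030..0x5003F share the same node.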
881 
882 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
883 // more than this many nodes survive a GC.
884 #define STEPUP_SURVIVOR_PROPORTION  0.5
885 #define STEPUP_GROWTH_FACTOR        1.414213562
886 
887 // If the above heuristic doesn't apply, then we may make the table
888 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
889 // this many nodes survive a GC, _and_ the total table size does
890 // not exceed a fixed limit.  The numbers are somewhat arbitrary, but
891 // work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
892 // effectively, although gradually, reduces residency and increases time
893 // between GCs for programs with small numbers of PDBs.  The 80000 limit
894 // effectively limits the table size to around 2MB for programs with
895 // small numbers of PDBs, whilst giving a reasonably long lifetime to
896 // entries, to try and reduce the costs resulting from deleting and
897 // re-adding of entries.
898 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
899 #define DRIFTUP_GROWTH_FACTOR       1.015
900 #define DRIFTUP_MAX_SIZE            80000
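
// Editor's sketch of the resizing arithmetic (not in the original source):
// starting from secVBitLimit == 1000, a GC in which 600 nodes survive
// (60% > STEPUP_SURVIVOR_PROPORTION) steps the limit up to
// (Int)(1000 * 1.414213562) == 1414, whereas a GC in which only 200 nodes
// survive (20% > DRIFTUP_SURVIVOR_PROPORTION) merely drifts it up to
// (Int)(1000 * 1.015) == 1015, until the DRIFTUP_MAX_SIZE ceiling of
// 80000 is reached.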
901 
902 // We GC the table when it gets this many nodes in it, ie. it's effectively
903 // the table size.  It can change.
904 static Int  secVBitLimit = 1000;
905 
906 // The number of GCs done, used to age sec-V-bit nodes for eviction.
907 // Because it's unsigned, wrapping doesn't matter -- the right answer will
908 // come out anyway.
909 static UInt GCs_done = 0;
910 
911 typedef
912    struct {
913       Addr  a;
914       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
915    }
916    SecVBitNode;
917 
918 static OSet* createSecVBitTable(void)
919 {
920    OSet* newSecVBitTable;
921    newSecVBitTable = VG_(OSetGen_Create_With_Pool)
922       ( offsetof(SecVBitNode, a),
923         NULL, // use fast comparisons
924         VG_(malloc), "mc.cSVT.1 (sec VBit table)",
925         VG_(free),
926         1000,
927         sizeof(SecVBitNode));
928    return newSecVBitTable;
929 }
930 
931 static void gcSecVBitTable(void)
932 {
933    OSet*        secVBitTable2;
934    SecVBitNode* n;
935    Int          i, n_nodes = 0, n_survivors = 0;
936 
937    GCs_done++;
938 
939    // Create the new table.
940    secVBitTable2 = createSecVBitTable();
941 
942    // Traverse the table, moving fresh nodes into the new table.
943    VG_(OSetGen_ResetIter)(secVBitTable);
944    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
945       // Keep node if any of its bytes are non-stale.  Using
946       // get_vabits2() for the lookup is not very efficient, but I don't
947       // think it matters.
948       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
949          if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
950             // Found a non-stale byte, so keep =>
951             // Insert a copy of the node into the new table.
952             SecVBitNode* n2 =
953                VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
954             *n2 = *n;
955             VG_(OSetGen_Insert)(secVBitTable2, n2);
956             break;
957          }
958       }
959    }
960 
961    // Get the before and after sizes.
962    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
963    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
964 
965    // Destroy the old table, and put the new one in its place.
966    VG_(OSetGen_Destroy)(secVBitTable);
967    secVBitTable = secVBitTable2;
968 
969    if (VG_(clo_verbosity) > 1) {
970       HChar percbuf[7];
971       VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf);
972       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n",
973                    n_nodes, n_survivors, percbuf);
974    }
975 
976    // Increase table size if necessary.
977    if ((Double)n_survivors
978        > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
979       secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
980       if (VG_(clo_verbosity) > 1)
981          VG_(message)(Vg_DebugMsg,
982                       "memcheck GC: %d new table size (stepup)\n",
983                       secVBitLimit);
984    }
985    else
986    if (secVBitLimit < DRIFTUP_MAX_SIZE
987        && (Double)n_survivors
988           > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
989       secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
990       if (VG_(clo_verbosity) > 1)
991          VG_(message)(Vg_DebugMsg,
992                       "memcheck GC: %d new table size (driftup)\n",
993                       secVBitLimit);
994    }
995 }
996 
997 static UWord get_sec_vbits8(Addr a)
998 {
999    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1000    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
1001    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1002    UChar        vbits8;
1003    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1004    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1005    // make it to the secondary V bits table.
1006    vbits8 = n->vbits8[amod];
1007    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1008    return vbits8;
1009 }
1010 
1011 static void set_sec_vbits8(Addr a, UWord vbits8)
1012 {
1013    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1014    Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
1015    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1016    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1017    // make it to the secondary V bits table.
1018    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1019    if (n) {
1020       n->vbits8[amod] = vbits8;     // update
1021       sec_vbits_updates++;
1022    } else {
1023       // Do a table GC if necessary.  Nb: do this before creating and
1024       // inserting the new node, to avoid erroneously GC'ing the new node.
1025       if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1026          gcSecVBitTable();
1027       }
1028 
1029       // New node:  assign the specific byte, make the rest invalid (they
1030       // should never be read as-is, but be cautious).
1031       n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1032       n->a            = aAligned;
1033       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1034          n->vbits8[i] = V_BITS8_UNDEFINED;
1035       }
1036       n->vbits8[amod] = vbits8;
1037 
1038       // Insert the new node.
1039       VG_(OSetGen_Insert)(secVBitTable, n);
1040       sec_vbits_new_nodes++;
1041 
1042       n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1043       if (n_secVBit_nodes > max_secVBit_nodes)
1044          max_secVBit_nodes = n_secVBit_nodes;
1045    }
1046 }
1047 
1048 /* --------------- Endianness helpers --------------- */
1049 
1050 /* Returns the offset in memory of the byteno-th least significant byte
1051    in a wordszB-sized word, given the specified endianness. */
1052 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1053                                     UWord byteno ) {
1054    return bigendian ? (wordszB-1-byteno) : byteno;
1055 }
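
/* Editor's usage sketch (not in the original source): byteno counts from
   the least significant byte of the value, so for an 8-byte word
      byte_offset_w(8, False, 0) == 0   -- little-endian: LSB at the lowest address
      byte_offset_w(8, True,  0) == 7   -- big-endian: LSB at the highest address
   Combined with the MSB-to-LSB loops in the slow loaders below, this
   reassembles the loaded value in the right order for either endianness. */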
1056 
1057 
1058 /* --------------- Ignored address ranges --------------- */
1059 
1060 /* Denotes the address-error-reportability status for address ranges:
1061    IAR_NotIgnored:  the usual case -- report errors in this range
1062    IAR_CommandLine: don't report errors -- from command line setting
1063    IAR_ClientReq:   don't report errors -- from client request
1064 */
1065 typedef
1066    enum { IAR_INVALID=99,
1067           IAR_NotIgnored,
1068           IAR_CommandLine,
1069           IAR_ClientReq }
1070    IARKind;
1071 
1072 static const HChar* showIARKind ( IARKind iark )
1073 {
1074    switch (iark) {
1075       case IAR_INVALID:     return "INVALID";
1076       case IAR_NotIgnored:  return "NotIgnored";
1077       case IAR_CommandLine: return "CommandLine";
1078       case IAR_ClientReq:   return "ClientReq";
1079       default:              return "???";
1080    }
1081 }
1082 
1083 // RangeMap<IARKind>
1084 static RangeMap* gIgnoredAddressRanges = NULL;
1085 
1086 static void init_gIgnoredAddressRanges ( void )
1087 {
1088    if (LIKELY(gIgnoredAddressRanges != NULL))
1089       return;
1090    gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
1091                                              VG_(free), IAR_NotIgnored );
1092    tl_assert(gIgnoredAddressRanges != NULL);
1093 }
1094 
1095 INLINE Bool MC_(in_ignored_range) ( Addr a )
1096 {
1097    if (LIKELY(gIgnoredAddressRanges == NULL))
1098       return False;
1099    UWord how     = IAR_INVALID;
1100    UWord key_min = ~(UWord)0;
1101    UWord key_max =  (UWord)0;
1102    VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
1103    tl_assert(key_min <= a && a <= key_max);
1104    switch (how) {
1105       case IAR_NotIgnored:  return False;
1106       case IAR_CommandLine: return True;
1107       case IAR_ClientReq:   return True;
1108       default: break; /* invalid */
1109    }
1110    VG_(tool_panic)("MC_(in_ignored_range)");
1111    /*NOTREACHED*/
1112 }
1113 
1114 /* Parse two Addr separated by a dash, or fail. */
1115 
1116 static Bool parse_range ( const HChar** ppc, Addr* result1, Addr* result2 )
1117 {
1118    Bool ok = VG_(parse_Addr) (ppc, result1);
1119    if (!ok)
1120       return False;
1121    if (**ppc != '-')
1122       return False;
1123    (*ppc)++;
1124    ok = VG_(parse_Addr) (ppc, result2);
1125    if (!ok)
1126       return False;
1127    return True;
1128 }
1129 
1130 /* Parse a set of ranges separated by commas, or
1131    fail.  If they are valid, add them to the global set of ignored
1132    ranges. */
1133 static Bool parse_ignore_ranges ( const HChar* str0 )
1134 {
1135    init_gIgnoredAddressRanges();
1136    const HChar*  str = str0;
1137    const HChar** ppc = &str;
1138    while (1) {
1139       Addr start = ~(Addr)0;
1140       Addr end   = (Addr)0;
1141       Bool ok    = parse_range(ppc, &start, &end);
1142       if (!ok)
1143          return False;
1144       if (start > end)
1145          return False;
1146       VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
1147       if (**ppc == 0)
1148          return True;
1149       if (**ppc != ',')
1150          return False;
1151       (*ppc)++;
1152    }
1153    /*NOTREACHED*/
1154    return False;
1155 }
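
/* Editor's usage sketch (not in the original source): the accepted syntax
   is a comma-separated list of dash-separated address pairs, as given to
   the --ignore-ranges= option, e.g.

      parse_ignore_ranges("0x11000000-0x11ffffff,0x22000000-0x22ffffff")

   binds both ranges to IAR_CommandLine and returns True; a missing dash,
   a trailing comma or a range with start > end makes it return False. */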
1156 
1157 /* Add or remove [start, +len) from the set of ignored ranges. */
1158 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
1159 {
1160    init_gIgnoredAddressRanges();
1161    const Bool verbose = (VG_(clo_verbosity) > 1);
1162    if (len == 0) {
1163       return False;
1164    }
1165    if (addRange) {
1166       VG_(bindRangeMap)(gIgnoredAddressRanges,
1167                         start, start+len-1, IAR_ClientReq);
1168       if (verbose)
1169          VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
1170                    (void*)start, (void*)(start+len-1));
1171    } else {
1172       VG_(bindRangeMap)(gIgnoredAddressRanges,
1173                         start, start+len-1, IAR_NotIgnored);
1174       if (verbose)
1175          VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
1176                    (void*)start, (void*)(start+len-1));
1177    }
1178    if (verbose) {
1179       VG_(dmsg)("memcheck:   now have %ld ranges:\n",
1180                 VG_(sizeRangeMap)(gIgnoredAddressRanges));
1181       Word i;
1182       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
1183          UWord val     = IAR_INVALID;
1184          UWord key_min = ~(UWord)0;
1185          UWord key_max = (UWord)0;
1186          VG_(indexRangeMap)( &key_min, &key_max, &val,
1187                              gIgnoredAddressRanges, i );
1188          VG_(dmsg)("memcheck:      [%ld]  %016llx-%016llx  %s\n",
1189                    i, (ULong)key_min, (ULong)key_max, showIARKind(val));
1190       }
1191    }
1192    return True;
1193 }
1194 
1195 
1196 /* --------------- Load/store slow cases. --------------- */
1197 
1198 static
1199 __attribute__((noinline))
1200 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
1201                                 Addr a, SizeT nBits, Bool bigendian )
1202 {
1203    ULong  pessim[4];     /* only used when p-l-ok=yes */
1204    SSizeT szB            = nBits / 8;
1205    SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
1206    SSizeT i, j;          /* Must be signed. */
1207    SizeT  n_addrs_bad = 0;
1208    Addr   ai;
1209    UChar  vbits8;
1210    Bool   ok;
1211 
1212    /* Code below assumes load size is a power of two and at least 64
1213       bits. */
1214    tl_assert((szB & (szB-1)) == 0 && szL > 0);
1215 
1216    /* If this triggers, you probably just need to increase the size of
1217       the pessim array. */
1218    tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
1219 
1220    for (j = 0; j < szL; j++) {
1221       pessim[j] = V_BITS64_DEFINED;
1222       res[j] = V_BITS64_UNDEFINED;
1223    }
1224 
1225    /* Make up a result V word, which contains the loaded data for
1226       valid addresses and Defined for invalid addresses.  Iterate over
1227       the bytes in the word, from the most significant down to the
1228       least.  The vbits to return are calculated into res[].  Also
1229       compute the pessimising value to be used when
1230       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
1231       info can be gleaned from the pessim array) but is used as a
1232       cross-check. */
1233    for (j = szL-1; j >= 0; j--) {
1234       ULong vbits64    = V_BITS64_UNDEFINED;
1235       ULong pessim64   = V_BITS64_DEFINED;
1236       UWord long_index = byte_offset_w(szL, bigendian, j);
1237       for (i = 8-1; i >= 0; i--) {
1238          PROF_EVENT(29, "mc_LOADV_128_or_256_slow(loop)");
1239          ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
1240          ok = get_vbits8(ai, &vbits8);
1241          vbits64 <<= 8;
1242          vbits64 |= vbits8;
1243          if (!ok) n_addrs_bad++;
1244          pessim64 <<= 8;
1245          pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1246       }
1247       res[long_index] = vbits64;
1248       pessim[long_index] = pessim64;
1249    }
1250 
1251    /* In the common case, all the addresses involved are valid, so we
1252       just return the computed V bits and have done. */
1253    if (LIKELY(n_addrs_bad == 0))
1254       return;
1255 
1256    /* If there's no possibility of getting a partial-loads-ok
1257       exemption, report the error and quit. */
1258    if (!MC_(clo_partial_loads_ok)) {
1259       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1260       return;
1261    }
1262 
1263    /* The partial-loads-ok exemption might apply.  Find out if it
1264       does.  If so, don't report an addressing error, but do return
1265       Undefined for the bytes that are out of range, so as to avoid
1266       false negatives.  If it doesn't apply, just report an addressing
1267       error in the usual way. */
1268 
1269    /* Some code steps along byte strings in aligned chunks
1270       even when there is only a partially defined word at the end (eg,
1271       optimised strlen).  This is allowed by the memory model of
1272       modern machines, since an aligned load cannot span two pages and
1273       thus cannot "partially fault".
1274 
1275       Therefore, a load from a partially-addressable place is allowed
1276       if all of the following hold:
1277       - the command-line flag is set [by default, it isn't]
1278       - it's an aligned load
1279       - at least one of the addresses in the word *is* valid
1280 
1281       Since this suppresses the addressing error, we avoid false
1282       negatives by marking bytes undefined when they come from an
1283       invalid address.
1284    */
1285 
1286    /* "at least one of the addresses is invalid" */
1287    ok = False;
1288    for (j = 0; j < szL; j++)
1289       ok |= pessim[j] != V_BITS8_DEFINED;
1290    tl_assert(ok);
1291 
1292    if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
1293       /* Exemption applies.  Use the previously computed pessimising
1294          value and return the combined result, but don't flag an
1295          addressing error.  The pessimising value is Defined for valid
1296          addresses and Undefined for invalid addresses. */
1297       /* for assumption that doing bitwise or implements UifU */
1298       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1299       /* (really need "UifU" here...)
1300          vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
1301       for (j = szL-1; j >= 0; j--)
1302          res[j] |= pessim[j];
1303       return;
1304    }
1305 
1306    /* Exemption doesn't apply.  Flag an addressing error in the normal
1307       way. */
1308    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1309 }
1310 
1311 
1312 static
1313 __attribute__((noinline))
1314 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1315 {
1316    PROF_EVENT(30, "mc_LOADVn_slow");
1317 
1318    /* ------------ BEGIN semi-fast cases ------------ */
1319    /* These deal quickly-ish with the common auxiliary primary map
1320       cases on 64-bit platforms.  They are merely a speedup hack and can be
1321       omitted without loss of correctness/functionality.  Note that in
1322       both cases the "sizeof(void*) == 8" causes these cases to be
1323       folded out by compilers on 32-bit platforms.  These are derived
1324       from LOADV64 and LOADV32.
1325    */
1326    if (LIKELY(sizeof(void*) == 8
1327                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1328       SecMap* sm       = get_secmap_for_reading(a);
1329       UWord   sm_off16 = SM_OFF_16(a);
1330       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1331       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1332          return V_BITS64_DEFINED;
1333       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1334          return V_BITS64_UNDEFINED;
1335       /* else fall into the slow case */
1336    }
1337    if (LIKELY(sizeof(void*) == 8
1338                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1339       SecMap* sm = get_secmap_for_reading(a);
1340       UWord sm_off = SM_OFF(a);
1341       UWord vabits8 = sm->vabits8[sm_off];
1342       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1343          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1344       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1345          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1346       /* else fall into slow case */
1347    }
1348    /* ------------ END semi-fast cases ------------ */
1349 
1350    ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
1351    ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
1352    SSizeT szB         = nBits / 8;
1353    SSizeT i;          /* Must be signed. */
1354    SizeT  n_addrs_bad = 0;
1355    Addr   ai;
1356    UChar  vbits8;
1357    Bool   ok;
1358 
1359    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1360 
1361    /* Make up a 64-bit result V word, which contains the loaded data
1362       for valid addresses and Defined for invalid addresses.  Iterate
1363       over the bytes in the word, from the most significant down to
1364       the least.  The vbits to return are calculated into vbits64.
1365       Also compute the pessimising value to be used when
1366       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
1367       info can be gleaned from pessim64) but is used as a
1368       cross-check. */
1369    for (i = szB-1; i >= 0; i--) {
1370       PROF_EVENT(31, "mc_LOADVn_slow(loop)");
1371       ai = a + byte_offset_w(szB, bigendian, i);
1372       ok = get_vbits8(ai, &vbits8);
1373       vbits64 <<= 8;
1374       vbits64 |= vbits8;
1375       if (!ok) n_addrs_bad++;
1376       pessim64 <<= 8;
1377       pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1378    }
1379 
1380    /* In the common case, all the addresses involved are valid, so we
1381       just return the computed V bits and have done. */
1382    if (LIKELY(n_addrs_bad == 0))
1383       return vbits64;
1384 
1385    /* If there's no possibility of getting a partial-loads-ok
1386       exemption, report the error and quit. */
1387    if (!MC_(clo_partial_loads_ok)) {
1388       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1389       return vbits64;
1390    }
1391 
1392    /* The partial-loads-ok exemption might apply.  Find out if it
1393       does.  If so, don't report an addressing error, but do return
1394       Undefined for the bytes that are out of range, so as to avoid
1395       false negatives.  If it doesn't apply, just report an addressing
1396       error in the usual way. */
1397 
1398    /* Some code steps along byte strings in aligned word-sized chunks
1399       even when only part of the word at the end is addressable (eg,
1400       optimised strlen).  This is allowed by the memory model of
1401       modern machines, since an aligned load cannot span two pages and
1402       thus cannot "partially fault", even though ANSI C/C++ declares
1403       such behaviour undefined.
1404 
1405       Therefore, a load from a partially-addressable place is allowed
1406       if all of the following hold:
1407       - the command-line flag is set [by default, it isn't]
1408       - it's a word-sized, word-aligned load
1409       - at least one of the addresses in the word *is* valid
1410 
1411       Since this suppresses the addressing error, we avoid false
1412       negatives by marking bytes undefined when they come from an
1413       invalid address.
1414    */
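
   /* Illustrative example (editor's note, not from the original
      sources; assumes a little-endian target): for a 4-byte aligned
      load whose last two bytes (a+2, a+3) run off the end of a
      mapping, the loop above leaves pessim64's low 32 bits as
      0xFFFF0000.  OR-ing that into vbits64 below marks the top 16
      bits of the loaded value as undefined while preserving the real
      V bits of the two addressable bytes, and no addressing error is
      flagged. */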
1415 
1416    /* "at least one of the addresses is invalid" */
1417    tl_assert(pessim64 != V_BITS64_DEFINED);
1418 
1419    if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1420        && n_addrs_bad < VG_WORDSIZE) {
1421       /* Exemption applies.  Use the previously computed pessimising
1422          value for vbits64 and return the combined result, but don't
1423          flag an addressing error.  The pessimising value is Defined
1424          for valid addresses and Undefined for invalid addresses. */
1425       /* Check the assumption that doing bitwise OR implements UifU */
1426       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1427       /* (really need "UifU" here...)
1428          vbits64 UifU= pessim64  (is pessimised by it, iow) */
1429       vbits64 |= pessim64;
1430       return vbits64;
1431    }
1432 
1433    /* Also, it appears that gcc generates string-stepping code in
1434       32-bit chunks on 64-bit platforms.  So, also grant an exemption
1435       for this case.  Note that the first clause of the conditional
1436       (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1437       will get folded out in 32-bit builds. */
1438    if (VG_WORDSIZE == 8
1439        && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
1440       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1441       /* (really need "UifU" here...)
1442          vbits64 UifU= pessim64  (is pessimised by it, iow) */
1443       vbits64 |= pessim64;
1444       /* Mark the upper 32 bits as undefined, just to be on the safe
1445          side. */
1446       vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
1447       return vbits64;
1448    }
1449 
1450    /* Exemption doesn't apply.  Flag an addressing error in the normal
1451       way. */
1452    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1453 
1454    return vbits64;
1455 }
1456 
1457 
1458 static
1459 __attribute__((noinline))
1460 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1461 {
1462    SizeT szB = nBits / 8;
1463    SizeT i, n_addrs_bad = 0;
1464    UChar vbits8;
1465    Addr  ai;
1466    Bool  ok;
1467 
1468    PROF_EVENT(35, "mc_STOREVn_slow");
1469 
1470    /* ------------ BEGIN semi-fast cases ------------ */
1471    /* These deal quickly-ish with the common auxiliary primary map
1472       cases on 64-bit platforms.  They are merely a speedup hack and
1473       can be omitted without loss of correctness/functionality.  Note
1474       that in both cases the "sizeof(void*) == 8" test causes these
1475       cases to be folded out by compilers on 32-bit platforms.  The
1476       logic below is somewhat similar to some cases extensively
1477       commented in MC_(helperc_STOREV8).
1478    */
1479    if (LIKELY(sizeof(void*) == 8
1480                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1481       SecMap* sm       = get_secmap_for_reading(a);
1482       UWord   sm_off16 = SM_OFF_16(a);
1483       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1484       if (LIKELY( !is_distinguished_sm(sm) &&
1485                           (VA_BITS16_DEFINED   == vabits16 ||
1486                            VA_BITS16_UNDEFINED == vabits16) )) {
1487          /* Handle common case quickly: a is suitably aligned, */
1488          /* is mapped, and is addressable. */
1489          // Convert full V-bits in register to compact 2-bit form.
1490          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1491             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
1492             return;
1493          } else if (V_BITS64_UNDEFINED == vbytes) {
1494             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
1495             return;
1496          }
1497          /* else fall into the slow case */
1498       }
1499       /* else fall into the slow case */
1500    }
1501    if (LIKELY(sizeof(void*) == 8
1502                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1503       SecMap* sm      = get_secmap_for_reading(a);
1504       UWord   sm_off  = SM_OFF(a);
1505       UWord   vabits8 = sm->vabits8[sm_off];
1506       if (LIKELY( !is_distinguished_sm(sm) &&
1507                           (VA_BITS8_DEFINED   == vabits8 ||
1508                            VA_BITS8_UNDEFINED == vabits8) )) {
1509          /* Handle common case quickly: a is suitably aligned, */
1510          /* is mapped, and is addressable. */
1511          // Convert full V-bits in register to compact 2-bit form.
1512          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1513             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1514             return;
1515          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1516             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1517             return;
1518          }
1519          /* else fall into the slow case */
1520       }
1521       /* else fall into the slow case */
1522    }
1523    /* ------------ END semi-fast cases ------------ */
1524 
1525    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1526 
1527    /* Dump vbytes in memory, iterating from least to most significant
1528       byte.  At the same time establish addressability of the location. */
1529    for (i = 0; i < szB; i++) {
1530       PROF_EVENT(36, "mc_STOREVn_slow(loop)");
1531       ai     = a + byte_offset_w(szB, bigendian, i);
1532       vbits8 = vbytes & 0xff;
1533       ok     = set_vbits8(ai, vbits8);
1534       if (!ok) n_addrs_bad++;
1535       vbytes >>= 8;
1536    }
1537 
1538    /* If an address error has happened, report it. */
1539    if (n_addrs_bad > 0)
1540       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1541 }
1542 
1543 
1544 /*------------------------------------------------------------*/
1545 /*--- Setting permissions over address ranges.             ---*/
1546 /*------------------------------------------------------------*/
1547 
1548 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1549                                       UWord dsm_num )
1550 {
1551    UWord    sm_off, sm_off16;
1552    UWord    vabits2 = vabits16 & 0x3;
1553    SizeT    lenA, lenB, len_to_next_secmap;
1554    Addr     aNext;
1555    SecMap*  sm;
1556    SecMap** sm_ptr;
1557    SecMap*  example_dsm;
1558 
1559    PROF_EVENT(150, "set_address_range_perms");
1560 
1561    /* Check the V+A bits make sense. */
1562    tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
1563              VA_BITS16_UNDEFINED == vabits16 ||
1564              VA_BITS16_DEFINED   == vabits16);
1565 
1566    // This code should never write PDBs;  ensure this.  (See comment above
1567    // set_vabits2().)
1568    tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1569 
1570    if (lenT == 0)
1571       return;
1572 
1573    if (lenT > 256 * 1024 * 1024) {
1574       if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1575          const HChar* s = "unknown???";
1576          if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1577          if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1578          if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
1579          VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1580                                   "large range [0x%lx, 0x%lx) (%s)\n",
1581                                   a, a + lenT, s);
1582       }
1583    }
1584 
1585 #ifndef PERF_FAST_SARP
1586    /*------------------ debug-only case ------------------ */
1587    {
1588       // Endianness doesn't matter here because all bytes are being set to
1589       // the same value.
1590       // Nb: We don't have to worry about updating the sec-V-bits table
1591       // after these set_vabits2() calls because this code never writes
1592       // VA_BITS2_PARTDEFINED values.
1593       SizeT i;
1594       for (i = 0; i < lenT; i++) {
1595          set_vabits2(a + i, vabits2);
1596       }
1597       return;
1598    }
1599 #endif
1600 
1601    /*------------------ standard handling ------------------ */
1602 
1603    /* Get the distinguished secondary that we might want
1604       to use (part of the space-compression scheme). */
1605    example_dsm = &sm_distinguished[dsm_num];
1606 
1607    // We have to handle ranges covering various combinations of partial and
1608    // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
1609    // Cases marked with a '*' are common.
1610    //
1611    //   TYPE                                             PARTS USED
1612    //   ----                                             ----------
1613    // * one partial sec-map                  (p)         1
1614    // - one whole sec-map                    (P)         2
1615    //
1616    // * two partial sec-maps                 (pp)        1,3
1617    // - one partial, one whole sec-map       (pP)        1,2
1618    // - one whole, one partial sec-map       (Pp)        2,3
1619    // - two whole sec-maps                   (PP)        2,2
1620    //
1621    // * one partial, one whole, one partial  (pPp)       1,2,3
1622    // - one partial, two whole               (pPP)       1,2,2
1623    // - two whole, one partial               (PPp)       2,2,3
1624    // - three whole                          (PPP)       2,2,2
1625    //
1626    // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
1627    // - one partial, N-1 whole               (pP...PP)   1,2...2,2
1628    // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
1629    // - N whole                              (PP...PP)   2,2...2,2
1630 
1631    // Break up total length (lenT) into two parts:  length in the first
1632    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
1633    aNext = start_of_this_sm(a) + SM_SIZE;
1634    len_to_next_secmap = aNext - a;
1635    if ( lenT <= len_to_next_secmap ) {
1636       // Range entirely within one sec-map.  Covers almost all cases.
1637       PROF_EVENT(151, "set_address_range_perms-single-secmap");
1638       lenA = lenT;
1639       lenB = 0;
1640    } else if (is_start_of_sm(a)) {
1641       // Range spans at least one whole sec-map, and starts at the beginning
1642       // of a sec-map; skip to Part 2.
1643       PROF_EVENT(152, "set_address_range_perms-startof-secmap");
1644       lenA = 0;
1645       lenB = lenT;
1646       goto part2;
1647    } else {
1648       // Range spans two or more sec-maps, first one is partial.
1649       PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
1650       lenA = len_to_next_secmap;
1651       lenB = lenT - lenA;
1652    }
1653 
1654    //------------------------------------------------------------------------
1655    // Part 1: Deal with the first sec_map.  Most of the time the range will be
1656    // entirely within a sec_map and this part alone will suffice.  Also,
1657    // doing it this way lets us avoid repeatedly testing for the crossing of
1658    // a sec-map boundary within these loops.
1659    //------------------------------------------------------------------------
1660 
1661    // If it's distinguished, make it undistinguished if necessary.
1662    sm_ptr = get_secmap_ptr(a);
1663    if (is_distinguished_sm(*sm_ptr)) {
1664       if (*sm_ptr == example_dsm) {
1665          // Sec-map already has the V+A bits that we want, so skip.
1666          PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
1667          a    = aNext;
1668          lenA = 0;
1669       } else {
1670          PROF_EVENT(155, "set_address_range_perms-dist-sm1");
1671          *sm_ptr = copy_for_writing(*sm_ptr);
1672       }
1673    }
1674    sm = *sm_ptr;
1675 
1676    // 1 byte steps
1677    while (True) {
1678       if (VG_IS_8_ALIGNED(a)) break;
1679       if (lenA < 1)           break;
1680       PROF_EVENT(156, "set_address_range_perms-loop1a");
1681       sm_off = SM_OFF(a);
1682       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1683       a    += 1;
1684       lenA -= 1;
1685    }
1686    // 8-aligned, 8 byte steps
1687    while (True) {
1688       if (lenA < 8) break;
1689       PROF_EVENT(157, "set_address_range_perms-loop8a");
1690       sm_off16 = SM_OFF_16(a);
1691       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1692       a    += 8;
1693       lenA -= 8;
1694    }
1695    // 1 byte steps
1696    while (True) {
1697       if (lenA < 1) break;
1698       PROF_EVENT(158, "set_address_range_perms-loop1b");
1699       sm_off = SM_OFF(a);
1700       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1701       a    += 1;
1702       lenA -= 1;
1703    }
1704 
1705    // We've finished the first sec-map.  Is that it?
1706    if (lenB == 0)
1707       return;
1708 
1709    //------------------------------------------------------------------------
1710    // Part 2: Fast-set entire sec-maps at a time.
1711    //------------------------------------------------------------------------
1712   part2:
1713    // 64KB-aligned, 64KB steps.
1714    // Nb: we can reach here with lenB < SM_SIZE
1715    tl_assert(0 == lenA);
1716    while (True) {
1717       if (lenB < SM_SIZE) break;
1718       tl_assert(is_start_of_sm(a));
1719       PROF_EVENT(159, "set_address_range_perms-loop64K");
1720       sm_ptr = get_secmap_ptr(a);
1721       if (!is_distinguished_sm(*sm_ptr)) {
1722          PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
1723          // Free the non-distinguished sec-map that we're replacing.  This
1724          // case happens moderately often, enough to be worthwhile.
1725          SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1726          tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1727       }
1728       update_SM_counts(*sm_ptr, example_dsm);
1729       // Make the sec-map entry point to the example DSM
1730       *sm_ptr = example_dsm;
1731       lenB -= SM_SIZE;
1732       a    += SM_SIZE;
1733    }
1734 
1735    // We've finished the whole sec-maps.  Is that it?
1736    if (lenB == 0)
1737       return;
1738 
1739    //------------------------------------------------------------------------
1740    // Part 3: Finish off the final partial sec-map, if necessary.
1741    //------------------------------------------------------------------------
1742 
1743    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1744 
1745    // If it's distinguished, make it undistinguished if necessary.
1746    sm_ptr = get_secmap_ptr(a);
1747    if (is_distinguished_sm(*sm_ptr)) {
1748       if (*sm_ptr == example_dsm) {
1749          // Sec-map already has the V+A bits that we want, so stop.
1750          PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
1751          return;
1752       } else {
1753          PROF_EVENT(162, "set_address_range_perms-dist-sm2");
1754          *sm_ptr = copy_for_writing(*sm_ptr);
1755       }
1756    }
1757    sm = *sm_ptr;
1758 
1759    // 8-aligned, 8 byte steps
1760    while (True) {
1761       if (lenB < 8) break;
1762       PROF_EVENT(163, "set_address_range_perms-loop8b");
1763       sm_off16 = SM_OFF_16(a);
1764       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1765       a    += 8;
1766       lenB -= 8;
1767    }
1768    // 1 byte steps
1769    while (True) {
1770       if (lenB < 1) return;
1771       PROF_EVENT(164, "set_address_range_perms-loop1c");
1772       sm_off = SM_OFF(a);
1773       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1774       a    += 1;
1775       lenB -= 1;
1776    }
1777 }
1778 
1779 
1780 /* --- Set permissions for arbitrary address ranges --- */
1781 
1782 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1783 {
1784    PROF_EVENT(40, "MC_(make_mem_noaccess)");
1785    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1786    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1787    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1788       ocache_sarp_Clear_Origins ( a, len );
1789 }
1790 
1791 static void make_mem_undefined ( Addr a, SizeT len )
1792 {
1793    PROF_EVENT(41, "make_mem_undefined");
1794    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1795    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1796 }
1797 
1798 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1799 {
1800    PROF_EVENT(43, "MC_(make_mem_undefined)");
1801    DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1802    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1803    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1804       ocache_sarp_Set_Origins ( a, len, otag );
1805 }
1806 
1807 static
1808 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1809                                           ThreadId tid, UInt okind )
1810 {
1811    UInt        ecu;
1812    ExeContext* here;
1813    /* VG_(record_ExeContext) checks for validity of tid, and asserts
1814       if it is invalid.  So no need to do it here. */
1815    tl_assert(okind <= 3);
1816    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1817    tl_assert(here);
1818    ecu = VG_(get_ECU_from_ExeContext)(here);
1819    tl_assert(VG_(is_plausible_ECU)(ecu));
1820    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1821 }
1822 
1823 static
1824 void mc_new_mem_w_tid_make_ECU  ( Addr a, SizeT len, ThreadId tid )
1825 {
1826    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1827 }
1828 
1829 static
1830 void mc_new_mem_w_tid_no_ECU  ( Addr a, SizeT len, ThreadId tid )
1831 {
1832    MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1833 }
1834 
1835 void MC_(make_mem_defined) ( Addr a, SizeT len )
1836 {
1837    PROF_EVENT(42, "MC_(make_mem_defined)");
1838    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1839    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1840    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1841       ocache_sarp_Clear_Origins ( a, len );
1842 }
1843 
1844 /* For each byte in [a,a+len), if the byte is addressable, make it be
1845    defined, but if it isn't addressable, leave it alone.  In other
1846    words, a version of MC_(make_mem_defined) that doesn't mess with
1847    addressability.  Low-performance implementation. */
1848 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1849 {
1850    SizeT i;
1851    UChar vabits2;
1852    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1853    for (i = 0; i < len; i++) {
1854       vabits2 = get_vabits2( a+i );
1855       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1856          set_vabits2(a+i, VA_BITS2_DEFINED);
1857          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1858             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1859          }
1860       }
1861    }
1862 }
1863 
1864 /* Similarly (needed for mprotect handling ..) */
1865 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1866 {
1867    SizeT i;
1868    UChar vabits2;
1869    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1870    for (i = 0; i < len; i++) {
1871       vabits2 = get_vabits2( a+i );
1872       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1873          set_vabits2(a+i, VA_BITS2_DEFINED);
1874          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1875             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1876          }
1877       }
1878    }
1879 }
1880 
1881 /* --- Block-copy permissions (needed for implementing realloc() and
1882        sys_mremap). --- */
1883 
1884 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1885 {
1886    SizeT i, j;
1887    UChar vabits2, vabits8;
1888    Bool  aligned, nooverlap;
1889 
1890    DEBUG("MC_(copy_address_range_state)\n");
1891    PROF_EVENT(50, "MC_(copy_address_range_state)");
1892 
1893    if (len == 0 || src == dst)
1894       return;
1895 
1896    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1897    nooverlap = src+len <= dst || dst+len <= src;
1898 
1899    if (nooverlap && aligned) {
1900 
1901       /* Vectorised fast case, when no overlap and suitably aligned */
1902       /* vector loop */
1903       i = 0;
1904       while (len >= 4) {
1905          vabits8 = get_vabits8_for_aligned_word32( src+i );
1906          set_vabits8_for_aligned_word32( dst+i, vabits8 );
1907          if (LIKELY(VA_BITS8_DEFINED == vabits8
1908                             || VA_BITS8_UNDEFINED == vabits8
1909                             || VA_BITS8_NOACCESS == vabits8)) {
1910             /* do nothing */
1911          } else {
1912             /* have to copy secondary map info */
1913             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1914                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1915             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1916                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1917             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1918                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1919             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1920                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1921          }
1922          i += 4;
1923          len -= 4;
1924       }
1925       /* fixup loop */
1926       while (len >= 1) {
1927          vabits2 = get_vabits2( src+i );
1928          set_vabits2( dst+i, vabits2 );
1929          if (VA_BITS2_PARTDEFINED == vabits2) {
1930             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1931          }
1932          i++;
1933          len--;
1934       }
1935 
1936    } else {
1937 
1938       /* We have to do things the slow way */
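      /* Editor's note: as with memmove, the copy direction is chosen
         so that each source byte's shadow state is read before the
         destination writes can clobber it: back-to-front when the
         source lies below the destination, front-to-back otherwise. */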
1939       if (src < dst) {
1940          for (i = 0, j = len-1; i < len; i++, j--) {
1941             PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
1942             vabits2 = get_vabits2( src+j );
1943             set_vabits2( dst+j, vabits2 );
1944             if (VA_BITS2_PARTDEFINED == vabits2) {
1945                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1946             }
1947          }
1948       }
1949 
1950       if (src > dst) {
1951          for (i = 0; i < len; i++) {
1952             PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
1953             vabits2 = get_vabits2( src+i );
1954             set_vabits2( dst+i, vabits2 );
1955             if (VA_BITS2_PARTDEFINED == vabits2) {
1956                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1957             }
1958          }
1959       }
1960    }
1961 
1962 }
1963 
1964 
1965 /*------------------------------------------------------------*/
1966 /*--- Origin tracking stuff - cache basics                 ---*/
1967 /*------------------------------------------------------------*/
1968 
1969 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1970    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1971 
1972    Note that this implementation draws inspiration from the "origin
1973    tracking by value piggybacking" scheme described in "Tracking Bad
1974    Apples: Reporting the Origin of Null and Undefined Value Errors"
1975    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1976    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1977    implemented completely differently.
1978 
1979    Origin tags and ECUs -- about the shadow values
1980    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1981 
1982    This implementation tracks the defining point of all uninitialised
1983    values using so-called "origin tags", which are 32-bit integers,
1984    rather than using the values themselves to encode the origins.  The
1985    latter, so-called "value piggybacking", is what the OOPSLA07 paper
1986    describes.
1987 
1988    Origin tags, as tracked by the machinery below, are 32-bit unsigned
1989    ints (UInts), regardless of the machine's word size.  Each tag
1990    comprises an upper 30-bit ECU field and a lower 2-bit
1991    'kind' field.  The ECU field is a number given out by m_execontext
1992    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
1993    directly as an origin tag (otag), but in fact we want to put
1994    additional information in the 'kind' field to indicate roughly where
1995    the tag came from.  This helps print more understandable error messages
1996    for the user -- it has no other purpose.  In summary:
1997 
1998    * Both ECUs and origin tags are represented as 32-bit words
1999 
2000    * m_execontext and the core-tool interface deal purely in ECUs.
2001      They have no knowledge of origin tags - that is a purely
2002      Memcheck-internal matter.
2003 
2004    * all valid ECUs have the lowest 2 bits zero and at least
2005      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2006 
2007    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2008      constants defined in mc_include.h.
2009 
2010    * to convert an otag back to an ECU, AND it with ~3
2011 
2012    One important fact is that no valid otag is zero.  A zero otag is
2013    used by the implementation to indicate "no origin", which could
2014    mean that either the value is defined, or it is undefined but the
2015    implementation somehow managed to lose the origin.
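
   As an illustrative sketch (editor's addition; the MC_OKIND_*
   constants are defined in mc_include.h):

      UInt ecu  = VG_(get_ECU_from_ExeContext)(here); // low 2 bits zero
      UInt otag = ecu | MC_OKIND_HEAP;                // attach the 'kind'
      UInt back = otag & ~3;                          // recover the ECU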
2016 
2017    The ECU used for memory created by malloc etc is derived from the
2018    stack trace at the time the malloc etc happens.  This means the
2019    mechanism can show the exact allocation point for heap-created
2020    uninitialised values.
2021 
2022    In contrast, it is simply too expensive to create a complete
2023    backtrace for each stack allocation.  Therefore we merely use a
2024    depth-1 backtrace for stack allocations, which can be done once at
2025    translation time, rather than N times at run time.  The result of
2026    this is that, for stack created uninitialised values, Memcheck can
2027    only show the allocating function, and not what called it.
2028    Furthermore, compilers tend to move the stack pointer just once at
2029    the start of the function, to allocate all locals, and so in fact
2030    the stack origin almost always simply points to the opening brace
2031    of the function.  Net result is, for stack origins, the mechanism
2032    can tell you in which function the undefined value was created, but
2033    that's all.  Users will need to carefully check all locals in the
2034    specified function.
2035 
2036    Shadowing registers and memory
2037    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2038 
2039    Memory is shadowed using a two level cache structure (ocacheL1 and
2040    ocacheL2).  Memory references are first directed to ocacheL1.  This
2041    is a traditional 2-way set associative cache with 32-byte lines and
2042    approximate LRU replacement within each set.
2043 
2044    A naive implementation would require storing one 32 bit otag for
2045    each byte of memory covered, a 4:1 space overhead.  Instead, there
2046    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2047    that shows which of the 4 bytes have that shadow value and which
2048    have a shadow value of zero (indicating no origin).  Hence a lot of
2049    space is saved, but the cost is that only one different origin per
2050    4 bytes of address space can be represented.  This is a source of
2051    imprecision, but how much of a problem it really is remains to be
2052    seen.
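
   As an illustrative sketch (editor's addition; the exact bit
   assignment is the one used by the MC_(helperc_b_store*) helpers):
   if, within one aligned 4-byte group, only the bytes at offsets 0
   and 3 are undefined with origin T, the cache holds w32 = T and a
   4-bit mask of 0b1001, bit i covering the byte at offset i; bytes
   whose mask bit is clear are treated as having no origin (tag zero).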
2053 
2054    A cache line that contains all zeroes ("no origins") contains no
2055    useful information, and can be ejected from the L1 cache "for
2056    free", in the sense that a read miss on the L1 causes a line of
2057    zeroes to be installed.  However, ejecting a line containing
2058    nonzeroes risks losing origin information permanently.  In order to
2059    prevent such lossage, ejected nonzero lines are placed in a
2060    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2061    lines.  This can grow arbitrarily large, and so should ensure that
2062    Memcheck runs out of memory in preference to losing useful origin
2063    info due to cache size limitations.
2064 
2065    Shadowing registers is a bit tricky, because the shadow values are
2066    32 bits, regardless of the size of the register.  That gives a
2067    problem for registers smaller than 32 bits.  The solution is to
2068    find spaces in the guest state that are unused, and use those to
2069    shadow guest state fragments smaller than 32 bits.  For example, on
2070    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
2071    shadow are allocated for the register's otag, then there are still
2072    12 bytes left over which could be used to shadow 3 other values.
2073 
2074    This implies there is some non-obvious mapping from guest state
2075    (start,length) pairs to the relevant shadow offset (for the origin
2076    tags).  And it is unfortunately guest-architecture specific.  The
2077    mapping is contained in mc_machine.c, which is quite lengthy but
2078    straightforward.
2079 
2080    Instrumenting the IR
2081    ~~~~~~~~~~~~~~~~~~~~
2082 
2083    Instrumentation is largely straightforward, and done by the
2084    functions schemeE and schemeS in mc_translate.c.  These generate
2085    code for handling the origin tags of expressions (E) and statements
2086    (S) respectively.  The rather strange names are a reference to the
2087    "compilation schemes" shown in Simon Peyton Jones' book "The
2088    Implementation of Functional Programming Languages" (Prentice Hall,
2089    1987, see
2090    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2091 
2092    schemeS merely arranges to move shadow values around the guest
2093    state to track the incoming IR.  schemeE is largely trivial too.
2094    The only significant point is how to compute the otag corresponding
2095    to binary (or ternary, quaternary, etc) operator applications.  The
2096    rule is simple: just take whichever value is larger (32-bit
2097    unsigned max).  Constants get the special value zero.  Hence this
2098    rule always propagates a nonzero (known) otag in preference to a
2099    zero (unknown, or more likely, value-is-defined) tag, as we want.
2100    If two different undefined values are inputs to a binary operator
2101    application, then which is propagated is arbitrary, but that
2102    doesn't matter, since the program is erroneous in using either of
2103    the values, and so there's no point in attempting to propagate
2104    both.
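
   A small illustrative sketch (editor's addition): if x carries otag
   0 (a constant, or a defined value) and y carries otag 0x1234 (an
   undefined value), then for z = op(x,y) the instrumentation computes

      otag_z = Max32U(0x0, 0x1234) = 0x1234

   so the known origin survives; had both inputs carried nonzero tags,
   the numerically larger one would simply have been kept.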
2105 
2106    Since constants are abstracted to (otag) zero, much of the
2107    instrumentation code can be folded out without difficulty by the
2108    generic post-instrumentation IR cleanup pass, using these rules:
2109    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
2110    are constants is evaluated at JIT time; the resulting dead code is
2111    then removed.  In practice this causes surprisingly few Max32Us to
2112    survive through to backend code generation.
2113 
2114    Integration with the V-bits machinery
2115    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2116 
2117    This is again largely straightforward.  Mostly the otag and V bits
2118    stuff are independent.  The only point of interaction is when the V
2119    bits instrumenter creates a call to a helper function to report an
2120    uninitialised value error -- in that case it must first use schemeE
2121    to get hold of the origin tag expression for the value, and pass
2122    that to the helper too.
2123 
2124    There is the usual stuff to do with setting address range
2125    permissions.  When memory is painted undefined, we must also know
2126    the origin tag to paint with, which involves some tedious plumbing,
2127    particularly to do with the fast case stack handlers.  When memory
2128    is painted defined or noaccess then the origin tags must be forced
2129    to zero.
2130 
2131    One of the goals of the implementation was to ensure that the
2132    non-origin tracking mode isn't slowed down at all.  To do this,
2133    various functions to do with memory permissions setting (again,
2134    mostly pertaining to the stack) are duplicated for the with- and
2135    without-otag case.
2136 
2137    Dealing with stack redzones, and the NIA cache
2138    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2139 
2140    This is one of the few non-obvious parts of the implementation.
2141 
2142    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2143    reserved area below the stack pointer, that can be used as scratch
2144    space by compiler generated code for functions.  In the Memcheck
2145    sources this is referred to as the "stack redzone".  The important
2146    thing here is that such redzones are considered volatile across
2147    function calls and returns.  So Memcheck takes care to mark them as
2148    undefined for each call and return, on the afflicted platforms.
2149    Past experience shows this is essential in order to get reliable
2150    messages about uninitialised values that come from the stack.
2151 
2152    So the question is, when we paint a redzone undefined, what origin
2153    tag should we use for it?  Consider a function f() calling g().  If
2154    we paint the redzone using an otag derived from the ExeContext of
2155    the CALL/BL instruction in f, then any errors in g causing it to
2156    use uninitialised values that happen to lie in the redzone, will be
2157    reported as having their origin in f.  Which is highly confusing.
2158 
2159    The same applies for returns: if, on a return, we paint the redzone
2160    using an origin tag derived from the ExeContext of the RET/BLR
2161    instruction in g, then any later errors in f causing it to use
2162    uninitialised values in the redzone, will be reported as having
2163    their origin in g.  Which is just as confusing.
2164 
2165    To do it right, in both cases we need to use an origin tag which
2166    pertains to the instruction which dynamically follows the CALL/BL
2167    or RET/BLR.  In short, one derived from the NIA - the "next
2168    instruction address".
2169 
2170    To make this work, Memcheck's redzone-painting helper,
2171    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2172    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
2173    ExeContext's ECU as the basis for the otag used to paint the
2174    redzone.  The expensive part of this is converting an NIA into an
2175    ECU, since this happens once for every call and every return.  So
2176    we use a simple 511-line, 2-way set associative cache
2177    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2178    the cost out.
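
   Illustrative chain for painting a redzone (editor's sketch; which
   MC_OKIND_ value is used here is an assumption -- see
   MC_(helperc_MAKE_STACK_UNINIT) for the real plumbing):

      ecu  = ECU of the 1-frame ExeContext built from the NIA
             (via nia_to_ecu_cache when it hits)
      otag = ecu | MC_OKIND_STACK
      paint the redzone below SP undefined, with origin 'otag'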
2179 
2180    Further background comments
2181    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2182 
2183    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
2184    > it really just the address of the relevant ExeContext?
2185 
2186    Well, it's not the address, but a value which has a 1-1 mapping
2187    with ExeContexts, and is guaranteed not to be zero, since zero
2188    denotes (to memcheck) "unknown origin or defined value".  So these
2189    UInts are just numbers starting at 4 and incrementing by 4; each
2190    ExeContext is given a number when it is created.  (*** NOTE this
2191    confuses otags and ECUs; see comments above ***).
2192 
2193    Making these otags 32-bit regardless of the machine's word size
2194    makes the 64-bit implementation easier (next para).  And it doesn't
2195    really limit us in any way, since for the tags to overflow would
2196    require that the program somehow caused 2^30-1 different
2197    ExeContexts to be created, in which case it is probably in deep
2198    trouble.  Not to mention V will have soaked up many tens of
2199    gigabytes of memory merely to store them all.
2200 
2201    So having 64-bit origins doesn't really buy you anything, and has
2202    the following downsides:
2203 
2204    Suppose that instead, an otag is a UWord.  This would mean that, on
2205    a 64-bit target,
2206 
2207    1. It becomes hard to shadow any element of guest state which is
2208       smaller than 8 bytes.  To do so means you'd need to find some
2209       8-byte-sized hole in the guest state which you don't want to
2210       shadow, and use that instead to hold the otag.  On ppc64, the
2211       condition code register(s) are split into 20 UChar sized pieces,
2212       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2213       and so that would entail finding 160 bytes somewhere else in the
2214       guest state.
2215 
2216       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2217       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2218       same) and so I had to look for 4 untracked otag-sized areas in
2219       the guest state to make that possible.
2220 
2221       The same problem exists of course when origin tags are only 32
2222       bits, but it's less extreme.
2223 
2224    2. (More compelling) it doubles the size of the origin shadow
2225       memory.  Given that the shadow memory is organised as a fixed
2226       size cache, and that accuracy of tracking is limited by origins
2227       falling out the cache due to space conflicts, this isn't good.
2228 
2229    > Another question: is the origin tracking perfect, or are there
2230    > cases where it fails to determine an origin?
2231 
2232    It is imperfect for at least the following reasons, and
2233    probably more:
2234 
2235    * Insufficient capacity in the origin cache.  When a line is
2236      evicted from the cache it is gone forever, and so subsequent
2237      queries for the line produce zero, indicating no origin
2238      information.  Interestingly, a line containing all zeroes can be
2239      evicted "free" from the cache, since it contains no useful
2240      information, so there is scope perhaps for some cleverer cache
2241      management schemes.  (*** NOTE, with the introduction of the
2242      second level origin tag cache, ocacheL2, this is no longer a
2243      problem. ***)
2244 
2245    * The origin cache only stores one otag per 32-bits of address
2246      space, plus 4 bits indicating which of the 4 bytes has that tag
2247      and which are considered defined.  The result is that if two
2248      undefined bytes in the same word are stored in memory, the first
2249      stored byte's origin will be lost and replaced by the origin for
2250      the second byte.
2251 
2252    * Nonzero origin tags for defined values.  Consider a binary
2253      operator application op(x,y).  Suppose y is undefined (and so has
2254      a valid nonzero origin tag), and x is defined, but erroneously
2255      has a nonzero origin tag (defined values should have tag zero).
2256      If the erroneous tag has a numeric value greater than y's tag,
2257      then the rule for propagating origin tags though binary
2258      operations, which is simply to take the unsigned max of the two
2259      tags, will erroneously propagate x's tag rather than y's.
2260 
2261    * Some obscure uses of x86/amd64 byte registers can cause lossage
2262      or confusion of origins.  %AH .. %DH are treated as different
2263      from, and unrelated to, their parent registers, %EAX .. %EDX.
2264      So some weird sequences like
2265 
2266         movb undefined-value, %AH
2267         movb defined-value, %AL
2268         .. use %AX or %EAX ..
2269 
2270      will cause the origin attributed to %AH to be ignored, since %AL,
2271      %AX, %EAX are treated as the same register, and %AH as a
2272      completely separate one.
2273 
2274    But having said all that, it actually seems to work fairly well in
2275    practice.
2276 */
2277 
2278 static UWord stats_ocacheL1_find           = 0;
2279 static UWord stats_ocacheL1_found_at_1     = 0;
2280 static UWord stats_ocacheL1_found_at_N     = 0;
2281 static UWord stats_ocacheL1_misses         = 0;
2282 static UWord stats_ocacheL1_lossage        = 0;
2283 static UWord stats_ocacheL1_movefwds       = 0;
2284 
2285 static UWord stats__ocacheL2_refs          = 0;
2286 static UWord stats__ocacheL2_misses        = 0;
2287 static UWord stats__ocacheL2_n_nodes_max   = 0;
2288 
2289 /* Cache of 32-bit values, one every 32 bits of address space */
2290 
2291 #define OC_BITS_PER_LINE 5
2292 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2293 
2294 static INLINE UWord oc_line_offset ( Addr a ) {
2295    return (a >> 2) & (OC_W32S_PER_LINE - 1);
2296 }
2297 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2298    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2299 }
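
/* Editor's note (illustrative, for the default OC_BITS_PER_LINE of 5):
   a line covers 32 bytes of address space, so its tag is a & ~31, and
   the per-line index used for the w32[] and descr[] arrays is
   (a >> 2) & 7, i.e. which aligned 4-byte group within the line the
   address falls into. */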
2300 
2301 #define OC_LINES_PER_SET 2
2302 
2303 #define OC_N_SET_BITS    20
2304 #define OC_N_SETS        (1 << OC_N_SET_BITS)
2305 
2306 /* These settings give:
2307    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
2308    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
2309 */
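
/* Editor's note on how those figures arise (assuming no struct padding
   beyond natural alignment): each OCacheLine below holds one Addr tag,
   8 UInt w32 entries and 8 UChar descrs, i.e. 8+32+8 = 48 bytes on a
   64-bit host (4+32+8 = 44 on 32-bit).  There are OC_N_SETS *
   OC_LINES_PER_SET = 2^20 * 2 = 2,097,152 lines, giving 48 * 2,097,152
   = 100,663,296 bytes (resp. 44 * 2,097,152 = 92,274,688), of which
   the 32 bytes of w32 payload per line, 67,108,864 bytes in total, are
   the "useful" part. */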
2310 
2311 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2312 
2313 
2314 typedef
2315    struct {
2316       Addr  tag;
2317       UInt  w32[OC_W32S_PER_LINE];
2318       UChar descr[OC_W32S_PER_LINE];
2319    }
2320    OCacheLine;
2321 
2322 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
2323    in use, 'n' (nonzero) if it contains at least one valid origin tag,
2324    and 'z' if all the represented tags are zero. */
2325 static UChar classify_OCacheLine ( OCacheLine* line )
2326 {
2327    UWord i;
2328    if (line->tag == 1/*invalid*/)
2329       return 'e'; /* EMPTY */
2330    tl_assert(is_valid_oc_tag(line->tag));
2331    for (i = 0; i < OC_W32S_PER_LINE; i++) {
2332       tl_assert(0 == ((~0xF) & line->descr[i]));
2333       if (line->w32[i] > 0 && line->descr[i] > 0)
2334          return 'n'; /* NONZERO - contains useful info */
2335    }
2336    return 'z'; /* ZERO - no useful info */
2337 }
2338 
2339 typedef
2340    struct {
2341       OCacheLine line[OC_LINES_PER_SET];
2342    }
2343    OCacheSet;
2344 
2345 typedef
2346    struct {
2347       OCacheSet set[OC_N_SETS];
2348    }
2349    OCache;
2350 
2351 static OCache* ocacheL1 = NULL;
2352 static UWord   ocacheL1_event_ctr = 0;
2353 
2354 static void init_ocacheL2 ( void ); /* fwds */
2355 static void init_OCache ( void )
2356 {
2357    UWord line, set;
2358    tl_assert(MC_(clo_mc_level) >= 3);
2359    tl_assert(ocacheL1 == NULL);
2360    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2361    if (ocacheL1 == NULL) {
2362       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2363                                    sizeof(OCache) );
2364    }
2365    tl_assert(ocacheL1 != NULL);
2366    for (set = 0; set < OC_N_SETS; set++) {
2367       for (line = 0; line < OC_LINES_PER_SET; line++) {
2368          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2369       }
2370    }
2371    init_ocacheL2();
2372 }
2373 
2374 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2375 {
2376    OCacheLine tmp;
2377    stats_ocacheL1_movefwds++;
2378    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2379    tmp = set->line[lineno-1];
2380    set->line[lineno-1] = set->line[lineno];
2381    set->line[lineno] = tmp;
2382 }
2383 
2384 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2385    UWord i;
2386    for (i = 0; i < OC_W32S_PER_LINE; i++) {
2387       line->w32[i] = 0; /* NO ORIGIN */
2388       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2389    }
2390    line->tag = tag;
2391 }
2392 
2393 //////////////////////////////////////////////////////////////
2394 //// OCache backing store
2395 
2396 static OSet* ocacheL2 = NULL;
2397 
2398 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2399    return VG_(malloc)(cc, szB);
2400 }
2401 static void ocacheL2_free ( void* v ) {
2402    VG_(free)( v );
2403 }
2404 
2405 /* Stats: # nodes currently in tree */
2406 static UWord stats__ocacheL2_n_nodes = 0;
2407 
2408 static void init_ocacheL2 ( void )
2409 {
2410    tl_assert(!ocacheL2);
2411    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2412    tl_assert(0 == offsetof(OCacheLine,tag));
2413    ocacheL2
2414       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2415                              NULL, /* fast cmp */
2416                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2417    tl_assert(ocacheL2);
2418    stats__ocacheL2_n_nodes = 0;
2419 }
2420 
2421 /* Find line with the given tag in the tree, or NULL if not found. */
2422 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2423 {
2424    OCacheLine* line;
2425    tl_assert(is_valid_oc_tag(tag));
2426    stats__ocacheL2_refs++;
2427    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2428    return line;
2429 }
2430 
2431 /* Delete the line with the given tag from the tree, if it is present, and
2432    free up the associated memory. */
2433 static void ocacheL2_del_tag ( Addr tag )
2434 {
2435    OCacheLine* line;
2436    tl_assert(is_valid_oc_tag(tag));
2437    stats__ocacheL2_refs++;
2438    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2439    if (line) {
2440       VG_(OSetGen_FreeNode)(ocacheL2, line);
2441       tl_assert(stats__ocacheL2_n_nodes > 0);
2442       stats__ocacheL2_n_nodes--;
2443    }
2444 }
2445 
2446 /* Add a copy of the given line to the tree.  It must not already be
2447    present. */
2448 static void ocacheL2_add_line ( OCacheLine* line )
2449 {
2450    OCacheLine* copy;
2451    tl_assert(is_valid_oc_tag(line->tag));
2452    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2453    tl_assert(copy);
2454    *copy = *line;
2455    stats__ocacheL2_refs++;
2456    VG_(OSetGen_Insert)( ocacheL2, copy );
2457    stats__ocacheL2_n_nodes++;
2458    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2459       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2460 }
2461 
2462 ////
2463 //////////////////////////////////////////////////////////////
2464 
2465 __attribute__((noinline))
2466 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2467 {
2468    OCacheLine *victim, *inL2;
2469    UChar c;
2470    UWord line;
2471    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2472    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2473    UWord tag     = a & tagmask;
2474    tl_assert(setno >= 0 && setno < OC_N_SETS);
2475 
2476    /* we already tried line == 0; skip therefore. */
2477    for (line = 1; line < OC_LINES_PER_SET; line++) {
2478       if (ocacheL1->set[setno].line[line].tag == tag) {
2479          if (line == 1) {
2480             stats_ocacheL1_found_at_1++;
2481          } else {
2482             stats_ocacheL1_found_at_N++;
2483          }
2484          if (UNLIKELY(0 == (ocacheL1_event_ctr++
2485                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2486             moveLineForwards( &ocacheL1->set[setno], line );
2487             line--;
2488          }
2489          return &ocacheL1->set[setno].line[line];
2490       }
2491    }
2492 
2493    /* A miss.  Use the last slot.  Implicitly this means we're
2494       ejecting the line in the last slot. */
2495    stats_ocacheL1_misses++;
2496    tl_assert(line == OC_LINES_PER_SET);
2497    line--;
2498    tl_assert(line > 0);
2499 
2500    /* First, move the to-be-ejected line to the L2 cache. */
2501    victim = &ocacheL1->set[setno].line[line];
2502    c = classify_OCacheLine(victim);
2503    switch (c) {
2504       case 'e':
2505          /* the line is empty (has invalid tag); ignore it. */
2506          break;
2507       case 'z':
2508          /* line contains zeroes.  We must ensure the backing store is
2509             updated accordingly, either by copying the line there
2510             verbatim, or by ensuring it isn't present there.  We
2511             chosse the latter on the basis that it reduces the size of
2512             choose the latter on the basis that it reduces the size of
2513          ocacheL2_del_tag( victim->tag );
2514          break;
2515       case 'n':
2516          /* line contains at least one real, useful origin.  Copy it
2517             to the backing store. */
2518          stats_ocacheL1_lossage++;
2519          inL2 = ocacheL2_find_tag( victim->tag );
2520          if (inL2) {
2521             *inL2 = *victim;
2522          } else {
2523             ocacheL2_add_line( victim );
2524          }
2525          break;
2526       default:
2527          tl_assert(0);
2528    }
2529 
2530    /* Now we must reload the L1 cache from the backing tree, if
2531       possible. */
2532    tl_assert(tag != victim->tag); /* stay sane */
2533    inL2 = ocacheL2_find_tag( tag );
2534    if (inL2) {
2535       /* We're in luck.  It's in the L2. */
2536       ocacheL1->set[setno].line[line] = *inL2;
2537    } else {
2538       /* Missed at both levels of the cache hierarchy.  We have to
2539          declare it as full of zeroes (unknown origins). */
2540       stats__ocacheL2_misses++;
2541       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2542    }
2543 
2544    /* Move it one forwards */
2545    moveLineForwards( &ocacheL1->set[setno], line );
2546    line--;
2547 
2548    return &ocacheL1->set[setno].line[line];
2549 }
2550 
2551 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2552 {
2553    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2554    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2555    UWord tag     = a & tagmask;
2556 
2557    stats_ocacheL1_find++;
2558 
2559    if (OC_ENABLE_ASSERTIONS) {
2560       tl_assert(setno >= 0 && setno < OC_N_SETS);
2561       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2562    }
2563 
2564    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2565       return &ocacheL1->set[setno].line[0];
2566    }
2567 
2568    return find_OCacheLine_SLOW( a );
2569 }
2570 
2571 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2572 {
2573    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2574    //// Set the origins for a+0 .. a+7
2575    { OCacheLine* line;
2576      UWord lineoff = oc_line_offset(a);
2577      if (OC_ENABLE_ASSERTIONS) {
2578         tl_assert(lineoff >= 0
2579                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2580      }
2581      line = find_OCacheLine( a );
2582      line->descr[lineoff+0] = 0xF;
2583      line->descr[lineoff+1] = 0xF;
2584      line->w32[lineoff+0]   = otag;
2585      line->w32[lineoff+1]   = otag;
2586    }
2587    //// END inlined, specialised version of MC_(helperc_b_store8)
2588 }
2589 
2590 
2591 /*------------------------------------------------------------*/
2592 /*--- Aligned fast case permission setters,                ---*/
2593 /*--- for dealing with stacks                              ---*/
2594 /*------------------------------------------------------------*/
2595 
2596 /*--------------------- 32-bit ---------------------*/
2597 
2598 /* Nb: by "aligned" here we mean 4-byte aligned */
2599 
2600 static INLINE void make_aligned_word32_undefined ( Addr a )
2601 {
2602    PROF_EVENT(300, "make_aligned_word32_undefined");
2603 
2604 #ifndef PERF_FAST_STACK2
2605    make_mem_undefined(a, 4);
2606 #else
2607    {
2608       UWord   sm_off;
2609       SecMap* sm;
2610 
2611       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2612          PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
2613          make_mem_undefined(a, 4);
2614          return;
2615       }
2616 
2617       sm                  = get_secmap_for_writing_low(a);
2618       sm_off              = SM_OFF(a);
2619       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2620    }
2621 #endif
2622 }
2623 
2624 static INLINE
2625 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2626 {
2627    make_aligned_word32_undefined(a);
2628    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2629    //// Set the origins for a+0 .. a+3
2630    { OCacheLine* line;
2631      UWord lineoff = oc_line_offset(a);
2632      if (OC_ENABLE_ASSERTIONS) {
2633         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2634      }
2635      line = find_OCacheLine( a );
2636      line->descr[lineoff] = 0xF;
2637      line->w32[lineoff]   = otag;
2638    }
2639    //// END inlined, specialised version of MC_(helperc_b_store4)
2640 }
2641 
2642 static INLINE
2643 void make_aligned_word32_noaccess ( Addr a )
2644 {
2645    PROF_EVENT(310, "make_aligned_word32_noaccess");
2646 
2647 #ifndef PERF_FAST_STACK2
2648    MC_(make_mem_noaccess)(a, 4);
2649 #else
2650    {
2651       UWord   sm_off;
2652       SecMap* sm;
2653 
2654       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2655          PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
2656          MC_(make_mem_noaccess)(a, 4);
2657          return;
2658       }
2659 
2660       sm                  = get_secmap_for_writing_low(a);
2661       sm_off              = SM_OFF(a);
2662       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2663 
2664       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2665       //// Set the origins for a+0 .. a+3.
2666       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2667          OCacheLine* line;
2668          UWord lineoff = oc_line_offset(a);
2669          if (OC_ENABLE_ASSERTIONS) {
2670             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2671          }
2672          line = find_OCacheLine( a );
2673          line->descr[lineoff] = 0;
2674       }
2675       //// END inlined, specialised version of MC_(helperc_b_store4)
2676    }
2677 #endif
2678 }
2679 
2680 /*--------------------- 64-bit ---------------------*/
2681 
2682 /* Nb: by "aligned" here we mean 8-byte aligned */
2683 
2684 static INLINE void make_aligned_word64_undefined ( Addr a )
2685 {
2686    PROF_EVENT(320, "make_aligned_word64_undefined");
2687 
2688 #ifndef PERF_FAST_STACK2
2689    make_mem_undefined(a, 8);
2690 #else
2691    {
2692       UWord   sm_off16;
2693       SecMap* sm;
2694 
2695       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2696          PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
2697          make_mem_undefined(a, 8);
2698          return;
2699       }
2700 
2701       sm       = get_secmap_for_writing_low(a);
2702       sm_off16 = SM_OFF_16(a);
2703       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2704    }
2705 #endif
2706 }
2707 
2708 static INLINE
2709 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2710 {
2711    make_aligned_word64_undefined(a);
2712    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2713    //// Set the origins for a+0 .. a+7
2714    { OCacheLine* line;
2715      UWord lineoff = oc_line_offset(a);
2716      tl_assert(lineoff >= 0
2717                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2718      line = find_OCacheLine( a );
2719      line->descr[lineoff+0] = 0xF;
2720      line->descr[lineoff+1] = 0xF;
2721      line->w32[lineoff+0]   = otag;
2722      line->w32[lineoff+1]   = otag;
2723    }
2724    //// END inlined, specialised version of MC_(helperc_b_store8)
2725 }
2726 
2727 static INLINE
2728 void make_aligned_word64_noaccess ( Addr a )
2729 {
2730    PROF_EVENT(330, "make_aligned_word64_noaccess");
2731 
2732 #ifndef PERF_FAST_STACK2
2733    MC_(make_mem_noaccess)(a, 8);
2734 #else
2735    {
2736       UWord   sm_off16;
2737       SecMap* sm;
2738 
2739       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2740          PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
2741          MC_(make_mem_noaccess)(a, 8);
2742          return;
2743       }
2744 
2745       sm       = get_secmap_for_writing_low(a);
2746       sm_off16 = SM_OFF_16(a);
2747       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2748 
2749       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2750       //// Clear the origins for a+0 .. a+7.
2751       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2752          OCacheLine* line;
2753          UWord lineoff = oc_line_offset(a);
2754          tl_assert(lineoff >= 0
2755                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2756          line = find_OCacheLine( a );
2757          line->descr[lineoff+0] = 0;
2758          line->descr[lineoff+1] = 0;
2759       }
2760       //// END inlined, specialised version of MC_(helperc_b_store8)
2761    }
2762 #endif
2763 }
2764 
2765 
2766 /*------------------------------------------------------------*/
2767 /*--- Stack pointer adjustment                             ---*/
2768 /*------------------------------------------------------------*/
2769 
2770 #ifdef PERF_FAST_STACK
2771 #  define MAYBE_USED
2772 #else
2773 #  define MAYBE_USED __attribute__((unused))
2774 #endif
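/* The handlers below are specialised for the stack-pointer deltas seen
   most often in practice (4, 8, 12, 16, 32, 112, 128, 144 and 160 bytes).
   Each size comes in two flavours: a *_w_ECU variant, which additionally
   tags the newly exposed area with an origin (ecu | MC_OKIND_STACK) when
   origin tracking is in use, and a plain variant for the no-origins case.
   MAYBE_USED (above) marks them __attribute__((unused)) when
   PERF_FAST_STACK is not defined, since they may then go unreferenced. */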
2775 
2776 /*--------------- adjustment by 4 bytes ---------------*/
2777 
2778 MAYBE_USED
2779 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2780 {
2781    UInt otag = ecu | MC_OKIND_STACK;
2782    PROF_EVENT(110, "new_mem_stack_4");
2783    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2784       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2785    } else {
2786       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2787    }
2788 }
2789 
2790 MAYBE_USED
2791 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2792 {
2793    PROF_EVENT(110, "new_mem_stack_4");
2794    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2795       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2796    } else {
2797       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2798    }
2799 }
2800 
2801 MAYBE_USED
2802 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2803 {
2804    PROF_EVENT(120, "die_mem_stack_4");
2805    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2806       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2807    } else {
2808       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2809    }
2810 }
2811 
2812 /*--------------- adjustment by 8 bytes ---------------*/
2813 
2814 MAYBE_USED
2815 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2816 {
2817    UInt otag = ecu | MC_OKIND_STACK;
2818    PROF_EVENT(111, "new_mem_stack_8");
2819    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2820       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2821    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2822       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2823       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2824    } else {
2825       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2826    }
2827 }
2828 
2829 MAYBE_USED
2830 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2831 {
2832    PROF_EVENT(111, "new_mem_stack_8");
2833    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2834       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2835    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2836       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2837       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2838    } else {
2839       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2840    }
2841 }
2842 
2843 MAYBE_USED
2844 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2845 {
2846    PROF_EVENT(121, "die_mem_stack_8");
2847    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2848       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2849    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2850       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2851       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2852    } else {
2853       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2854    }
2855 }
2856 
2857 /*--------------- adjustment by 12 bytes ---------------*/
2858 
2859 MAYBE_USED
2860 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2861 {
2862    UInt otag = ecu | MC_OKIND_STACK;
2863    PROF_EVENT(112, "new_mem_stack_12");
2864    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2865       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2866       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2867    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2868       /* From the previous test we don't have 8-alignment at offset +0,
2869          hence we must have 8-alignment at offsets +4/-4.  Hence it is
2870          safe to do 4 at +0 and then 8 at +4. */
2871       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2872       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2873    } else {
2874       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2875    }
2876 }
2877 
2878 MAYBE_USED
2879 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2880 {
2881    PROF_EVENT(112, "new_mem_stack_12");
2882    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2883       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2884       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2885    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2886       /* From the previous test we don't have 8-alignment at offset +0,
2887          hence we must have 8-alignment at offsets +4/-4.  Hence it is
2888          safe to do 4 at +0 and then 8 at +4. */
2889       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2890       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2891    } else {
2892       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2893    }
2894 }
2895 
2896 MAYBE_USED
2897 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2898 {
2899    PROF_EVENT(122, "die_mem_stack_12");
2900    /* Note the -12 in the test */
2901    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2902       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2903          -4. */
2904       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2905       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2906    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2907       /* We have 4-alignment at +0, but we don't have 8-alignment at
2908          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
2909          and then 8 at -8. */
2910       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2911       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2912    } else {
2913       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2914    }
2915 }
2916 
2917 /*--------------- adjustment by 16 bytes ---------------*/
2918 
2919 MAYBE_USED
2920 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2921 {
2922    UInt otag = ecu | MC_OKIND_STACK;
2923    PROF_EVENT(113, "new_mem_stack_16");
2924    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2925       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2926       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2927       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2928    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2929       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2930          Hence do 4 at +0, 8 at +4, 4 at +12. */
2931       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2932       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2933       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2934    } else {
2935       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2936    }
2937 }
2938 
2939 MAYBE_USED
2940 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2941 {
2942    PROF_EVENT(113, "new_mem_stack_16");
2943    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2944       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2945       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2946       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2947    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2948       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2949          Hence do 4 at +0, 8 at +4, 4 at +12. */
2950       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2951       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
2952       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2953    } else {
2954       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2955    }
2956 }
2957 
2958 MAYBE_USED
2959 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2960 {
2961    PROF_EVENT(123, "die_mem_stack_16");
2962    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2963       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2964       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2965       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2966    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2967       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
2968       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2969       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2970       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2971    } else {
2972       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2973    }
2974 }
2975 
2976 /*--------------- adjustment by 32 bytes ---------------*/
2977 
2978 MAYBE_USED
2979 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
2980 {
2981    UInt otag = ecu | MC_OKIND_STACK;
2982    PROF_EVENT(114, "new_mem_stack_32");
2983    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2984       /* Straightforward */
2985       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2986       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2987       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2988       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2989    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2990       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
2991          +0,+28. */
2992       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2993       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2994       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2995       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
2996       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
2997    } else {
2998       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
2999    }
3000 }
3001 
3002 MAYBE_USED
3003 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
3004 {
3005    PROF_EVENT(114, "new_mem_stack_32");
3006    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3007       /* Straightforward */
3008       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3009       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3010       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3011       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3012    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3013       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
3014          +0,+28. */
3015       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3016       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3017       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3018       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3019       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3020    } else {
3021       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3022    }
3023 }
3024 
3025 MAYBE_USED
3026 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3027 {
3028    PROF_EVENT(124, "die_mem_stack_32");
3029    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3030       /* Straightforward */
3031       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3032       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3033       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3034       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3035    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3036       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
3037          4 at -32,-4. */
3038       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3039       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3040       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3041       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3042       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
3043    } else {
3044       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3045    }
3046 }
3047 
3048 /*--------------- adjustment by 112 bytes ---------------*/
3049 
3050 MAYBE_USED
3051 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3052 {
3053    UInt otag = ecu | MC_OKIND_STACK;
3054    PROF_EVENT(115, "new_mem_stack_112");
3055    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3056       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
3057       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3058       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3059       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3060       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3061       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3062       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3063       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3064       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3065       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3066       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3067       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3068       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3069       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3070    } else {
3071       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3072    }
3073 }
3074 
3075 MAYBE_USED
3076 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3077 {
3078    PROF_EVENT(115, "new_mem_stack_112");
3079    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3080       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3081       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3082       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3083       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3084       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3085       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3086       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3087       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3088       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3089       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3090       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3091       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3092       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3093       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3094    } else {
3095       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3096    }
3097 }
3098 
3099 MAYBE_USED
3100 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3101 {
3102    PROF_EVENT(125, "die_mem_stack_112");
3103    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3104       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3105       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3106       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3107       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3108       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3109       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3110       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3111       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3112       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3113       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3114       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3115       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3116       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3117       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3118    } else {
3119       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3120    }
3121 }
3122 
3123 /*--------------- adjustment by 128 bytes ---------------*/
3124 
3125 MAYBE_USED
3126 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3127 {
3128    UInt otag = ecu | MC_OKIND_STACK;
3129    PROF_EVENT(116, "new_mem_stack_128");
3130    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3131       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
3132       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3133       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3134       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3135       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3136       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3137       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3138       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3139       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3140       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3141       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3142       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3143       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3144       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3145       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3146       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3147    } else {
3148       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3149    }
3150 }
3151 
3152 MAYBE_USED
3153 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3154 {
3155    PROF_EVENT(116, "new_mem_stack_128");
3156    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3157       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3158       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3159       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3160       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3161       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3162       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3163       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3164       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3165       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3166       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3167       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3168       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3169       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3170       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3171       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3172       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3173    } else {
3174       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3175    }
3176 }
3177 
3178 MAYBE_USED
3179 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3180 {
3181    PROF_EVENT(126, "die_mem_stack_128");
3182    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3183       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3184       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3185       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3186       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3187       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3188       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3189       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3190       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3191       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3192       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3193       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3194       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3195       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3196       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3197       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3198       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3199    } else {
3200       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3201    }
3202 }
3203 
3204 /*--------------- adjustment by 144 bytes ---------------*/
3205 
3206 MAYBE_USED
3207 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3208 {
3209    UInt otag = ecu | MC_OKIND_STACK;
3210    PROF_EVENT(117, "new_mem_stack_144");
3211    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3212       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3213       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3214       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3215       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3216       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3217       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3218       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3219       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3220       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3221       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3222       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3223       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3224       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3225       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3226       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3227       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3228       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3229       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3230    } else {
3231       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3232    }
3233 }
3234 
3235 MAYBE_USED
3236 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3237 {
3238    PROF_EVENT(117, "new_mem_stack_144");
3239    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3240       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3241       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3242       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3243       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3244       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3245       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3246       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3247       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3248       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3249       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3250       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3251       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3252       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3253       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3254       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3255       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3256       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3257       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3258    } else {
3259       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3260    }
3261 }
3262 
3263 MAYBE_USED
3264 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3265 {
3266    PROF_EVENT(127, "die_mem_stack_144");
3267    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3268       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3269       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3270       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3271       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3272       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3273       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3274       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3275       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3276       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3277       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3278       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3279       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3280       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3281       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3282       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3283       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3284       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3285       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3286    } else {
3287       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3288    }
3289 }
3290 
3291 /*--------------- adjustment by 160 bytes ---------------*/
3292 
3293 MAYBE_USED
3294 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3295 {
3296    UInt otag = ecu | MC_OKIND_STACK;
3297    PROF_EVENT(118, "new_mem_stack_160");
3298    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3299       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3300       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3301       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3302       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3303       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3304       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3305       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3306       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3307       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3308       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3309       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3310       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3311       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3312       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3313       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3314       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3315       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3316       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3317       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3318       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3319    } else {
3320       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3321    }
3322 }
3323 
3324 MAYBE_USED
3325 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3326 {
3327    PROF_EVENT(118, "new_mem_stack_160");
3328    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3329       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3330       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3331       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3332       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3333       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3334       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3335       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3336       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3337       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3338       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3339       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3340       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3341       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3342       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3343       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3344       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3345       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3346       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3347       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3348       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3349    } else {
3350       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3351    }
3352 }
3353 
3354 MAYBE_USED
3355 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3356 {
3357    PROF_EVENT(128, "die_mem_stack_160");
3358    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3359       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3360       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3361       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3362       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3363       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3364       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3365       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3366       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3367       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3368       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3369       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3370       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3371       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3372       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3373       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3374       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3375       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3376       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3377       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3378       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3379    } else {
3380       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3381    }
3382 }
3383 
3384 /*--------------- adjustment by N bytes ---------------*/
3385 
3386 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3387 {
3388    UInt otag = ecu | MC_OKIND_STACK;
3389    PROF_EVENT(115, "new_mem_stack_w_otag");
3390    MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3391 }
3392 
3393 static void mc_new_mem_stack ( Addr a, SizeT len )
3394 {
3395    PROF_EVENT(115, "new_mem_stack");
3396    make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3397 }
3398 
3399 static void mc_die_mem_stack ( Addr a, SizeT len )
3400 {
3401    PROF_EVENT(125, "die_mem_stack");
3402    MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3403 }
3404 
3405 
3406 /* The AMD64 ABI says:
3407 
3408    "The 128-byte area beyond the location pointed to by %rsp is considered
3409     to be reserved and shall not be modified by signal or interrupt
3410     handlers.  Therefore, functions may use this area for temporary data
3411     that is not needed across function calls.  In particular, leaf functions
3412     may use this area for their entire stack frame, rather than adjusting
3413     the stack pointer in the prologue and epilogue.  This area is known as
3414     red zone [sic]."
3415 
3416    So after any call or return we need to mark this redzone as containing
3417    undefined values.
3418 
3419    Consider this:  we're in function f.  f calls g.  g moves rsp down
3420    modestly (say 16 bytes) and writes stuff all over the red zone, making it
3421    defined.  g returns.  f is buggy and reads from parts of the red zone
3422    that it didn't write on.  But because g filled that area in, f is going
3423    to be picking up defined V bits and so any errors from reading bits of
3424    the red zone it didn't write, will be missed.  The only solution I could
3425    think of was to make the red zone undefined when g returns to f.
3426 
3427    This is in accordance with the ABI, which makes it clear the redzone
3428    is volatile across function calls.
3429 
3430    The problem occurs the other way round too: f could fill the RZ up
3431    with defined values and g could mistakenly read them.  So the RZ
3432    also needs to be nuked on function calls.
3433 */
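/* Purely illustrative sketch of the scenario described above (the
   fragments are hypothetical, not anything MemCheck emits or expects):

      g:  sub  $16, %rsp        ; g's frame now overlaps f's old red zone
          mov  %rax, (%rsp)     ; g stores into it, making it defined
          add  $16, %rsp
          ret
      f:  call g
          mov  -24(%rsp), %rdx  ; f reads its red zone, at the very spot g
                                ; wrote, without ever writing it itself

   Unless the red zone is re-marked undefined when g returns, the load in
   f picks up the defined V bits that g left behind and the error goes
   unreported. */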
3434 
3435 
3436 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
3437    improved so as to have a lower miss rate. */
3438 
3439 static UWord stats__nia_cache_queries = 0;
3440 static UWord stats__nia_cache_misses  = 0;
3441 
3442 typedef
3443    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
3444             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3445    WCacheEnt;
3446 
3447 #define N_NIA_TO_ECU_CACHE 511
3448 
3449 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
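/* Each slot of the cache behaves as a tiny two-entry set: (nia0,ecu0)
   holds the most recently used mapping for addresses hashing to that slot
   and (nia1,ecu1) the runner-up.  convert_nia_to_ecu() below promotes the
   second entry to the front on a hit against it, and on a miss demotes
   the front entry and installs the new mapping -- a cheap LRU-of-2. */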
3450 
3451 static void init_nia_to_ecu_cache ( void )
3452 {
3453    UWord       i;
3454    Addr        zero_addr = 0;
3455    ExeContext* zero_ec;
3456    UInt        zero_ecu;
3457    /* Fill all the slots with an entry for address zero, and the
3458       relevant otags accordingly.  Hence the cache is initially filled
3459       with valid data. */
3460    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3461    tl_assert(zero_ec);
3462    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3463    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3464    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3465       nia_to_ecu_cache[i].nia0 = zero_addr;
3466       nia_to_ecu_cache[i].ecu0 = zero_ecu;
3467       nia_to_ecu_cache[i].nia1 = zero_addr;
3468       nia_to_ecu_cache[i].ecu1 = zero_ecu;
3469    }
3470 }
3471 
3472 static inline UInt convert_nia_to_ecu ( Addr nia )
3473 {
3474    UWord i;
3475    UInt        ecu;
3476    ExeContext* ec;
3477 
3478    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3479 
3480    stats__nia_cache_queries++;
3481    i = nia % N_NIA_TO_ECU_CACHE;
3482    tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3483 
3484    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3485       return nia_to_ecu_cache[i].ecu0;
3486 
3487    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3488 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3489       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3490       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3491 #     undef SWAP
3492       return nia_to_ecu_cache[i].ecu0;
3493    }
3494 
3495    stats__nia_cache_misses++;
3496    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3497    tl_assert(ec);
3498    ecu = VG_(get_ECU_from_ExeContext)(ec);
3499    tl_assert(VG_(is_plausible_ECU)(ecu));
3500 
3501    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3502    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3503 
3504    nia_to_ecu_cache[i].nia0 = nia;
3505    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3506    return ecu;
3507 }
3508 
3509 
3510 /* Note that this serves both the origin-tracking and
3511    no-origin-tracking modes.  We assume that calls to it are
3512    sufficiently infrequent that it isn't worth specialising for the
3513    with/without origin-tracking cases. */
3514 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
3515 {
3516    UInt otag;
3517    tl_assert(sizeof(UWord) == sizeof(SizeT));
3518    if (0)
3519       VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
3520                   base, len, nia );
3521 
3522    if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3523       UInt ecu = convert_nia_to_ecu ( nia );
3524       tl_assert(VG_(is_plausible_ECU)(ecu));
3525       otag = ecu | MC_OKIND_STACK;
3526    } else {
3527       tl_assert(nia == 0);
3528       otag = 0;
3529    }
3530 
3531 #  if 0
3532    /* Really slow version */
3533    MC_(make_mem_undefined_w_otag)(base, len, otag);
3534 #  endif
3535 
3536 #  if 0
3537    /* Slow(ish) version, which is fairly easily seen to be correct.
3538    */
3539    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3540       make_aligned_word64_undefined_w_otag(base +   0, otag);
3541       make_aligned_word64_undefined_w_otag(base +   8, otag);
3542       make_aligned_word64_undefined_w_otag(base +  16, otag);
3543       make_aligned_word64_undefined_w_otag(base +  24, otag);
3544 
3545       make_aligned_word64_undefined_w_otag(base +  32, otag);
3546       make_aligned_word64_undefined_w_otag(base +  40, otag);
3547       make_aligned_word64_undefined_w_otag(base +  48, otag);
3548       make_aligned_word64_undefined_w_otag(base +  56, otag);
3549 
3550       make_aligned_word64_undefined_w_otag(base +  64, otag);
3551       make_aligned_word64_undefined_w_otag(base +  72, otag);
3552       make_aligned_word64_undefined_w_otag(base +  80, otag);
3553       make_aligned_word64_undefined_w_otag(base +  88, otag);
3554 
3555       make_aligned_word64_undefined_w_otag(base +  96, otag);
3556       make_aligned_word64_undefined_w_otag(base + 104, otag);
3557       make_aligned_word64_undefined_w_otag(base + 112, otag);
3558       make_aligned_word64_undefined_w_otag(base + 120, otag);
3559    } else {
3560       MC_(make_mem_undefined_w_otag)(base, len, otag);
3561    }
3562 #  endif
3563 
3564    /* The idea is: go fast when
3565          * the address is 8-aligned and the length is 128
3566          * the sm is available in the main primary map
3567          * the address range falls entirely within a single secondary map
3568       If all those conditions hold, just update the V+A bits by writing
3569       directly into the vabits array.  (If the sm was distinguished, this
3570       will make a copy and then write to it.)
3571    */
3572 
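   /* Arithmetic behind the 128-byte fast path below: 2 V+A bits per client
      byte means 8 client bytes per 16-bit group in vabits8[], so 128 bytes
      need exactly 16 UShort stores of VA_BITS16_UNDEFINED (and the
      288-byte ppc64 case further down needs 36). */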
3573    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3574       /* Now we know the address range is suitably sized and aligned. */
3575       UWord a_lo = (UWord)(base);
3576       UWord a_hi = (UWord)(base + 128 - 1);
3577       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3578       if (a_hi <= MAX_PRIMARY_ADDRESS) {
3579          // Now we know the entire range is within the main primary map.
3580          SecMap* sm    = get_secmap_for_writing_low(a_lo);
3581          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3582          /* Now we know that the entire address range falls within a
3583             single secondary map, and that that secondary 'lives' in
3584             the main primary map. */
3585          if (LIKELY(sm == sm_hi)) {
3586             // Finally, we know that the range is entirely within one secmap.
3587             UWord   v_off = SM_OFF(a_lo);
3588             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
3589             p[ 0] = VA_BITS16_UNDEFINED;
3590             p[ 1] = VA_BITS16_UNDEFINED;
3591             p[ 2] = VA_BITS16_UNDEFINED;
3592             p[ 3] = VA_BITS16_UNDEFINED;
3593             p[ 4] = VA_BITS16_UNDEFINED;
3594             p[ 5] = VA_BITS16_UNDEFINED;
3595             p[ 6] = VA_BITS16_UNDEFINED;
3596             p[ 7] = VA_BITS16_UNDEFINED;
3597             p[ 8] = VA_BITS16_UNDEFINED;
3598             p[ 9] = VA_BITS16_UNDEFINED;
3599             p[10] = VA_BITS16_UNDEFINED;
3600             p[11] = VA_BITS16_UNDEFINED;
3601             p[12] = VA_BITS16_UNDEFINED;
3602             p[13] = VA_BITS16_UNDEFINED;
3603             p[14] = VA_BITS16_UNDEFINED;
3604             p[15] = VA_BITS16_UNDEFINED;
3605             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3606                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3607                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3608                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3609                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3610                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3611                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3612                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3613                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3614                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3615                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3616                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3617                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3618                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3619                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3620                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3621                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3622             }
3623             return;
3624          }
3625       }
3626    }
3627 
3628    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3629    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3630       /* Now we know the address range is suitably sized and aligned. */
3631       UWord a_lo = (UWord)(base);
3632       UWord a_hi = (UWord)(base + 288 - 1);
3633       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3634       if (a_hi <= MAX_PRIMARY_ADDRESS) {
3635          // Now we know the entire range is within the main primary map.
3636          SecMap* sm    = get_secmap_for_writing_low(a_lo);
3637          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3638          /* Now we know that the entire address range falls within a
3639             single secondary map, and that that secondary 'lives' in
3640             the main primary map. */
3641          if (LIKELY(sm == sm_hi)) {
3642             // Finally, we know that the range is entirely within one secmap.
3643             UWord   v_off = SM_OFF(a_lo);
3644             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
3645             p[ 0] = VA_BITS16_UNDEFINED;
3646             p[ 1] = VA_BITS16_UNDEFINED;
3647             p[ 2] = VA_BITS16_UNDEFINED;
3648             p[ 3] = VA_BITS16_UNDEFINED;
3649             p[ 4] = VA_BITS16_UNDEFINED;
3650             p[ 5] = VA_BITS16_UNDEFINED;
3651             p[ 6] = VA_BITS16_UNDEFINED;
3652             p[ 7] = VA_BITS16_UNDEFINED;
3653             p[ 8] = VA_BITS16_UNDEFINED;
3654             p[ 9] = VA_BITS16_UNDEFINED;
3655             p[10] = VA_BITS16_UNDEFINED;
3656             p[11] = VA_BITS16_UNDEFINED;
3657             p[12] = VA_BITS16_UNDEFINED;
3658             p[13] = VA_BITS16_UNDEFINED;
3659             p[14] = VA_BITS16_UNDEFINED;
3660             p[15] = VA_BITS16_UNDEFINED;
3661             p[16] = VA_BITS16_UNDEFINED;
3662             p[17] = VA_BITS16_UNDEFINED;
3663             p[18] = VA_BITS16_UNDEFINED;
3664             p[19] = VA_BITS16_UNDEFINED;
3665             p[20] = VA_BITS16_UNDEFINED;
3666             p[21] = VA_BITS16_UNDEFINED;
3667             p[22] = VA_BITS16_UNDEFINED;
3668             p[23] = VA_BITS16_UNDEFINED;
3669             p[24] = VA_BITS16_UNDEFINED;
3670             p[25] = VA_BITS16_UNDEFINED;
3671             p[26] = VA_BITS16_UNDEFINED;
3672             p[27] = VA_BITS16_UNDEFINED;
3673             p[28] = VA_BITS16_UNDEFINED;
3674             p[29] = VA_BITS16_UNDEFINED;
3675             p[30] = VA_BITS16_UNDEFINED;
3676             p[31] = VA_BITS16_UNDEFINED;
3677             p[32] = VA_BITS16_UNDEFINED;
3678             p[33] = VA_BITS16_UNDEFINED;
3679             p[34] = VA_BITS16_UNDEFINED;
3680             p[35] = VA_BITS16_UNDEFINED;
3681             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3682                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3683                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3684                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3685                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3686                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3687                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3688                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3689                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3690                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3691                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3692                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3693                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3694                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3695                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3696                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3697                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3698                set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3699                set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3700                set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3701                set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3702                set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3703                set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3704                set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3705                set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3706                set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3707                set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3708                set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3709                set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3710                set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3711                set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3712                set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3713                set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3714                set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3715                set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3716                set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3717                set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3718             }
3719             return;
3720          }
3721       }
3722    }
3723 
3724    /* else fall into slow case */
3725    MC_(make_mem_undefined_w_otag)(base, len, otag);
3726 }
3727 
3728 
3729 /*------------------------------------------------------------*/
3730 /*--- Checking memory                                      ---*/
3731 /*------------------------------------------------------------*/
3732 
3733 typedef
3734    enum {
3735       MC_Ok = 5,
3736       MC_AddrErr = 6,
3737       MC_ValueErr = 7
3738    }
3739    MC_ReadResult;
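/* (The enumerators deliberately start at 5 rather than 0 or 1 --
   presumably so a value of this type can never be mistaken for a Bool.) */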
3740 
3741 
3742 /* Check permissions for address range.  If inadequate permissions
3743    exist, *bad_addr is set to the offending address, so the caller can
3744    know what it is. */
3745 
3746 /* Returns True if [a .. a+len) is not addressable.  Otherwise,
3747    returns False, and if bad_addr is non-NULL, sets *bad_addr to
3748    indicate the lowest failing address.  Functions below are
3749    similar. */
3750 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
3751 {
3752    SizeT i;
3753    UWord vabits2;
3754 
3755    PROF_EVENT(60, "check_mem_is_noaccess");
3756    for (i = 0; i < len; i++) {
3757       PROF_EVENT(61, "check_mem_is_noaccess(loop)");
3758       vabits2 = get_vabits2(a);
3759       if (VA_BITS2_NOACCESS != vabits2) {
3760          if (bad_addr != NULL) *bad_addr = a;
3761          return False;
3762       }
3763       a++;
3764    }
3765    return True;
3766 }
3767 
3768 static Bool is_mem_addressable ( Addr a, SizeT len,
3769                                  /*OUT*/Addr* bad_addr )
3770 {
3771    SizeT i;
3772    UWord vabits2;
3773 
3774    PROF_EVENT(62, "is_mem_addressable");
3775    for (i = 0; i < len; i++) {
3776       PROF_EVENT(63, "is_mem_addressable(loop)");
3777       vabits2 = get_vabits2(a);
3778       if (VA_BITS2_NOACCESS == vabits2) {
3779          if (bad_addr != NULL) *bad_addr = a;
3780          return False;
3781       }
3782       a++;
3783    }
3784    return True;
3785 }
3786 
3787 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
3788                                       /*OUT*/Addr* bad_addr,
3789                                       /*OUT*/UInt* otag )
3790 {
3791    SizeT i;
3792    UWord vabits2;
3793 
3794    PROF_EVENT(64, "is_mem_defined");
3795    DEBUG("is_mem_defined\n");
3796 
3797    if (otag)     *otag = 0;
3798    if (bad_addr) *bad_addr = 0;
3799    for (i = 0; i < len; i++) {
3800       PROF_EVENT(65, "is_mem_defined(loop)");
3801       vabits2 = get_vabits2(a);
3802       if (VA_BITS2_DEFINED != vabits2) {
3803          // Error!  Nb: Report addressability errors in preference to
3804          // definedness errors.  And don't report definedness errors unless
3805          // --undef-value-errors=yes.
3806          if (bad_addr) {
3807             *bad_addr = a;
3808          }
3809          if (VA_BITS2_NOACCESS == vabits2) {
3810             return MC_AddrErr;
3811          }
3812          if (MC_(clo_mc_level) >= 2) {
3813             if (otag && MC_(clo_mc_level) == 3) {
3814                *otag = MC_(helperc_b_load1)( a );
3815             }
3816             return MC_ValueErr;
3817          }
3818       }
3819       a++;
3820    }
3821    return MC_Ok;
3822 }
3823 
3824 
3825 /* Like is_mem_defined but doesn't give up at the first uninitialised
3826    byte -- the entire range is always checked.  This is important for
3827    detecting errors in the case where a checked range strays into
3828    invalid memory, but that fact is not detected by the ordinary
3829    is_mem_defined(), because of an undefined section that precedes the
3830    out of range section, possibly as a result of an alignment hole in
3831    the checked data.  This version always checks the entire range and
3832    can report both a definedness and an accessibility error, if
3833    necessary. */
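/* Worked example (added for illustration; the layout is hypothetical):
   suppose a 12-byte range is checked in which bytes [4..7] are an
   uninitialised alignment hole and bytes [8..11] lie beyond the end of a
   mapping.  Plain is_mem_defined() stops at byte 4 and reports only the
   definedness error; this routine keeps scanning, so it reports the
   definedness error at byte 4 *and* the addressability error at byte 8,
   at which point it gives up, as the code below shows. */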
3834 static void is_mem_defined_comprehensive (
3835                Addr a, SizeT len,
3836                /*OUT*/Bool* errorV,    /* is there a definedness err? */
3837                /*OUT*/Addr* bad_addrV, /* if so where? */
3838                /*OUT*/UInt* otagV,     /* and what's its otag? */
3839                /*OUT*/Bool* errorA,    /* is there an addressability err? */
3840                /*OUT*/Addr* bad_addrA  /* if so where? */
3841             )
3842 {
3843    SizeT i;
3844    UWord vabits2;
3845    Bool  already_saw_errV = False;
3846 
3847    PROF_EVENT(64, "is_mem_defined"); // fixme
3848    DEBUG("is_mem_defined_comprehensive\n");
3849 
3850    tl_assert(!(*errorV || *errorA));
3851 
3852    for (i = 0; i < len; i++) {
3853       PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
3854       vabits2 = get_vabits2(a);
3855       switch (vabits2) {
3856          case VA_BITS2_DEFINED:
3857             a++;
3858             break;
3859          case VA_BITS2_UNDEFINED:
3860          case VA_BITS2_PARTDEFINED:
3861             if (!already_saw_errV) {
3862                *errorV    = True;
3863                *bad_addrV = a;
3864                if (MC_(clo_mc_level) == 3) {
3865                   *otagV = MC_(helperc_b_load1)( a );
3866                } else {
3867                   *otagV = 0;
3868                }
3869                already_saw_errV = True;
3870             }
3871             a++; /* keep going */
3872             break;
3873          case VA_BITS2_NOACCESS:
3874             *errorA    = True;
3875             *bad_addrA = a;
3876             return; /* give up now. */
3877          default:
3878             tl_assert(0);
3879       }
3880    }
3881 }
3882 
3883 
3884 /* Check a zero-terminated ascii string.  Tricky -- don't want to
3885    examine the actual bytes, to find the end, until we're sure it is
3886    safe to do so. */
3887 
3888 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
3889 {
3890    UWord vabits2;
3891 
3892    PROF_EVENT(66, "mc_is_defined_asciiz");
3893    DEBUG("mc_is_defined_asciiz\n");
3894 
3895    if (otag)     *otag = 0;
3896    if (bad_addr) *bad_addr = 0;
3897    while (True) {
3898       PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
3899       vabits2 = get_vabits2(a);
3900       if (VA_BITS2_DEFINED != vabits2) {
3901          // Error!  Nb: Report addressability errors in preference to
3902          // definedness errors.  And don't report definedness errors unless
3903          // --undef-value-errors=yes.
3904          if (bad_addr) {
3905             *bad_addr = a;
3906          }
3907          if (VA_BITS2_NOACCESS == vabits2) {
3908             return MC_AddrErr;
3909          }
3910          if (MC_(clo_mc_level) >= 2) {
3911             if (otag && MC_(clo_mc_level) == 3) {
3912                *otag = MC_(helperc_b_load1)( a );
3913             }
3914             return MC_ValueErr;
3915          }
3916       }
3917       /* Ok, a is safe to read. */
3918       if (* ((UChar*)a) == 0) {
3919          return MC_Ok;
3920       }
3921       a++;
3922    }
3923 }
3924 
3925 
3926 /*------------------------------------------------------------*/
3927 /*--- Memory event handlers                                ---*/
3928 /*------------------------------------------------------------*/
3929 
3930 static
3931 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
3932                                 Addr base, SizeT size )
3933 {
3934    Addr bad_addr;
3935    Bool ok = is_mem_addressable ( base, size, &bad_addr );
3936 
3937    if (!ok) {
3938       switch (part) {
3939       case Vg_CoreSysCall:
3940          MC_(record_memparam_error) ( tid, bad_addr,
3941                                       /*isAddrErr*/True, s, 0/*otag*/ );
3942          break;
3943 
3944       case Vg_CoreSignal:
3945          MC_(record_core_mem_error)( tid, s );
3946          break;
3947 
3948       default:
3949          VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
3950       }
3951    }
3952 }
3953 
3954 static
3955 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
3956                             Addr base, SizeT size )
3957 {
3958    UInt otag = 0;
3959    Addr bad_addr;
3960    MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
3961 
3962    if (MC_Ok != res) {
3963       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3964 
3965       switch (part) {
3966       case Vg_CoreSysCall:
3967          MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3968                                       isAddrErr ? 0 : otag );
3969          break;
3970 
3971       case Vg_CoreSysCallArgInMem:
3972          MC_(record_regparam_error) ( tid, s, otag );
3973          break;
3974 
3975       /* If we're being asked to jump to a silly address, record an error
3976          message before potentially crashing the entire system. */
3977       case Vg_CoreTranslate:
3978          MC_(record_jump_error)( tid, bad_addr );
3979          break;
3980 
3981       default:
3982          VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
3983       }
3984    }
3985 }
3986 
3987 static
3988 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
3989                                    const HChar* s, Addr str )
3990 {
3991    MC_ReadResult res;
3992    Addr bad_addr = 0;   // shut GCC up
3993    UInt otag = 0;
3994 
3995    tl_assert(part == Vg_CoreSysCall);
3996    res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
3997    if (MC_Ok != res) {
3998       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3999       MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4000                                    isAddrErr ? 0 : otag );
4001    }
4002 }
4003 
4004 /* Handling of mmap and mprotect is not as simple as it seems.
4005 
4006    The underlying semantics are that memory obtained from mmap is
4007    always initialised, but may be inaccessible.  And changes to the
4008    protection of memory do not change its contents and hence not its
4009    definedness state.  Problem is we can't model
4010    inaccessible-but-with-some-definedness state; once we mark memory
4011    as inaccessible we lose all info about definedness, and so can't
4012    restore that if it is later made accessible again.
4013 
4014    One obvious thing to do is this:
4015 
4016       mmap/mprotect NONE  -> noaccess
4017       mmap/mprotect other -> defined
4018 
4019    The problem case here is: taking accessible memory, writing
4020    uninitialised data to it, mprotecting it NONE and later mprotecting
4021    it back to some accessible state causes the undefinedness to be
4022    lost.
4023 
4024    A better proposal is:
4025 
4026      (1) mmap NONE       ->  make noaccess
4027      (2) mmap other      ->  make defined
4028 
4029      (3) mprotect NONE   ->  # no change
4030      (4) mprotect other  ->  change any "noaccess" to "defined"
4031 
4032    (2) is OK because memory newly obtained from mmap really is defined
4033        (zeroed out by the kernel -- doing anything else would
4034        constitute a massive security hole.)
4035 
4036    (1) is OK because the only way to make the memory usable is via
4037        (4), in which case we also wind up correctly marking it all as
4038        defined.
4039 
4040    (3) is the weak case.  We choose not to change memory state.
4041        (presumably the range is in some mixture of "defined" and
4042        "undefined", viz, accessible but with arbitrary V bits).  Doing
4043        nothing means we retain the V bits, so that if the memory is
4044        later mprotected "other", the V bits remain unchanged, so there
4045        can be no false negatives.  The bad effect is that if there's
4046        an access in the area, then MC cannot warn; but at least we'll
4047        get a SEGV to show, so it's better than nothing.
4048 
4049    Consider the sequence (3) followed by (4).  Any memory that was
4050    "defined" or "undefined" previously retains its state (as
4051    required).  Any memory that was "noaccess" before can only have
4052    been made that way by (1), and so it's OK to change it to
4053    "defined".
4054 
4055    See https://bugs.kde.org/show_bug.cgi?id=205541
4056    and https://bugs.kde.org/show_bug.cgi?id=210268
4057 */
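/* Illustrative sketch only (not part of the tool): the client-side sequence
   that rule (3) is designed to handle safely looks something like this.

      char* p = mmap(0, len, PROT_READ|PROT_WRITE,
                     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);  // (2): marked defined
      p[0] = uninitialised_local;       // shadow now records undefinedness
      mprotect(p, len, PROT_NONE);      // (3): V bits deliberately retained
      mprotect(p, len, PROT_READ);      // (4): only noaccess -> defined;
                                        //      the undefined V bits survive

   so a later use of the value read from p[0] is still reported as
   uninitialised, which is the behaviour argued for above. */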
4058 static
4059 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4060                        ULong di_handle )
4061 {
4062    if (rr || ww || xx) {
4063       /* (2) mmap/mprotect other -> defined */
4064       MC_(make_mem_defined)(a, len);
4065    } else {
4066       /* (1) mmap/mprotect NONE  -> noaccess */
4067       MC_(make_mem_noaccess)(a, len);
4068    }
4069 }
4070 
4071 static
4072 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4073 {
4074    if (rr || ww || xx) {
4075       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
4076       make_mem_defined_if_noaccess(a, len);
4077    } else {
4078       /* (3) mprotect NONE   ->  # no change */
4079       /* do nothing */
4080    }
4081 }
4082 
4083 
4084 static
4085 void mc_new_mem_startup( Addr a, SizeT len,
4086                          Bool rr, Bool ww, Bool xx, ULong di_handle )
4087 {
4088    // Because code is defined, initialised variables get put in the data
4089    // segment and are defined, and uninitialised variables get put in the
4090    // bss segment and are auto-zeroed (and so defined).
4091    //
4092    // It's possible that there will be padding between global variables.
4093    // This will also be auto-zeroed, and marked as defined by Memcheck.  If
4094    // a program uses it, Memcheck will not complain.  This is arguably a
4095    // false negative, but it's a grey area -- the behaviour is defined (the
4096    // padding is zeroed) but it's probably not what the user intended.  And
4097    // we can't avoid it.
4098    //
4099    // Note: we generally ignore RWX permissions, because we can't track them
4100    // without requiring more than one A bit which would slow things down a
4101    // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
4102    // So we mark any such pages as "unaddressable".
4103    DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4104          a, (ULong)len, rr, ww, xx);
4105    mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
4106 }
4107 
4108 static
4109 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4110 {
4111    MC_(make_mem_defined)(a, len);
4112 }
4113 
4114 
4115 /*------------------------------------------------------------*/
4116 /*--- Register event handlers                              ---*/
4117 /*------------------------------------------------------------*/
4118 
4119 /* Try and get a nonzero origin for the guest state section of thread
4120    tid characterised by (offset,size).  Return 0 if nothing to show
4121    for it. */
4122 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4123                                              Int offset, SizeT size )
4124 {
4125    Int   sh2off;
4126    UInt  area[3];
4127    UInt  otag;
4128    sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4129    if (sh2off == -1)
4130       return 0;  /* This piece of guest state is not tracked */
4131    tl_assert(sh2off >= 0);
4132    tl_assert(0 == (sh2off % 4));
4133    area[0] = 0x31313131;
4134    area[2] = 0x27272727;
4135    VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4136    tl_assert(area[0] == 0x31313131);
4137    tl_assert(area[2] == 0x27272727);
4138    otag = area[1];
4139    return otag;
4140 }
4141 
4142 
4143 /* When some chunk of guest state is written, mark the corresponding
4144    shadow area as valid.  This is used to initialise arbitrarily large
4145    chunks of guest state, hence the _SIZE value, which has to be as
4146    big as the biggest guest state.
4147 */
4148 static void mc_post_reg_write ( CorePart part, ThreadId tid,
4149                                 PtrdiffT offset, SizeT size)
4150 {
4151 #  define MAX_REG_WRITE_SIZE 1696
4152    UChar area[MAX_REG_WRITE_SIZE];
4153    tl_assert(size <= MAX_REG_WRITE_SIZE);
4154    VG_(memset)(area, V_BITS8_DEFINED, size);
4155    VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4156 #  undef MAX_REG_WRITE_SIZE
4157 }
4158 
4159 static
4160 void mc_post_reg_write_clientcall ( ThreadId tid,
4161                                     PtrdiffT offset, SizeT size, Addr f)
4162 {
4163    mc_post_reg_write(/*dummy*/0, tid, offset, size);
4164 }
4165 
4166 /* Look at the definedness of the guest's shadow state for
4167    [offset, offset+len).  If any part of that is undefined, record
4168    a parameter error.
4169 */
4170 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4171                               PtrdiffT offset, SizeT size)
4172 {
4173    Int   i;
4174    Bool  bad;
4175    UInt  otag;
4176 
4177    UChar area[16];
4178    tl_assert(size <= 16);
4179 
4180    VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4181 
4182    bad = False;
4183    for (i = 0; i < size; i++) {
4184       if (area[i] != V_BITS8_DEFINED) {
4185          bad = True;
4186          break;
4187       }
4188    }
4189 
4190    if (!bad)
4191       return;
4192 
4193    /* We've found some undefinedness.  See if we can also find an
4194       origin for it. */
4195    otag = mb_get_origin_for_guest_offset( tid, offset, size );
4196    MC_(record_regparam_error) ( tid, s, otag );
4197 }
4198 
4199 
4200 /*------------------------------------------------------------*/
4201 /*--- Functions called directly from generated code:       ---*/
4202 /*--- Load/store handlers.                                 ---*/
4203 /*------------------------------------------------------------*/
4204 
4205 /* Types:  LOADV32, LOADV16, LOADV8 are:
4206                UWord fn ( Addr a )
4207    so they return 32-bits on 32-bit machines and 64-bits on
4208    64-bit machines.  Addr has the same size as a host word.
4209 
4210    LOADV64 is always  ULong fn ( Addr a )
4211 
4212    Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4213    are a UWord, and for STOREV64 they are a ULong.
4214 */
4215 
4216 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4217    naturally '_sz/8'-aligned, or it exceeds the range covered by the
4218    primary map.  This is all very tricky (and important!), so let's
4219    work through the maths by hand (below), *and* assert for these
4220    values at startup. */
4221 #define MASK(_szInBytes) \
4222    ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4223 
4224 /* MASK only exists so as to define this macro. */
4225 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
4226    ((_a) & MASK((_szInBits>>3)))
4227 
4228 /* On a 32-bit machine:
4229 
4230    N_PRIMARY_BITS          == 16, so
4231    N_PRIMARY_MAP           == 0x10000, so
4232    N_PRIMARY_MAP-1         == 0xFFFF, so
4233    (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4234 
4235    MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4236            = ~ ( 0xFFFF | 0xFFFF0000 )
4237            = ~ 0xFFFF'FFFF
4238            = 0
4239 
4240    MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4241            = ~ ( 0xFFFE | 0xFFFF0000 )
4242            = ~ 0xFFFF'FFFE
4243            = 1
4244 
4245    MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4246            = ~ ( 0xFFFC | 0xFFFF0000 )
4247            = ~ 0xFFFF'FFFC
4248            = 3
4249 
4250    MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4251            = ~ ( 0xFFF8 | 0xFFFF0000 )
4252            = ~ 0xFFFF'FFF8
4253            = 7
4254 
4255    Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4256    precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
4257    the 1-byte alignment case, it is always a zero value, since MASK(1)
4258    is zero.  All as expected.
4259 
4260    On a 64-bit machine, it's more complex, since we're testing
4261    simultaneously for misalignment and for the address being at or
4262    above 64G:
4263 
4264    N_PRIMARY_BITS          == 20, so
4265    N_PRIMARY_MAP           == 0x100000, so
4266    N_PRIMARY_MAP-1         == 0xFFFFF, so
4267    (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4268 
4269    MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4270            = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4271            = ~ 0xF'FFFF'FFFF
4272            = 0xFFFF'FFF0'0000'0000
4273 
4274    MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4275            = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4276            = ~ 0xF'FFFF'FFFE
4277            = 0xFFFF'FFF0'0000'0001
4278 
4279    MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4280            = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4281            = ~ 0xF'FFFF'FFFC
4282            = 0xFFFF'FFF0'0000'0003
4283 
4284    MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4285            = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4286            = ~ 0xF'FFFF'FFF8
4287            = 0xFFFF'FFF0'0000'0007
4288 */
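/* Worked example (illustration only), using the 64-bit MASK values above:

      UNALIGNED_OR_HIGH(0x1003, 32)
         = 0x1003 & MASK(4)
         = 0x1003 & 0xFFFF'FFF0'0000'0003
         = 0x3                  -- nonzero: misaligned, take the slow path

      UNALIGNED_OR_HIGH(0x1000, 32)
         = 0x1000 & 0xFFFF'FFF0'0000'0003
         = 0                    -- aligned and below 64G: stay on the fast path

      UNALIGNED_OR_HIGH(0x10'0000'0000, 32)        (address == 64G)
         = 0x10'0000'0000 & 0xFFFF'FFF0'0000'0003
         = 0x10'0000'0000       -- nonzero: beyond the primary map, slow path
*/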
4289 
4290 
4291 /* ------------------------ Size = 16 ------------------------ */
4292 
4293 static INLINE
4294 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4295                            Addr a, SizeT nBits, Bool isBigEndian )
4296 {
4297    PROF_EVENT(200, "mc_LOADV_128_or_256");
4298 
4299 #ifndef PERF_FAST_LOADV
4300    mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4301    return;
4302 #else
4303    {
4304       UWord   sm_off16, vabits16, j;
4305       UWord   nBytes  = nBits / 8;
4306       UWord   nULongs = nBytes / 8;
4307       SecMap* sm;
4308 
4309       if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4310          PROF_EVENT(201, "mc_LOADV_128_or_256-slow1");
4311          mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4312          return;
4313       }
4314 
4315       /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4316          suitably aligned, is mapped, and addressable. */
4317       for (j = 0; j < nULongs; j++) {
4318          sm       = get_secmap_for_reading_low(a + 8*j);
4319          sm_off16 = SM_OFF_16(a + 8*j);
4320          vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4321 
4322          // Convert V bits from compact memory form to expanded
4323          // register form.
4324          if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4325             res[j] = V_BITS64_DEFINED;
4326          } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4327             res[j] = V_BITS64_UNDEFINED;
4328          } else {
4329             /* Slow case: some block of 8 bytes are not all-defined or
4330                all-undefined. */
4331             PROF_EVENT(202, "mc_LOADV_128_or_256-slow2");
4332             mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4333             return;
4334          }
4335       }
4336       return;
4337    }
4338 #endif
4339 }
4340 
4341 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4342 {
4343    mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4344 }
4345 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4346 {
4347    mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4348 }
4349 
4350 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4351 {
4352    mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4353 }
4354 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4355 {
4356    mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4357 }
4358 
4359 /* ------------------------ Size = 8 ------------------------ */
4360 
4361 static INLINE
4362 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4363 {
4364    PROF_EVENT(200, "mc_LOADV64");
4365 
4366 #ifndef PERF_FAST_LOADV
4367    return mc_LOADVn_slow( a, 64, isBigEndian );
4368 #else
4369    {
4370       UWord   sm_off16, vabits16;
4371       SecMap* sm;
4372 
4373       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4374          PROF_EVENT(201, "mc_LOADV64-slow1");
4375          return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4376       }
4377 
4378       sm       = get_secmap_for_reading_low(a);
4379       sm_off16 = SM_OFF_16(a);
4380       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4381 
4382       // Handle common case quickly: a is suitably aligned, is mapped, and
4383       // addressable.
4384       // Convert V bits from compact memory form to expanded register form.
4385       if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4386          return V_BITS64_DEFINED;
4387       } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4388          return V_BITS64_UNDEFINED;
4389       } else {
4390          /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4391          PROF_EVENT(202, "mc_LOADV64-slow2");
4392          return mc_LOADVn_slow( a, 64, isBigEndian );
4393       }
4394    }
4395 #endif
4396 }
4397 
4398 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4399 {
4400    return mc_LOADV64(a, True);
4401 }
4402 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4403 {
4404    return mc_LOADV64(a, False);
4405 }
4406 
4407 
4408 static INLINE
4409 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4410 {
4411    PROF_EVENT(210, "mc_STOREV64");
4412 
4413 #ifndef PERF_FAST_STOREV
4414    // XXX: this slow case seems to be marginally faster than the fast case!
4415    // Investigate further.
4416    mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4417 #else
4418    {
4419       UWord   sm_off16, vabits16;
4420       SecMap* sm;
4421 
4422       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4423          PROF_EVENT(211, "mc_STOREV64-slow1");
4424          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4425          return;
4426       }
4427 
4428       sm       = get_secmap_for_reading_low(a);
4429       sm_off16 = SM_OFF_16(a);
4430       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4431 
4432       // To understand the below cleverness, see the extensive comments
4433       // in MC_(helperc_STOREV8).
4434       if (LIKELY(V_BITS64_DEFINED == vbits64)) {
4435          if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
4436             return;
4437          }
4438          if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
4439             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
4440             return;
4441          }
4442          PROF_EVENT(232, "mc_STOREV64-slow2");
4443          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4444          return;
4445       }
4446       if (V_BITS64_UNDEFINED == vbits64) {
4447          if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
4448             return;
4449          }
4450          if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
4451             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
4452             return;
4453          }
4454          PROF_EVENT(232, "mc_STOREV64-slow3");
4455          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4456          return;
4457       }
4458 
4459       PROF_EVENT(212, "mc_STOREV64-slow4");
4460       mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4461    }
4462 #endif
4463 }
4464 
4465 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4466 {
4467    mc_STOREV64(a, vbits64, True);
4468 }
4469 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
4470 {
4471    mc_STOREV64(a, vbits64, False);
4472 }
4473 
4474 
4475 /* ------------------------ Size = 4 ------------------------ */
4476 
4477 static INLINE
4478 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
4479 {
4480    PROF_EVENT(220, "mc_LOADV32");
4481 
4482 #ifndef PERF_FAST_LOADV
4483    return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4484 #else
4485    {
4486       UWord   sm_off, vabits8;
4487       SecMap* sm;
4488 
4489       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4490          PROF_EVENT(221, "mc_LOADV32-slow1");
4491          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4492       }
4493 
4494       sm      = get_secmap_for_reading_low(a);
4495       sm_off  = SM_OFF(a);
4496       vabits8 = sm->vabits8[sm_off];
4497 
4498       // Handle common case quickly: a is suitably aligned, is mapped, and the
4499       // entire word32 it lives in is addressable.
4500       // Convert V bits from compact memory form to expanded register form.
4501       // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
4502       // Almost certainly not necessary, but be paranoid.
4503       if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4504          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
4505       } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
4506          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
4507       } else {
4508          /* Slow case: the 4 bytes are not all-defined or all-undefined. */
4509          PROF_EVENT(222, "mc_LOADV32-slow2");
4510          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4511       }
4512    }
4513 #endif
4514 }
4515 
4516 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
4517 {
4518    return mc_LOADV32(a, True);
4519 }
4520 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
4521 {
4522    return mc_LOADV32(a, False);
4523 }
4524 
4525 
4526 static INLINE
4527 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
4528 {
4529    PROF_EVENT(230, "mc_STOREV32");
4530 
4531 #ifndef PERF_FAST_STOREV
4532    mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4533 #else
4534    {
4535       UWord   sm_off, vabits8;
4536       SecMap* sm;
4537 
4538       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4539          PROF_EVENT(231, "mc_STOREV32-slow1");
4540          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4541          return;
4542       }
4543 
4544       sm      = get_secmap_for_reading_low(a);
4545       sm_off  = SM_OFF(a);
4546       vabits8 = sm->vabits8[sm_off];
4547 
4548       // To understand the below cleverness, see the extensive comments
4549       // in MC_(helperc_STOREV8).
4550       if (LIKELY(V_BITS32_DEFINED == vbits32)) {
4551          if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
4552             return;
4553          }
4554          if (!is_distinguished_sm(sm)  && VA_BITS8_UNDEFINED == vabits8) {
4555             sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
4556             return;
4557          }
4558          PROF_EVENT(232, "mc_STOREV32-slow2");
4559          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4560          return;
4561       }
4562       if (V_BITS32_UNDEFINED == vbits32) {
4563          if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
4564             return;
4565          }
4566          if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
4567             sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
4568             return;
4569          }
4570          PROF_EVENT(233, "mc_STOREV32-slow3");
4571          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4572          return;
4573       }
4574 
4575       PROF_EVENT(234, "mc_STOREV32-slow4");
4576       mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4577    }
4578 #endif
4579 }
4580 
4581 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
4582 {
4583    mc_STOREV32(a, vbits32, True);
4584 }
4585 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
4586 {
4587    mc_STOREV32(a, vbits32, False);
4588 }
4589 
4590 
4591 /* ------------------------ Size = 2 ------------------------ */
4592 
4593 static INLINE
4594 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
4595 {
4596    PROF_EVENT(240, "mc_LOADV16");
4597 
4598 #ifndef PERF_FAST_LOADV
4599    return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4600 #else
4601    {
4602       UWord   sm_off, vabits8;
4603       SecMap* sm;
4604 
4605       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4606          PROF_EVENT(241, "mc_LOADV16-slow1");
4607          return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4608       }
4609 
4610       sm      = get_secmap_for_reading_low(a);
4611       sm_off  = SM_OFF(a);
4612       vabits8 = sm->vabits8[sm_off];
4613       // Handle common case quickly: a is suitably aligned, is mapped, and is
4614       // addressable.
4615       // Convert V bits from compact memory form to expanded register form
4616       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
4617       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
4618       else {
4619          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4620          // the two sub-bytes.
4621          UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
4622          if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
4623          else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
4624          else {
4625             /* Slow case: the two bytes are not all-defined or all-undefined. */
4626             PROF_EVENT(242, "mc_LOADV16-slow2");
4627             return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4628          }
4629       }
4630    }
4631 #endif
4632 }
4633 
4634 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
4635 {
4636    return mc_LOADV16(a, True);
4637 }
4638 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
4639 {
4640    return mc_LOADV16(a, False);
4641 }
4642 
4643 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
4644 static INLINE
4645 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
4646 {
4647    UInt shift;
4648    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
4649    shift = (a & 2) << 1;               // shift by 0 or 4
4650    vabits8 >>= shift;                  // shift the four bits to the bottom
4651     // check 2 x vabits2 != VA_BITS2_NOACCESS
4652    return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
4653       &&  ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
4654 }
4655 
4656 static INLINE
4657 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
4658 {
4659    PROF_EVENT(250, "mc_STOREV16");
4660 
4661 #ifndef PERF_FAST_STOREV
4662    mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4663 #else
4664    {
4665       UWord   sm_off, vabits8;
4666       SecMap* sm;
4667 
4668       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4669          PROF_EVENT(251, "mc_STOREV16-slow1");
4670          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4671          return;
4672       }
4673 
4674       sm      = get_secmap_for_reading_low(a);
4675       sm_off  = SM_OFF(a);
4676       vabits8 = sm->vabits8[sm_off];
4677 
4678       // To understand the below cleverness, see the extensive comments
4679       // in MC_(helperc_STOREV8).
4680       if (LIKELY(V_BITS16_DEFINED == vbits16)) {
4681          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4682             return;
4683          }
4684          if (!is_distinguished_sm(sm)
4685              && accessible_vabits4_in_vabits8(a, vabits8)) {
4686             insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
4687                                          &(sm->vabits8[sm_off]) );
4688             return;
4689          }
4690          PROF_EVENT(232, "mc_STOREV16-slow2");
4691          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
              return;  // otherwise we fall through and redo the store on the slow4 path
4692       }
4693       if (V_BITS16_UNDEFINED == vbits16) {
4694          if (vabits8 == VA_BITS8_UNDEFINED) {
4695             return;
4696          }
4697          if (!is_distinguished_sm(sm)
4698              && accessible_vabits4_in_vabits8(a, vabits8)) {
4699             insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
4700                                          &(sm->vabits8[sm_off]) );
4701             return;
4702          }
4703          PROF_EVENT(233, "mc_STOREV16-slow3");
4704          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4705          return;
4706       }
4707 
4708       PROF_EVENT(234, "mc_STOREV16-slow4");
4709       mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4710    }
4711 #endif
4712 }
4713 
4714 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
4715 {
4716    mc_STOREV16(a, vbits16, True);
4717 }
4718 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
4719 {
4720    mc_STOREV16(a, vbits16, False);
4721 }
4722 
4723 
4724 /* ------------------------ Size = 1 ------------------------ */
4725 /* Note: endianness is irrelevant for size == 1 */
4726 
4727 VG_REGPARM(1)
4728 UWord MC_(helperc_LOADV8) ( Addr a )
4729 {
4730    PROF_EVENT(260, "mc_LOADV8");
4731 
4732 #ifndef PERF_FAST_LOADV
4733    return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4734 #else
4735    {
4736       UWord   sm_off, vabits8;
4737       SecMap* sm;
4738 
4739       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4740          PROF_EVENT(261, "mc_LOADV8-slow1");
4741          return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4742       }
4743 
4744       sm      = get_secmap_for_reading_low(a);
4745       sm_off  = SM_OFF(a);
4746       vabits8 = sm->vabits8[sm_off];
4747       // Convert V bits from compact memory form to expanded register form
4748       // Handle common case quickly: a is mapped, and the entire
4749       // word32 it lives in is addressable.
4750       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
4751       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
4752       else {
4753          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4754          // the single byte.
4755          UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
4756          if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
4757          else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
4758          else {
4759             /* Slow case: the byte is not all-defined or all-undefined. */
4760             PROF_EVENT(262, "mc_LOADV8-slow2");
4761             return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4762          }
4763       }
4764    }
4765 #endif
4766 }
4767 
4768 
4769 VG_REGPARM(2)
4770 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
4771 {
4772    PROF_EVENT(270, "mc_STOREV8");
4773 
4774 #ifndef PERF_FAST_STOREV
4775    mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4776 #else
4777    {
4778       UWord   sm_off, vabits8;
4779       SecMap* sm;
4780 
4781       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4782          PROF_EVENT(271, "mc_STOREV8-slow1");
4783          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4784          return;
4785       }
4786 
4787       sm      = get_secmap_for_reading_low(a);
4788       sm_off  = SM_OFF(a);
4789       vabits8 = sm->vabits8[sm_off];
4790 
4791       // Clevernesses to speed up storing V bits.
4792       // The 64/32/16 bit cases also have similar clevernesses, but it
4793       // works a little differently to the code below.
4794       //
4795       // Cleverness 1:  sometimes we don't have to write the shadow memory at
4796       // all, if we can tell that what we want to write is the same as what is
4797       // already there. These cases are marked below as "defined on defined" and
4798       // "undefined on undefined".
4799       //
4800       // Cleverness 2:
4801       // We also avoid calling mc_STOREVn_slow if the V bits can be written
4802       // directly into the secondary map. V bits can be written directly
4803       // if 4 conditions are respected:
4804       //   * The address for which V bits are written is naturally aligned
4805       //        on 1 byte  for STOREV8 (this is always true)
4806       //        on 2 bytes for STOREV16
4807       //        on 4 bytes for STOREV32
4808       //        on 8 bytes for STOREV64.
4809       //   * V bits being written are either fully defined or fully undefined.
4810       //     (for partially defined V bits, V bits cannot be directly written,
4811       //      as the secondary vbits table must be maintained).
4812       //   * the secmap is not distinguished (distinguished maps cannot be
4813       //     modified).
4814       //   * the memory corresponding to the V bits being written is
4815       //     accessible (if one or more bytes are not accessible,
4816       //     we must call mc_STOREVn_slow in order to report accessibility
4817       //     errors).
4818       //     Note that for STOREV32 and STOREV64, it is too expensive
4819       //     to verify the accessibility of each byte for the benefit it
4820       //     brings. Instead, a quicker check is done by comparing to
4821       //     VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
4822       //     but misses some opportunities for direct modification.
4823       //     Checking the accessibility of each byte was measured with the
4824       //     STOREV32 perf tests and slowed down all perf tests.
4825       // The cases corresponding to cleverness 2 are marked below as
4826       // "direct mod".
4827       if (LIKELY(V_BITS8_DEFINED == vbits8)) {
4828          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4829             return; // defined on defined
4830          }
4831          if (!is_distinguished_sm(sm)
4832              && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
4833             // direct mod
4834             insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
4835                                          &(sm->vabits8[sm_off]) );
4836             return;
4837          }
4838          PROF_EVENT(232, "mc_STOREV8-slow2");
4839          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4840          return;
4841       }
4842       if (V_BITS8_UNDEFINED == vbits8) {
4843          if (vabits8 == VA_BITS8_UNDEFINED) {
4844             return; // undefined on undefined
4845          }
4846          if (!is_distinguished_sm(sm)
4847              && (VA_BITS2_NOACCESS
4848                  != extract_vabits2_from_vabits8(a, vabits8))) {
4849             // direct mod
4850             insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
4851                                          &(sm->vabits8[sm_off]) );
4852             return;
4853          }
4854          PROF_EVENT(233, "mc_STOREV8-slow3");
4855          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4856          return;
4857       }
4858 
4859       // Partially defined word
4860       PROF_EVENT(234, "mc_STOREV8-slow4");
4861       mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4862    }
4863 #endif
4864 }
4865 
4866 
4867 /*------------------------------------------------------------*/
4868 /*--- Functions called directly from generated code:       ---*/
4869 /*--- Value-check failure handlers.                        ---*/
4870 /*------------------------------------------------------------*/
4871 
4872 /* Call these ones when an origin is available ... */
4873 VG_REGPARM(1)
4874 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
4875    MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
4876 }
4877 
4878 VG_REGPARM(1)
4879 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
4880    MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
4881 }
4882 
4883 VG_REGPARM(1)
4884 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
4885    MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
4886 }
4887 
4888 VG_REGPARM(1)
4889 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
4890    MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
4891 }
4892 
4893 VG_REGPARM(2)
4894 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
4895    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
4896 }
4897 
4898 /* ... and these when an origin isn't available. */
4899 
4900 VG_REGPARM(0)
4901 void MC_(helperc_value_check0_fail_no_o) ( void ) {
4902    MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
4903 }
4904 
4905 VG_REGPARM(0)
4906 void MC_(helperc_value_check1_fail_no_o) ( void ) {
4907    MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
4908 }
4909 
4910 VG_REGPARM(0)
4911 void MC_(helperc_value_check4_fail_no_o) ( void ) {
4912    MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
4913 }
4914 
4915 VG_REGPARM(0)
4916 void MC_(helperc_value_check8_fail_no_o) ( void ) {
4917    MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
4918 }
4919 
4920 VG_REGPARM(1)
4921 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
4922    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
4923 }
4924 
4925 
4926 /*------------------------------------------------------------*/
4927 /*--- Metadata get/set functions, for client requests.     ---*/
4928 /*------------------------------------------------------------*/
4929 
4930 // Nb: this expands the V+A bits out into register-form V bits, even though
4931 // they're in memory.  This is for backward compatibility, and because it's
4932 // probably what the user wants.
4933 
4934 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
4935    error [no longer used], 3 == addressing error. */
4936 /* Nb: We used to issue various definedness/addressability errors from here,
4937    but we took them out because they ranged from not-very-helpful to
4938    downright annoying, and they complicated the error data structures. */
4939 static Int mc_get_or_set_vbits_for_client (
4940    Addr a,
4941    Addr vbits,
4942    SizeT szB,
4943    Bool setting, /* True <=> set vbits,  False <=> get vbits */
4944    Bool is_client_request /* True <=> real user request
4945                              False <=> internal call from gdbserver */
4946 )
4947 {
4948    SizeT i;
4949    Bool  ok;
4950    UChar vbits8;
4951 
4952    /* Check that the arrays are addressable before doing any getting/setting.
4953       vbits is checked only for a real user request. */
4954    for (i = 0; i < szB; i++) {
4955       if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
4956           (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
4957          return 3;
4958       }
4959    }
4960 
4961    /* Do the copy */
4962    if (setting) {
4963       /* setting */
4964       for (i = 0; i < szB; i++) {
4965          ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
4966          tl_assert(ok);
4967       }
4968    } else {
4969       /* getting */
4970       for (i = 0; i < szB; i++) {
4971          ok = get_vbits8(a + i, &vbits8);
4972          tl_assert(ok);
4973          ((UChar*)vbits)[i] = vbits8;
4974       }
4975       if (is_client_request)
4976         // The bytes in vbits[] have now been set, so mark them as such.
4977         MC_(make_mem_defined)(vbits, szB);
4978    }
4979 
4980    return 1;
4981 }
4982 
4983 
4984 /*------------------------------------------------------------*/
4985 /*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
4986 /*------------------------------------------------------------*/
4987 
4988 /* For the memory leak detector, say whether an entire 64k chunk of
4989    address space is possibly in use, or not.  If in doubt return
4990    True.
4991 */
4992 Bool MC_(is_within_valid_secondary) ( Addr a )
4993 {
4994    SecMap* sm = maybe_get_secmap_for ( a );
4995    if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
4996       /* Definitely not in use. */
4997       return False;
4998    } else {
4999       return True;
5000    }
5001 }
5002 
5003 
5004 /* For the memory leak detector, say whether or not a given word
5005    address is to be regarded as valid. */
5006 Bool MC_(is_valid_aligned_word) ( Addr a )
5007 {
5008    tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5009    tl_assert(VG_IS_WORD_ALIGNED(a));
5010    if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5011       return False;
5012    if (sizeof(UWord) == 8) {
5013       if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5014          return False;
5015    }
5016    if (UNLIKELY(MC_(in_ignored_range)(a)))
5017       return False;
5018    else
5019       return True;
5020 }
5021 
5022 
5023 /*------------------------------------------------------------*/
5024 /*--- Initialisation                                       ---*/
5025 /*------------------------------------------------------------*/
5026 
5027 static void init_shadow_memory ( void )
5028 {
5029    Int     i;
5030    SecMap* sm;
5031 
5032    tl_assert(V_BIT_UNDEFINED   == 1);
5033    tl_assert(V_BIT_DEFINED     == 0);
5034    tl_assert(V_BITS8_UNDEFINED == 0xFF);
5035    tl_assert(V_BITS8_DEFINED   == 0);
5036 
5037    /* Build the 3 distinguished secondaries */
5038    sm = &sm_distinguished[SM_DIST_NOACCESS];
5039    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5040 
5041    sm = &sm_distinguished[SM_DIST_UNDEFINED];
5042    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5043 
5044    sm = &sm_distinguished[SM_DIST_DEFINED];
5045    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5046 
5047    /* Set up the primary map. */
5048    /* These entries gradually get overwritten as the used address
5049       space expands. */
5050    for (i = 0; i < N_PRIMARY_MAP; i++)
5051       primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5052 
5053    /* Auxiliary primary maps */
5054    init_auxmap_L1_L2();
5055 
5056    /* auxmap_size = auxmap_used = 0;
5057       no ... these are statically initialised */
5058 
5059    /* Secondary V bit table */
5060    secVBitTable = createSecVBitTable();
5061 }
5062 
5063 
5064 /*------------------------------------------------------------*/
5065 /*--- Sanity check machinery (permanently engaged)         ---*/
5066 /*------------------------------------------------------------*/
5067 
5068 static Bool mc_cheap_sanity_check ( void )
5069 {
5070    n_sanity_cheap++;
5071    PROF_EVENT(490, "cheap_sanity_check");
5072    /* Check for sane operating level */
5073    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5074       return False;
5075    /* nothing else useful we can rapidly check */
5076    return True;
5077 }
5078 
5079 static Bool mc_expensive_sanity_check ( void )
5080 {
5081    Int     i;
5082    Word    n_secmaps_found;
5083    SecMap* sm;
5084    const HChar*  errmsg;
5085    Bool    bad = False;
5086 
5087    if (0) VG_(printf)("expensive sanity check\n");
5088    if (0) return True;
5089 
5090    n_sanity_expensive++;
5091    PROF_EVENT(491, "expensive_sanity_check");
5092 
5093    /* Check for sane operating level */
5094    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5095       return False;
5096 
5097    /* Check that the 3 distinguished SMs are still as they should be. */
5098 
5099    /* Check noaccess DSM. */
5100    sm = &sm_distinguished[SM_DIST_NOACCESS];
5101    for (i = 0; i < SM_CHUNKS; i++)
5102       if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5103          bad = True;
5104 
5105    /* Check undefined DSM. */
5106    sm = &sm_distinguished[SM_DIST_UNDEFINED];
5107    for (i = 0; i < SM_CHUNKS; i++)
5108       if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5109          bad = True;
5110 
5111    /* Check defined DSM. */
5112    sm = &sm_distinguished[SM_DIST_DEFINED];
5113    for (i = 0; i < SM_CHUNKS; i++)
5114       if (sm->vabits8[i] != VA_BITS8_DEFINED)
5115          bad = True;
5116 
5117    if (bad) {
5118       VG_(printf)("memcheck expensive sanity: "
5119                   "distinguished_secondaries have changed\n");
5120       return False;
5121    }
5122 
5123    /* If we're not checking for undefined value errors, the secondary V bit
5124     * table should be empty. */
5125    if (MC_(clo_mc_level) == 1) {
5126       if (0 != VG_(OSetGen_Size)(secVBitTable))
5127          return False;
5128    }
5129 
5130    /* check the auxiliary maps, very thoroughly */
5131    n_secmaps_found = 0;
5132    errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
5133    if (errmsg) {
5134       VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
5135       return False;
5136    }
5137 
5138    /* n_secmaps_found is now the number referred to by the auxiliary
5139       primary map.  Now add on the ones referred to by the main
5140       primary map. */
5141    for (i = 0; i < N_PRIMARY_MAP; i++) {
5142       if (primary_map[i] == NULL) {
5143          bad = True;
5144       } else {
5145          if (!is_distinguished_sm(primary_map[i]))
5146             n_secmaps_found++;
5147       }
5148    }
5149 
5150    /* check that the number of secmaps issued matches the number that
5151       are reachable (iow, no secmap leaks) */
5152    if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
5153       bad = True;
5154 
5155    if (bad) {
5156       VG_(printf)("memcheck expensive sanity: "
5157                   "apparent secmap leakage\n");
5158       return False;
5159    }
5160 
5161    if (bad) {
5162       VG_(printf)("memcheck expensive sanity: "
5163                   "auxmap covers wrong address space\n");
5164       return False;
5165    }
5166 
5167    /* there is only one pointer to each secmap (expensive) */
5168 
5169    return True;
5170 }
5171 
5172 /*------------------------------------------------------------*/
5173 /*--- Command line args                                    ---*/
5174 /*------------------------------------------------------------*/
5175 
5176 
5177 Bool          MC_(clo_partial_loads_ok)       = False;
5178 Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
5179 Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
5180 LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
5181 VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
5182 UInt          MC_(clo_show_leak_kinds)        = R2S(Possible) | R2S(Unreached);
5183 UInt          MC_(clo_error_for_leak_kinds)   = R2S(Possible) | R2S(Unreached);
5184 UInt          MC_(clo_leak_check_heuristics)  = 0;
5185 Bool          MC_(clo_workaround_gcc296_bugs) = False;
5186 Int           MC_(clo_malloc_fill)            = -1;
5187 Int           MC_(clo_free_fill)              = -1;
5188 KeepStacktraces MC_(clo_keep_stacktraces)     = KS_alloc_then_free;
5189 Int           MC_(clo_mc_level)               = 2;
5190 
5191 static Bool MC_(parse_leak_heuristics) ( const HChar *str0, UInt *lhs )
5192 {
5193    return  VG_(parse_enum_set) ("-,stdstring,newarray,multipleinheritance",
5194                                 str0, lhs);
5195 }
5196 
5197 
5198 static Bool mc_process_cmd_line_options(const HChar* arg)
5199 {
5200    const HChar* tmp_str;
5201    Int   tmp_show;
5202 
5203    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
5204 
5205    /* Set MC_(clo_mc_level):
5206          1 = A bit tracking only
5207          2 = A and V bit tracking, but no V bit origins
5208          3 = A and V bit tracking, and V bit origins
5209 
5210       Do this by inspecting --undef-value-errors= and
5211       --track-origins=.  Reject the case --undef-value-errors=no
5212       --track-origins=yes as meaningless.
5213    */
5214    if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
5215       if (MC_(clo_mc_level) == 3) {
5216          goto bad_level;
5217       } else {
5218          MC_(clo_mc_level) = 1;
5219          return True;
5220       }
5221    }
5222    if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
5223       if (MC_(clo_mc_level) == 1)
5224          MC_(clo_mc_level) = 2;
5225       return True;
5226    }
5227    if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
5228       if (MC_(clo_mc_level) == 3)
5229          MC_(clo_mc_level) = 2;
5230       return True;
5231    }
5232    if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
5233       if (MC_(clo_mc_level) == 1) {
5234          goto bad_level;
5235       } else {
5236          MC_(clo_mc_level) = 3;
5237          return True;
5238       }
5239    }
5240 
5241         if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
5242    else if VG_STR_CLO(arg, "--errors-for-leak-kinds" , tmp_str) {
5243       if (!MC_(parse_leak_kinds)(tmp_str, &MC_(clo_error_for_leak_kinds)))
5244          return False;
5245    }
5246    else if VG_STR_CLO(arg, "--show-leak-kinds", tmp_str) {
5247       if (!MC_(parse_leak_kinds)(tmp_str, &MC_(clo_show_leak_kinds)))
5248          return False;
5249    }
5250    else if VG_STR_CLO(arg, "--leak-check-heuristics", tmp_str) {
5251       if (!MC_(parse_leak_heuristics)(tmp_str, &MC_(clo_leak_check_heuristics)))
5252          return False;
5253    }
5254    else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
5255       if (tmp_show) {
5256          MC_(clo_show_leak_kinds) = RallS;
5257       } else {
5258          MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
5259       }
5260    }
5261    else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
5262       if (tmp_show) {
5263          MC_(clo_show_leak_kinds) |= R2S(Possible);
5264       } else {
5265          MC_(clo_show_leak_kinds) &= ~R2S(Possible);
5266       }
5267    }
5268    else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
5269                                             MC_(clo_workaround_gcc296_bugs)) {}
5270 
5271    else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
5272                                                0, 10*1000*1000*1000LL) {}
5273 
5274    else if VG_BINT_CLO(arg, "--freelist-big-blocks",
5275                        MC_(clo_freelist_big_blocks),
5276                        0, 10*1000*1000*1000LL) {}
5277 
5278    else if VG_XACT_CLO(arg, "--leak-check=no",
5279                             MC_(clo_leak_check), LC_Off) {}
5280    else if VG_XACT_CLO(arg, "--leak-check=summary",
5281                             MC_(clo_leak_check), LC_Summary) {}
5282    else if VG_XACT_CLO(arg, "--leak-check=yes",
5283                             MC_(clo_leak_check), LC_Full) {}
5284    else if VG_XACT_CLO(arg, "--leak-check=full",
5285                             MC_(clo_leak_check), LC_Full) {}
5286 
5287    else if VG_XACT_CLO(arg, "--leak-resolution=low",
5288                             MC_(clo_leak_resolution), Vg_LowRes) {}
5289    else if VG_XACT_CLO(arg, "--leak-resolution=med",
5290                             MC_(clo_leak_resolution), Vg_MedRes) {}
5291    else if VG_XACT_CLO(arg, "--leak-resolution=high",
5292                             MC_(clo_leak_resolution), Vg_HighRes) {}
5293 
5294    else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
5295       Bool ok = parse_ignore_ranges(tmp_str);
5296       if (!ok) {
5297          VG_(message)(Vg_DebugMsg,
5298             "ERROR: --ignore-ranges: "
5299             "invalid syntax, or end <= start in range\n");
5300          return False;
5301       }
5302       if (gIgnoredAddressRanges) {
5303          Word i;
5304          for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
5305             UWord val     = IAR_INVALID;
5306             UWord key_min = ~(UWord)0;
5307             UWord key_max = (UWord)0;
5308             VG_(indexRangeMap)( &key_min, &key_max, &val,
5309                                 gIgnoredAddressRanges, i );
5310             tl_assert(key_min <= key_max);
5311             UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
5312             if (key_max - key_min > limit) {
5313                VG_(message)(Vg_DebugMsg,
5314                   "ERROR: --ignore-ranges: suspiciously large range:\n");
5315                VG_(message)(Vg_DebugMsg,
5316                    "       0x%lx-0x%lx (size %ld)\n", key_min, key_max,
5317                    key_max - key_min + 1);
5318                return False;
5319             }
5320          }
5321       }
5322    }
5323 
5324    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
5325    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
5326 
5327    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
5328                        MC_(clo_keep_stacktraces), KS_alloc) {}
5329    else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
5330                        MC_(clo_keep_stacktraces), KS_free) {}
5331    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
5332                        MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
5333    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
5334                        MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
5335    else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
5336                        MC_(clo_keep_stacktraces), KS_none) {}
5337 
5338    else
5339       return VG_(replacement_malloc_process_cmd_line_option)(arg);
5340 
5341    return True;
5342 
5343 
5344   bad_level:
5345    VG_(fmsg_bad_option)(arg,
5346       "--track-origins=yes has no effect when --undef-value-errors=no.\n");
5347 }
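/* An illustrative command line combining several of the options handled
   above ("./myprog" is a placeholder):

      valgrind --tool=memcheck --leak-check=full --track-origins=yes \
               --show-leak-kinds=definite,possible ./myprog
*/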
5348 
5349 static void mc_print_usage(void)
5350 {
5351    VG_(printf)(
5352 "    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
5353 "    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
5354 "    --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
5355 "                                            [definite,possible]\n"
5356 "    --errors-for-leak-kinds=kind1,kind2,..  which leak kinds are errors?\n"
5357 "                                            [definite,possible]\n"
5358 "        where kind is one of definite indirect possible reachable all none\n"
5359 "    --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
5360 "        improving leak search false positive [none]\n"
5361 "        where heur is one of stdstring newarray multipleinheritance all none\n"
5362 "    --show-reachable=yes             same as --show-leak-kinds=all\n"
5363 "    --show-reachable=no --show-possibly-lost=yes\n"
5364 "                                     same as --show-leak-kinds=definite,possible\n"
5365 "    --show-reachable=no --show-possibly-lost=no\n"
5366 "                                     same as --show-leak-kinds=definite\n"
5367 "    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
5368 "    --track-origins=no|yes           show origins of undefined values? [no]\n"
5369 "    --partial-loads-ok=no|yes        too hard to explain here; see manual [no]\n"
5370 "    --freelist-vol=<number>          volume of freed blocks queue     [20000000]\n"
5371 "    --freelist-big-blocks=<number>   releases first blocks with size>= [1000000]\n"
5372 "    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
5373 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
5374 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
5375 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
5376 "    --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
5377 "        stack trace(s) to keep for malloc'd/free'd areas       [alloc-then-free]\n"
5378    );
5379 }
5380 
5381 static void mc_print_debug_usage(void)
5382 {
5383    VG_(printf)(
5384 "    (none)\n"
5385    );
5386 }
5387 
5388 
5389 /*------------------------------------------------------------*/
5390 /*--- Client blocks                                        ---*/
5391 /*------------------------------------------------------------*/
5392 
5393 /* Client block management:
5394 
5395    This is managed as an expanding array of client block descriptors.
5396    Indices of live descriptors are issued to the client, so it can ask
5397    to free them later.  Therefore we cannot slide live entries down
5398    over dead ones.  Instead we must use free/inuse flags and scan for
5399    an empty slot at allocation time.  This in turn means allocation is
5400    relatively expensive, so we hope this does not happen too often.
5401 
5402    An unused block has start == size == 0
5403 */
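/* A sketch of how a client typically drives this subsystem, using the
   standard memcheck.h client-request macros (handled further below in
   mc_handle_client_request); "buf" and "buflen" are placeholders:

      int blk = VALGRIND_CREATE_BLOCK(buf, buflen, "my buffer");
      ...
      VALGRIND_DISCARD(blk);
*/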
5404 
5405 /* type CGenBlock is defined in mc_include.h */
5406 
5407 /* This subsystem is self-initialising. */
5408 static UWord      cgb_size = 0;
5409 static UWord      cgb_used = 0;
5410 static CGenBlock* cgbs     = NULL;
5411 
5412 /* Stats for this subsystem. */
5413 static ULong cgb_used_MAX = 0;   /* Max in use. */
5414 static ULong cgb_allocs   = 0;   /* Number of allocs. */
5415 static ULong cgb_discards = 0;   /* Number of discards. */
5416 static ULong cgb_search   = 0;   /* Number of searches. */
5417 
5418 
5419 /* Get access to the client block array. */
5420 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
5421                                  /*OUT*/UWord* nBlocks )
5422 {
5423    *blocks  = cgbs;
5424    *nBlocks = cgb_used;
5425 }
5426 
5427 
5428 static
5429 Int alloc_client_block ( void )
5430 {
5431    UWord      i, sz_new;
5432    CGenBlock* cgbs_new;
5433 
5434    cgb_allocs++;
5435 
5436    for (i = 0; i < cgb_used; i++) {
5437       cgb_search++;
5438       if (cgbs[i].start == 0 && cgbs[i].size == 0)
5439          return i;
5440    }
5441 
5442    /* Not found.  Try to allocate one at the end. */
5443    if (cgb_used < cgb_size) {
5444       cgb_used++;
5445       return cgb_used-1;
5446    }
5447 
5448    /* Ok, we have to allocate a new one. */
5449    tl_assert(cgb_used == cgb_size);
5450    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
5451 
5452    cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
5453    for (i = 0; i < cgb_used; i++)
5454       cgbs_new[i] = cgbs[i];
5455 
5456    if (cgbs != NULL)
5457       VG_(free)( cgbs );
5458    cgbs = cgbs_new;
5459 
5460    cgb_size = sz_new;
5461    cgb_used++;
5462    if (cgb_used > cgb_used_MAX)
5463       cgb_used_MAX = cgb_used;
5464    return cgb_used-1;
5465 }
5466 
5467 
5468 static void show_client_block_stats ( void )
5469 {
5470    VG_(message)(Vg_DebugMsg,
5471       "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
5472       cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
5473    );
5474 }
5475 static void print_monitor_help ( void )
5476 {
5477    VG_(gdb_printf)
5478       (
5479 "\n"
5480 "memcheck monitor commands:\n"
5481 "  get_vbits <addr> [<len>]\n"
5482 "        returns validity bits for <len> (or 1) bytes at <addr>\n"
5483 "            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
5484 "        Example: get_vbits 0x8049c78 10\n"
5485 "  make_memory [noaccess|undefined\n"
5486 "                     |defined|Definedifaddressable] <addr> [<len>]\n"
5487 "        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
5488 "  check_memory [addressable|defined] <addr> [<len>]\n"
5489 "        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
5490 "            and outputs a description of <addr>\n"
5491 "  leak_check [full*|summary]\n"
5492 "                [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
5493 "                [heuristics heur1,heur2,...]\n"
5494 "                [increased*|changed|any]\n"
5495 "                [unlimited*|limited <max_loss_records_output>]\n"
5496 "            * = defaults\n"
5497 "       where kind is one of definite indirect possible reachable all none\n"
5498 "       where heur is one of stdstring newarray multipleinheritance all none*\n"
5499 "        Examples: leak_check\n"
5500 "                  leak_check summary any\n"
5501 "                  leak_check full kinds indirect,possible\n"
5502 "                  leak_check full reachable any limited 100\n"
5503 "  block_list <loss_record_nr>\n"
5504 "        after a leak search, shows the list of blocks of <loss_record_nr>\n"
5505 "  who_points_at <addr> [<len>]\n"
5506 "        shows places pointing inside <len> (default 1) bytes at <addr>\n"
5507 "        (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
5508 "         with len > 1, will also show \"interior pointers\")\n"
5509 "\n");
5510 }
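/* These commands are normally issued through the Valgrind gdbserver,
   e.g. from gdb (illustrative):
      (gdb) monitor get_vbits 0x8049c78 10
      (gdb) monitor leak_check full reachable any
   or from a shell via the vgdb utility:
      vgdb leak_check summary
*/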
5511 
5512 /* return True if request recognised, False otherwise */
5513 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
5514 {
5515    HChar* wcmd;
5516    HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */
5517    HChar *ssaveptr;
5518 
5519    VG_(strcpy) (s, req);
5520 
5521    wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
5522    /* NB: if possible, avoid introducing a new command below which
5523       starts with the same first letter(s) as an already existing
5524       command. This ensures a shorter abbreviation for the user. */
5525    switch (VG_(keyword_id)
5526            ("help get_vbits leak_check make_memory check_memory "
5527             "block_list who_points_at",
5528             wcmd, kwd_report_duplicated_matches)) {
5529    case -2: /* multiple matches */
5530       return True;
5531    case -1: /* not found */
5532       return False;
5533    case  0: /* help */
5534       print_monitor_help();
5535       return True;
5536    case  1: { /* get_vbits */
5537       Addr address;
5538       SizeT szB = 1;
5539       if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
5540          UChar vbits;
5541          Int i;
5542          Int unaddressable = 0;
5543          for (i = 0; i < szB; i++) {
5544             Int res = mc_get_or_set_vbits_for_client
5545                (address+i, (Addr) &vbits, 1,
5546                 False, /* get them */
5547                 False  /* is client request */ );
5548             /* about to start a new output line, so print a \n. */
5549             if ((i % 32) == 0 && i != 0)
5550                VG_(printf) ("\n");
5551             /* about to start the next block of 4, so print a space. */
5552             else if ((i % 4) == 0 && i != 0)
5553                VG_(printf) (" ");
5554             if (res == 1) {
5555                VG_(printf) ("%02x", vbits);
5556             } else {
5557                tl_assert(3 == res);
5558                unaddressable++;
5559                VG_(printf) ("__");
5560             }
5561          }
5562          VG_(printf) ("\n");
5563          if (unaddressable) {
5564             VG_(printf)
5565                ("Address %p len %ld has %d bytes unaddressable\n",
5566                 (void *)address, szB, unaddressable);
5567          }
5568       }
5569       return True;
5570    }
5571    case  2: { /* leak_check */
5572       Int err = 0;
5573       LeakCheckParams lcp;
5574       HChar* kw;
5575 
5576       lcp.mode               = LC_Full;
5577       lcp.show_leak_kinds    = R2S(Possible) | R2S(Unreached);
5578       lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
5579       lcp.heuristics         = 0;
5580       lcp.deltamode          = LCD_Increased;
5581       lcp.max_loss_records_output = 999999999;
5582       lcp.requested_by_monitor_command = True;
5583 
5584       for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
5585            kw != NULL;
5586            kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
5587          switch (VG_(keyword_id)
5588                  ("full summary "
5589                   "kinds reachable possibleleak definiteleak "
5590                   "heuristics "
5591                   "increased changed any "
5592                   "unlimited limited ",
5593                   kw, kwd_report_all)) {
5594          case -2: err++; break;
5595          case -1: err++; break;
5596          case  0: /* full */
5597             lcp.mode = LC_Full; break;
5598          case  1: /* summary */
5599             lcp.mode = LC_Summary; break;
5600          case  2: { /* kinds */
5601             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5602             if (wcmd == NULL || !MC_(parse_leak_kinds)(wcmd,
5603                                                        &lcp.show_leak_kinds)) {
5604                VG_(gdb_printf) ("missing or malformed leak kinds set\n");
5605                err++;
5606             }
5607             break;
5608          }
5609          case  3: /* reachable */
5610             lcp.show_leak_kinds = RallS;
5611             break;
5612          case  4: /* possibleleak */
5613             lcp.show_leak_kinds
5614                = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
5615             break;
5616          case  5: /* definiteleak */
5617             lcp.show_leak_kinds = R2S(Unreached);
5618             break;
5619          case  6: { /* heuristics */
5620             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5621             if (wcmd == NULL || !MC_(parse_leak_heuristics)(wcmd,
5622                                                             &lcp.heuristics)) {
5623                VG_(gdb_printf) ("missing or malformed heuristics set\n");
5624                err++;
5625             }
5626             break;
5627          }
5628          case  7: /* increased */
5629             lcp.deltamode = LCD_Increased; break;
5630          case  8: /* changed */
5631             lcp.deltamode = LCD_Changed; break;
5632          case  9: /* any */
5633             lcp.deltamode = LCD_Any; break;
5634          case 10: /* unlimited */
5635             lcp.max_loss_records_output = 999999999; break;
5636          case 11: { /* limited */
5637             Int int_value;
5638             const HChar* endptr;
5639 
5640             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5641             if (wcmd == NULL) {
5642                int_value = 0;
5643                endptr = "empty"; /* to report an error below */
5644             } else {
5645                HChar *the_end;
5646                int_value = VG_(strtoll10) (wcmd, &the_end);
5647                endptr = the_end;
5648             }
5649             if (*endptr != '\0')
5650                VG_(gdb_printf) ("missing or malformed integer value\n");
5651             else if (int_value > 0)
5652                lcp.max_loss_records_output = (UInt) int_value;
5653             else
5654                VG_(gdb_printf) ("max_loss_records_output must be >= 1, got %d\n",
5655                                 int_value);
5656             break;
5657          }
5658          default:
5659             tl_assert (0);
5660          }
5661       }
5662       if (!err)
5663          MC_(detect_memory_leaks)(tid, &lcp);
5664       return True;
5665    }
5666 
5667    case  3: { /* make_memory */
5668       Addr address;
5669       SizeT szB = 1;
5670       Int kwdid = VG_(keyword_id)
5671          ("noaccess undefined defined Definedifaddressable",
5672           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5673       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
5674          return True;
5675       switch (kwdid) {
5676       case -2: break;
5677       case -1: break;
5678       case  0: MC_(make_mem_noaccess) (address, szB); break;
5679       case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
5680                                                     MC_OKIND_USER ); break;
5681       case  2: MC_(make_mem_defined) ( address, szB ); break;
5682       case  3: make_mem_defined_if_addressable ( address, szB ); break;
5683       default: tl_assert(0);
5684       }
5685       return True;
5686    }
5687 
5688    case  4: { /* check_memory */
5689       Addr address;
5690       SizeT szB = 1;
5691       Addr bad_addr;
5692       UInt okind;
5693       const HChar* src;
5694       UInt otag;
5695       UInt ecu;
5696       ExeContext* origin_ec;
5697       MC_ReadResult res;
5698 
5699       Int kwdid = VG_(keyword_id)
5700          ("addressable defined",
5701           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5702       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
5703          return True;
5704       switch (kwdid) {
5705       case -2: break;
5706       case -1: break;
5707       case  0: /* addressable */
5708          if (is_mem_addressable ( address, szB, &bad_addr ))
5709             VG_(printf) ("Address %p len %ld addressable\n",
5710                              (void *)address, szB);
5711          else
5712             VG_(printf)
5713                ("Address %p len %ld not addressable:\nbad address %p\n",
5714                 (void *)address, szB, (void *) bad_addr);
5715          MC_(pp_describe_addr) (address);
5716          break;
5717       case  1: /* defined */
5718          res = is_mem_defined ( address, szB, &bad_addr, &otag );
5719          if (MC_AddrErr == res)
5720             VG_(printf)
5721                ("Address %p len %ld not addressable:\nbad address %p\n",
5722                 (void *)address, szB, (void *) bad_addr);
5723          else if (MC_ValueErr == res) {
5724             okind = otag & 3;
5725             switch (okind) {
5726             case MC_OKIND_STACK:
5727                src = " was created by a stack allocation"; break;
5728             case MC_OKIND_HEAP:
5729                src = " was created by a heap allocation"; break;
5730             case MC_OKIND_USER:
5731                src = " was created by a client request"; break;
5732             case MC_OKIND_UNKNOWN:
5733                src = ""; break;
5734             default: tl_assert(0);
5735             }
5736             VG_(printf)
5737                ("Address %p len %ld not defined:\n"
5738                 "Uninitialised value at %p%s\n",
5739                 (void *)address, szB, (void *) bad_addr, src);
5740             ecu = otag & ~3;
5741             if (VG_(is_plausible_ECU)(ecu)) {
5742                origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
5743                VG_(pp_ExeContext)( origin_ec );
5744             }
5745          }
5746          else
5747             VG_(printf) ("Address %p len %ld defined\n",
5748                          (void *)address, szB);
5749          MC_(pp_describe_addr) (address);
5750          break;
5751       default: tl_assert(0);
5752       }
5753       return True;
5754    }
5755 
5756    case  5: { /* block_list */
5757       HChar* wl;
5758       HChar *endptr;
5759       UInt lr_nr = 0;
5760       wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
5761       if (wl != NULL)
5762          lr_nr = VG_(strtoull10) (wl, &endptr);
5763       if (wl == NULL || *endptr != '\0') {
5764          VG_(gdb_printf) ("malformed or missing integer\n");
5765       } else {
5766          // Pass lr_nr-1: the number shown to the user is 1 more than the index in lr_array.
5767          if (lr_nr == 0 || ! MC_(print_block_list) (lr_nr-1))
5768             VG_(gdb_printf) ("invalid loss record nr\n");
5769       }
5770       return True;
5771    }
5772 
5773    case  6: { /* who_points_at */
5774       Addr address;
5775       SizeT szB = 1;
5776 
5777       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
5778          return True;
5779       if (address == (Addr) 0) {
5780          VG_(gdb_printf) ("Cannot search who points at 0x0\n");
5781          return True;
5782       }
5783       MC_(who_points_at) (address, szB);
5784       return True;
5785    }
5786 
5787    default:
5788       tl_assert(0);
5789       return False;
5790    }
5791 }
5792 
5793 /*------------------------------------------------------------*/
5794 /*--- Client requests                                      ---*/
5795 /*------------------------------------------------------------*/
5796 
5797 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
5798 {
5799    Int   i;
5800    Addr  bad_addr;
5801 
5802    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
5803        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
5804        && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
5805        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
5806        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
5807        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
5808        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
5809        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
5810        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
5811        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
5812        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
5813        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
5814        && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0]
5815        && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
5816        && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
5817       return False;
5818 
5819    switch (arg[0]) {
5820       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
5821          Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
5822          if (!ok)
5823             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
5824          *ret = ok ? (UWord)NULL : bad_addr;
5825          break;
5826       }
5827 
5828       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
5829          Bool errorV    = False;
5830          Addr bad_addrV = 0;
5831          UInt otagV     = 0;
5832          Bool errorA    = False;
5833          Addr bad_addrA = 0;
5834          is_mem_defined_comprehensive(
5835             arg[1], arg[2],
5836             &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
5837          );
5838          if (errorV) {
5839             MC_(record_user_error) ( tid, bad_addrV,
5840                                      /*isAddrErr*/False, otagV );
5841          }
5842          if (errorA) {
5843             MC_(record_user_error) ( tid, bad_addrA,
5844                                      /*isAddrErr*/True, 0 );
5845          }
5846          /* Return the lower of the two erring addresses, if any. */
5847          *ret = 0;
5848          if (errorV && !errorA) {
5849             *ret = bad_addrV;
5850          }
5851          if (!errorV && errorA) {
5852             *ret = bad_addrA;
5853          }
5854          if (errorV && errorA) {
5855             *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
5856          }
5857          break;
5858       }
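      /* Client-side view (illustrative): the memcheck.h macros
         VALGRIND_CHECK_MEM_IS_ADDRESSABLE(p, n) and
         VALGRIND_CHECK_MEM_IS_DEFINED(p, n) expand to the two requests
         above and evaluate to 0 if the range is OK, otherwise to an
         offending address. */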
5859 
5860       case VG_USERREQ__DO_LEAK_CHECK: {
5861          LeakCheckParams lcp;
5862 
5863          if (arg[1] == 0)
5864             lcp.mode = LC_Full;
5865          else if (arg[1] == 1)
5866             lcp.mode = LC_Summary;
5867          else {
5868             VG_(message)(Vg_UserMsg,
5869                          "Warning: unknown memcheck leak search mode\n");
5870             lcp.mode = LC_Full;
5871          }
5872 
5873          lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
5874          lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
5875          lcp.heuristics = MC_(clo_leak_check_heuristics);
5876 
5877          if (arg[2] == 0)
5878             lcp.deltamode = LCD_Any;
5879          else if (arg[2] == 1)
5880             lcp.deltamode = LCD_Increased;
5881          else if (arg[2] == 2)
5882             lcp.deltamode = LCD_Changed;
5883          else {
5884             VG_(message)
5885                (Vg_UserMsg,
5886                 "Warning: unknown memcheck leak search deltamode\n");
5887             lcp.deltamode = LCD_Any;
5888          }
5889          lcp.max_loss_records_output = 999999999;
5890          lcp.requested_by_monitor_command = False;
5891 
5892          MC_(detect_memory_leaks)(tid, &lcp);
5893          *ret = 0; /* return value is meaningless */
5894          break;
5895       }
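      /* Client-side view (illustrative): VALGRIND_DO_LEAK_CHECK and
         VALGRIND_DO_QUICK_LEAK_CHECK from memcheck.h issue this request
         with arg[1] selecting full vs summary mode; the delta modes in
         arg[2] correspond to the VALGRIND_DO_ADDED_LEAK_CHECK and
         VALGRIND_DO_CHANGED_LEAK_CHECK variants. */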
5896 
5897       case VG_USERREQ__MAKE_MEM_NOACCESS:
5898          MC_(make_mem_noaccess) ( arg[1], arg[2] );
5899          *ret = -1;
5900          break;
5901 
5902       case VG_USERREQ__MAKE_MEM_UNDEFINED:
5903          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
5904                                               MC_OKIND_USER );
5905          *ret = -1;
5906          break;
5907 
5908       case VG_USERREQ__MAKE_MEM_DEFINED:
5909          MC_(make_mem_defined) ( arg[1], arg[2] );
5910          *ret = -1;
5911          break;
5912 
5913       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
5914          make_mem_defined_if_addressable ( arg[1], arg[2] );
5915          *ret = -1;
5916          break;
5917 
5918       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
5919          if (arg[1] != 0 && arg[2] != 0) {
5920             i = alloc_client_block();
5921             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
5922             cgbs[i].start = arg[1];
5923             cgbs[i].size  = arg[2];
5924             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
5925             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
5926             *ret = i;
5927          } else
5928             *ret = -1;
5929          break;
5930 
5931       case VG_USERREQ__DISCARD: /* discard */
5932          if (cgbs == NULL
5933              || arg[2] >= cgb_used ||
5934              (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
5935             *ret = 1;
5936          } else {
5937             tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
5938             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
5939             VG_(free)(cgbs[arg[2]].desc);
5940             cgb_discards++;
5941             *ret = 0;
5942          }
5943          break;
5944 
5945       case VG_USERREQ__GET_VBITS:
5946          *ret = mc_get_or_set_vbits_for_client
5947                    ( arg[1], arg[2], arg[3],
5948                      False /* get them */,
5949                      True /* is client request */ );
5950          break;
5951 
5952       case VG_USERREQ__SET_VBITS:
5953          *ret = mc_get_or_set_vbits_for_client
5954                    ( arg[1], arg[2], arg[3],
5955                      True /* set them */,
5956                      True /* is client request */ );
5957          break;
5958 
5959       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
5960          UWord** argp = (UWord**)arg;
5961          // MC_(bytes_leaked) et al were set by the last leak check (or zero
5962          // if no prior leak checks performed).
5963          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
5964          *argp[2] = MC_(bytes_dubious);
5965          *argp[3] = MC_(bytes_reachable);
5966          *argp[4] = MC_(bytes_suppressed);
5967          // there is no argp[5]
5968          //*argp[5] = MC_(bytes_indirect);
5969          // XXX need to make *argp[1-4] defined;  currently done in the
5970          // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
5971          *ret = 0;
5972          return True;
5973       }
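      /* Client-side usage (illustrative):
            unsigned long leaked, dubious, reachable, suppressed;
            VALGRIND_DO_LEAK_CHECK;
            VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
         As noted in the XXX comment above, the macro pre-initialises its
         four outputs to zero, which is what leaves *argp[1-4] defined. */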
5974       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
5975          UWord** argp = (UWord**)arg;
5976          // MC_(blocks_leaked) et al were set by the last leak check (or zero
5977          // if no prior leak checks performed).
5978          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
5979          *argp[2] = MC_(blocks_dubious);
5980          *argp[3] = MC_(blocks_reachable);
5981          *argp[4] = MC_(blocks_suppressed);
5982          // there is no argp[5]
5983          //*argp[5] = MC_(blocks_indirect);
5984          // XXX need to make *argp[1-4] defined;  currently done in the
5985          // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
5986          *ret = 0;
5987          return True;
5988       }
5989       case VG_USERREQ__MALLOCLIKE_BLOCK: {
5990          Addr p         = (Addr)arg[1];
5991          SizeT sizeB    =       arg[2];
5992          UInt rzB       =       arg[3];
5993          Bool is_zeroed = (Bool)arg[4];
5994 
5995          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
5996                           MC_AllocCustom, MC_(malloc_list) );
5997          if (rzB > 0) {
5998             MC_(make_mem_noaccess) ( p - rzB, rzB);
5999             MC_(make_mem_noaccess) ( p + sizeB, rzB);
6000          }
6001          return True;
6002       }
6003       case VG_USERREQ__RESIZEINPLACE_BLOCK: {
6004          Addr p         = (Addr)arg[1];
6005          SizeT oldSizeB =       arg[2];
6006          SizeT newSizeB =       arg[3];
6007          UInt rzB       =       arg[4];
6008 
6009          MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
6010          return True;
6011       }
6012       case VG_USERREQ__FREELIKE_BLOCK: {
6013          Addr p         = (Addr)arg[1];
6014          UInt rzB       =       arg[2];
6015 
6016          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
6017          return True;
6018       }
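      /* Sketch of how a custom allocator typically uses the requests
         above via the memcheck.h/valgrind.h macros ("my_pool_alloc",
         "sz" and "RZ" are placeholders; RZ is the redzone size in
         bytes, and the allocation is assumed to really have RZ spare
         bytes on each side):

            void* p = my_pool_alloc(sz);   /* hypothetical allocator */
            VALGRIND_MALLOCLIKE_BLOCK(p, sz, RZ, /*is_zeroed*/0);
            ...
            VALGRIND_FREELIKE_BLOCK(p, RZ);
      */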
6019 
6020       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
6021          HChar* s  = (HChar*)arg[1];
6022          Addr  dst = (Addr) arg[2];
6023          Addr  src = (Addr) arg[3];
6024          SizeT len = (SizeT)arg[4];
6025          MC_(record_overlap_error)(tid, s, src, dst, len);
6026          return True;
6027       }
6028 
6029       case VG_USERREQ__CREATE_MEMPOOL: {
6030          Addr pool      = (Addr)arg[1];
6031          UInt rzB       =       arg[2];
6032          Bool is_zeroed = (Bool)arg[3];
6033 
6034          MC_(create_mempool) ( pool, rzB, is_zeroed );
6035          return True;
6036       }
6037 
6038       case VG_USERREQ__DESTROY_MEMPOOL: {
6039          Addr pool      = (Addr)arg[1];
6040 
6041          MC_(destroy_mempool) ( pool );
6042          return True;
6043       }
6044 
6045       case VG_USERREQ__MEMPOOL_ALLOC: {
6046          Addr pool      = (Addr)arg[1];
6047          Addr addr      = (Addr)arg[2];
6048          UInt size      =       arg[3];
6049 
6050          MC_(mempool_alloc) ( tid, pool, addr, size );
6051          return True;
6052       }
6053 
6054       case VG_USERREQ__MEMPOOL_FREE: {
6055          Addr pool      = (Addr)arg[1];
6056          Addr addr      = (Addr)arg[2];
6057 
6058          MC_(mempool_free) ( pool, addr );
6059          return True;
6060       }
6061 
6062       case VG_USERREQ__MEMPOOL_TRIM: {
6063          Addr pool      = (Addr)arg[1];
6064          Addr addr      = (Addr)arg[2];
6065          UInt size      =       arg[3];
6066 
6067          MC_(mempool_trim) ( pool, addr, size );
6068          return True;
6069       }
6070 
6071       case VG_USERREQ__MOVE_MEMPOOL: {
6072          Addr poolA     = (Addr)arg[1];
6073          Addr poolB     = (Addr)arg[2];
6074 
6075          MC_(move_mempool) ( poolA, poolB );
6076          return True;
6077       }
6078 
6079       case VG_USERREQ__MEMPOOL_CHANGE: {
6080          Addr pool      = (Addr)arg[1];
6081          Addr addrA     = (Addr)arg[2];
6082          Addr addrB     = (Addr)arg[3];
6083          UInt size      =       arg[4];
6084 
6085          MC_(mempool_change) ( pool, addrA, addrB, size );
6086          return True;
6087       }
6088 
6089       case VG_USERREQ__MEMPOOL_EXISTS: {
6090          Addr pool      = (Addr)arg[1];
6091 
6092          *ret = (UWord) MC_(mempool_exists) ( pool );
6093          return True;
6094       }
6095 
6096       case VG_USERREQ__GDB_MONITOR_COMMAND: {
6097          Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
6098          if (handled)
6099             *ret = 1;
6100          else
6101             *ret = 0;
6102          return handled;
6103       }
6104 
6105       case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
6106       case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
6107          Bool addRange
6108             = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
6109          Bool ok
6110             = modify_ignore_ranges(addRange, arg[1], arg[2]);
6111          *ret = ok ? 1 : 0;
6112          return True;
6113       }
6114 
6115       default:
6116          VG_(message)(
6117             Vg_UserMsg,
6118             "Warning: unknown memcheck client request code %llx\n",
6119             (ULong)arg[0]
6120          );
6121          return False;
6122    }
6123    return True;
6124 }
6125 
6126 
6127 /*------------------------------------------------------------*/
6128 /*--- Crude profiling machinery.                           ---*/
6129 /*------------------------------------------------------------*/
6130 
6131 // We track a number of interesting events (using PROF_EVENT)
6132 // if MC_PROFILE_MEMORY is defined.
6133 
6134 #ifdef MC_PROFILE_MEMORY
6135 
6136 UInt   MC_(event_ctr)[N_PROF_EVENTS];
6137 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
6138 
6139 static void init_prof_mem ( void )
6140 {
6141    Int i;
6142    for (i = 0; i < N_PROF_EVENTS; i++) {
6143       MC_(event_ctr)[i] = 0;
6144       MC_(event_ctr_name)[i] = NULL;
6145    }
6146 }
6147 
6148 static void done_prof_mem ( void )
6149 {
6150    Int  i;
6151    Bool spaced = False;
6152    for (i = 0; i < N_PROF_EVENTS; i++) {
6153       if (!spaced && (i % 10) == 0) {
6154          VG_(printf)("\n");
6155          spaced = True;
6156       }
6157       if (MC_(event_ctr)[i] > 0) {
6158          spaced = False;
6159          VG_(printf)( "prof mem event %3d: %9d   %s\n",
6160                       i, MC_(event_ctr)[i],
6161                       MC_(event_ctr_name)[i]
6162                          ? MC_(event_ctr_name)[i] : "unnamed");
6163       }
6164    }
6165 }
6166 
6167 #else
6168 
6169 static void init_prof_mem ( void ) { }
6170 static void done_prof_mem ( void ) { }
6171 
6172 #endif
6173 
6174 
6175 /*------------------------------------------------------------*/
6176 /*--- Origin tracking stuff                                ---*/
6177 /*------------------------------------------------------------*/
6178 
6179 /*--------------------------------------------*/
6180 /*--- Origin tracking: load handlers       ---*/
6181 /*--------------------------------------------*/
6182 
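/* A summary of the scheme used by the handlers below: each OCacheLine
   holds one 32-bit origin tag per aligned 32-bit word
   (line->w32[lineoff]) plus a 4-bit descriptor (line->descr[lineoff])
   with one bit per byte of that word.  A set bit means the byte carries
   the stored tag; a clear bit means it has no origin.  Where two
   origins have to be combined, merge_origins keeps the numerically
   larger tag. */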
6183 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
6184    return or1 > or2 ? or1 : or2;
6185 }
6186 
6187 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
6188    OCacheLine* line;
6189    UChar descr;
6190    UWord lineoff = oc_line_offset(a);
6191    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
6192 
6193    if (OC_ENABLE_ASSERTIONS) {
6194       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6195    }
6196 
6197    line = find_OCacheLine( a );
6198 
6199    descr = line->descr[lineoff];
6200    if (OC_ENABLE_ASSERTIONS) {
6201       tl_assert(descr < 0x10);
6202    }
6203 
6204    if (LIKELY(0 == (descr & (1 << byteoff))))  {
6205       return 0;
6206    } else {
6207       return line->w32[lineoff];
6208    }
6209 }
6210 
6211 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
6212    OCacheLine* line;
6213    UChar descr;
6214    UWord lineoff, byteoff;
6215 
6216    if (UNLIKELY(a & 1)) {
6217       /* Handle misaligned case, slowly. */
6218       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
6219       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
6220       return merge_origins(oLo, oHi);
6221    }
6222 
6223    lineoff = oc_line_offset(a);
6224    byteoff = a & 3; /* 0 or 2 */
6225 
6226    if (OC_ENABLE_ASSERTIONS) {
6227       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6228    }
6229    line = find_OCacheLine( a );
6230 
6231    descr = line->descr[lineoff];
6232    if (OC_ENABLE_ASSERTIONS) {
6233       tl_assert(descr < 0x10);
6234    }
6235 
6236    if (LIKELY(0 == (descr & (3 << byteoff)))) {
6237       return 0;
6238    } else {
6239       return line->w32[lineoff];
6240    }
6241 }
6242 
6243 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
6244    OCacheLine* line;
6245    UChar descr;
6246    UWord lineoff;
6247 
6248    if (UNLIKELY(a & 3)) {
6249       /* Handle misaligned case, slowly. */
6250       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
6251       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
6252       return merge_origins(oLo, oHi);
6253    }
6254 
6255    lineoff = oc_line_offset(a);
6256    if (OC_ENABLE_ASSERTIONS) {
6257       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6258    }
6259 
6260    line = find_OCacheLine( a );
6261 
6262    descr = line->descr[lineoff];
6263    if (OC_ENABLE_ASSERTIONS) {
6264       tl_assert(descr < 0x10);
6265    }
6266 
6267    if (LIKELY(0 == descr)) {
6268       return 0;
6269    } else {
6270       return line->w32[lineoff];
6271    }
6272 }
6273 
6274 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
6275    OCacheLine* line;
6276    UChar descrLo, descrHi, descr;
6277    UWord lineoff;
6278 
6279    if (UNLIKELY(a & 7)) {
6280       /* Handle misaligned case, slowly. */
6281       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
6282       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
6283       return merge_origins(oLo, oHi);
6284    }
6285 
6286    lineoff = oc_line_offset(a);
6287    if (OC_ENABLE_ASSERTIONS) {
6288       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
6289    }
6290 
6291    line = find_OCacheLine( a );
6292 
6293    descrLo = line->descr[lineoff + 0];
6294    descrHi = line->descr[lineoff + 1];
6295    descr   = descrLo | descrHi;
6296    if (OC_ENABLE_ASSERTIONS) {
6297       tl_assert(descr < 0x10);
6298    }
6299 
6300    if (LIKELY(0 == descr)) {
6301       return 0; /* both 32-bit chunks are defined */
6302    } else {
6303       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
6304       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
6305       return merge_origins(oLo, oHi);
6306    }
6307 }
6308 
6309 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
6310    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
6311    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
6312    UInt oBoth = merge_origins(oLo, oHi);
6313    return (UWord)oBoth;
6314 }
6315 
6316 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
6317    UInt oQ0   = (UInt)MC_(helperc_b_load8)( a + 0 );
6318    UInt oQ1   = (UInt)MC_(helperc_b_load8)( a + 8 );
6319    UInt oQ2   = (UInt)MC_(helperc_b_load8)( a + 16 );
6320    UInt oQ3   = (UInt)MC_(helperc_b_load8)( a + 24 );
6321    UInt oAll  = merge_origins(merge_origins(oQ0, oQ1),
6322                               merge_origins(oQ2, oQ3));
6323    return (UWord)oAll;
6324 }
6325 
6326 
6327 /*--------------------------------------------*/
6328 /*--- Origin tracking: store handlers      ---*/
6329 /*--------------------------------------------*/
6330 
6331 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
6332    OCacheLine* line;
6333    UWord lineoff = oc_line_offset(a);
6334    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
6335 
6336    if (OC_ENABLE_ASSERTIONS) {
6337       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6338    }
6339 
6340    line = find_OCacheLine( a );
6341 
6342    if (d32 == 0) {
6343       line->descr[lineoff] &= ~(1 << byteoff);
6344    } else {
6345       line->descr[lineoff] |= (1 << byteoff);
6346       line->w32[lineoff] = d32;
6347    }
6348 }
6349 
6350 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
6351    OCacheLine* line;
6352    UWord lineoff, byteoff;
6353 
6354    if (UNLIKELY(a & 1)) {
6355       /* Handle misaligned case, slowly. */
6356       MC_(helperc_b_store1)( a + 0, d32 );
6357       MC_(helperc_b_store1)( a + 1, d32 );
6358       return;
6359    }
6360 
6361    lineoff = oc_line_offset(a);
6362    byteoff = a & 3; /* 0 or 2 */
6363 
6364    if (OC_ENABLE_ASSERTIONS) {
6365       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6366    }
6367 
6368    line = find_OCacheLine( a );
6369 
6370    if (d32 == 0) {
6371       line->descr[lineoff] &= ~(3 << byteoff);
6372    } else {
6373       line->descr[lineoff] |= (3 << byteoff);
6374       line->w32[lineoff] = d32;
6375    }
6376 }
6377 
6378 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
6379    OCacheLine* line;
6380    UWord lineoff;
6381 
6382    if (UNLIKELY(a & 3)) {
6383       /* Handle misaligned case, slowly. */
6384       MC_(helperc_b_store2)( a + 0, d32 );
6385       MC_(helperc_b_store2)( a + 2, d32 );
6386       return;
6387    }
6388 
6389    lineoff = oc_line_offset(a);
6390    if (OC_ENABLE_ASSERTIONS) {
6391       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
6392    }
6393 
6394    line = find_OCacheLine( a );
6395 
6396    if (d32 == 0) {
6397       line->descr[lineoff] = 0;
6398    } else {
6399       line->descr[lineoff] = 0xF;
6400       line->w32[lineoff] = d32;
6401    }
6402 }
6403 
6404 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
6405    OCacheLine* line;
6406    UWord lineoff;
6407 
6408    if (UNLIKELY(a & 7)) {
6409       /* Handle misaligned case, slowly. */
6410       MC_(helperc_b_store4)( a + 0, d32 );
6411       MC_(helperc_b_store4)( a + 4, d32 );
6412       return;
6413    }
6414 
6415    lineoff = oc_line_offset(a);
6416    if (OC_ENABLE_ASSERTIONS) {
6417       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
6418    }
6419 
6420    line = find_OCacheLine( a );
6421 
6422    if (d32 == 0) {
6423       line->descr[lineoff + 0] = 0;
6424       line->descr[lineoff + 1] = 0;
6425    } else {
6426       line->descr[lineoff + 0] = 0xF;
6427       line->descr[lineoff + 1] = 0xF;
6428       line->w32[lineoff + 0] = d32;
6429       line->w32[lineoff + 1] = d32;
6430    }
6431 }
6432 
6433 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
6434    MC_(helperc_b_store8)( a + 0, d32 );
6435    MC_(helperc_b_store8)( a + 8, d32 );
6436 }
6437 
6438 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
6439    MC_(helperc_b_store8)( a +  0, d32 );
6440    MC_(helperc_b_store8)( a +  8, d32 );
6441    MC_(helperc_b_store8)( a + 16, d32 );
6442    MC_(helperc_b_store8)( a + 24, d32 );
6443 }
6444 
6445 
6446 /*--------------------------------------------*/
6447 /*--- Origin tracking: sarp handlers       ---*/
6448 /*--------------------------------------------*/
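/* These handlers ("sarp" = set-address-range-perms) set or clear origin
   tags over an arbitrary byte range: they peel off leading 1- and 2-byte
   pieces until the address is 4-aligned, then work in 32-bit chunks, and
   finish with the trailing 2- and 1-byte remainders. */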
6449 
6450 __attribute__((noinline))
6451 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
6452    if ((a & 1) && len >= 1) {
6453       MC_(helperc_b_store1)( a, otag );
6454       a++;
6455       len--;
6456    }
6457    if ((a & 2) && len >= 2) {
6458       MC_(helperc_b_store2)( a, otag );
6459       a += 2;
6460       len -= 2;
6461    }
6462    if (len >= 4)
6463       tl_assert(0 == (a & 3));
6464    while (len >= 4) {
6465       MC_(helperc_b_store4)( a, otag );
6466       a += 4;
6467       len -= 4;
6468    }
6469    if (len >= 2) {
6470       MC_(helperc_b_store2)( a, otag );
6471       a += 2;
6472       len -= 2;
6473    }
6474    if (len >= 1) {
6475       MC_(helperc_b_store1)( a, otag );
6476       //a++;
6477       len--;
6478    }
6479    tl_assert(len == 0);
6480 }
6481 
6482 __attribute__((noinline))
6483 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
6484    if ((a & 1) && len >= 1) {
6485       MC_(helperc_b_store1)( a, 0 );
6486       a++;
6487       len--;
6488    }
6489    if ((a & 2) && len >= 2) {
6490       MC_(helperc_b_store2)( a, 0 );
6491       a += 2;
6492       len -= 2;
6493    }
6494    if (len >= 4)
6495       tl_assert(0 == (a & 3));
6496    while (len >= 4) {
6497       MC_(helperc_b_store4)( a, 0 );
6498       a += 4;
6499       len -= 4;
6500    }
6501    if (len >= 2) {
6502       MC_(helperc_b_store2)( a, 0 );
6503       a += 2;
6504       len -= 2;
6505    }
6506    if (len >= 1) {
6507       MC_(helperc_b_store1)( a, 0 );
6508       //a++;
6509       len--;
6510    }
6511    tl_assert(len == 0);
6512 }
6513 
6514 
6515 /*------------------------------------------------------------*/
6516 /*--- Setup and finalisation                               ---*/
6517 /*------------------------------------------------------------*/
6518 
6519 static void mc_post_clo_init ( void )
6520 {
6521    /* If we've been asked to emit XML, mash around various other
6522       options so as to constrain the output somewhat. */
6523    if (VG_(clo_xml)) {
6524       /* Extract as much info as possible from the leak checker. */
6525       MC_(clo_leak_check) = LC_Full;
6526    }
6527 
6528    if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
6529       VG_(message)(Vg_UserMsg,
6530                    "Warning: --freelist-big-blocks value %lld has no effect\n"
6531                    "as it is >= to --freelist-vol value %lld\n",
6532                    MC_(clo_freelist_big_blocks),
6533                    MC_(clo_freelist_vol));
6534 
6535    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
6536 
6537    if (MC_(clo_mc_level) == 3) {
6538       /* We're doing origin tracking. */
6539 #     ifdef PERF_FAST_STACK
6540       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
6541       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
6542       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
6543       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
6544       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
6545       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
6546       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
6547       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
6548       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
6549 #     endif
6550       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
6551       VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU );
6552    } else {
6553       /* Not doing origin tracking */
6554 #     ifdef PERF_FAST_STACK
6555       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
6556       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
6557       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
6558       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
6559       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
6560       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
6561       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
6562       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
6563       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
6564 #     endif
6565       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
6566       VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
6567    }
6568 
6569    // We assume that brk()/sbrk() does not initialise new memory.  Is this
6570    // accurate?  John Reiser says:
6571    //
6572    //   0) sbrk() can *decrease* process address space.  No zero fill is done
6573    //   for a decrease, not even the fragment on the high end of the last page
6574    //   that is beyond the new highest address.  For maximum safety and
6575    //   portability, then the bytes in the last page that reside above [the
6576    //   new] sbrk(0) should be considered to be uninitialized, but in practice
6577    //   it is exceedingly likely that they will retain their previous
6578    //   contents.
6579    //
6580    //   1) If an increase is large enough to require new whole pages, then
6581    //   those new whole pages (like all new pages) are zero-filled by the
6582    //   operating system.  So if sbrk(0) already is page aligned, then
6583    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
6584    //
6585    //   2) Any increase that lies within an existing allocated page is not
6586    //   changed.  So if (x = sbrk(0)) is not page aligned, then
6587    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
6588    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
6589    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
6590    //   of them come along for the ride because the operating system deals
6591    //   only in whole pages.  Again, for maximum safety and portability, then
6592    //   anything that lives above [the new] sbrk(0) should be considered
6593    //   uninitialized, but in practice will retain previous contents [zero in
6594    //   this case.]"
6595    //
6596    // In short:
6597    //
6598    //   A key property of sbrk/brk is that new whole pages that are supplied
6599    //   by the operating system *do* get initialized to zero.
6600    //
6601    // As for the portability of all this:
6602    //
6603    //   sbrk and brk are not POSIX.  However, any system that is a derivative
6604    //   of *nix has sbrk and brk because too much software (such as
6605    //   the Bourne shell) relies on the traditional memory map (.text,
6606    //   .data+.bss, stack) and the existence of sbrk/brk.
6607    //
6608    // So we should arguably observe all this.  However:
6609    // - The current inaccuracy has caused maybe one complaint in seven years(?)
6610    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
6611    //   doubt most programmers know the above information.
6612    // So I'm not terribly unhappy with marking it as undefined. --njn.
6613    //
6614    // [More:  I think most of what John said only applies to sbrk().  It seems
6615    // that brk() always deals in whole pages.  And since this event deals
6616    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
6617    // just mark all memory it allocates as defined.]
6618    //
6619    if (MC_(clo_mc_level) == 3)
6620       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_make_ECU );
6621    else
6622       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_no_ECU );
6623 
6624    /* This origin tracking cache is huge (~100M), so only initialise
6625       if we need it. */
6626    if (MC_(clo_mc_level) >= 3) {
6627       init_OCache();
6628       tl_assert(ocacheL1 != NULL);
6629       tl_assert(ocacheL2 != NULL);
6630    } else {
6631       tl_assert(ocacheL1 == NULL);
6632       tl_assert(ocacheL2 == NULL);
6633    }
6634 
6635    MC_(chunk_poolalloc) = VG_(newPA)
6636       (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
6637        1000,
6638        VG_(malloc),
6639        "mc.cMC.1 (MC_Chunk pools)",
6640        VG_(free));
6641 
6642    /* Do not check definedness of guest state if --undef-value-errors=no */
6643    if (MC_(clo_mc_level) >= 2)
6644       VG_(track_pre_reg_read) ( mc_pre_reg_read );
6645 }
6646 
6647 static void print_SM_info(const HChar* type, Int n_SMs)
6648 {
6649    VG_(message)(Vg_DebugMsg,
6650       " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
6651       type,
6652       n_SMs,
6653       n_SMs * sizeof(SecMap) / 1024UL,
6654       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
6655 }
6656 
6657 static void mc_print_stats (void)
6658 {
6659    SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
6660 
6661    VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
6662                 VG_(free_queue_volume), VG_(free_queue_length));
6663    VG_(message)(Vg_DebugMsg,
6664       " memcheck: sanity checks: %d cheap, %d expensive\n",
6665       n_sanity_cheap, n_sanity_expensive );
6666    VG_(message)(Vg_DebugMsg,
6667       " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
6668       n_auxmap_L2_nodes,
6669       n_auxmap_L2_nodes * 64,
6670       n_auxmap_L2_nodes / 16 );
6671    VG_(message)(Vg_DebugMsg,
6672       " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
6673       n_auxmap_L1_searches, n_auxmap_L1_cmps,
6674       (10ULL * n_auxmap_L1_cmps)
6675          / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
6676    );
6677    VG_(message)(Vg_DebugMsg,
6678       " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
6679       n_auxmap_L2_searches, n_auxmap_L2_nodes
6680    );
6681 
6682    print_SM_info("n_issued     ", n_issued_SMs);
6683    print_SM_info("n_deissued   ", n_deissued_SMs);
6684    print_SM_info("max_noaccess ", max_noaccess_SMs);
6685    print_SM_info("max_undefined", max_undefined_SMs);
6686    print_SM_info("max_defined  ", max_defined_SMs);
6687    print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
6688 
6689    // Three DSMs, plus the non-DSM ones
6690    max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
6691    // The 3*sizeof(Word) bytes are the AVL node metadata overhead.
6692    // The VG_ROUNDUP is there because the OSet pool allocator will/must
6693    // align the elements to pointer size.
6694    // Note that the pool allocator has some additional small overhead
6695    // which is not counted in the below.
6696    // Hardwiring this logic sucks, but I don't see how else to do it.
6697    max_secVBit_szB = max_secVBit_nodes *
6698          (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
6699    max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
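   /* A purely illustrative worked example, assuming a 64-bit host and,
      say, a 24-byte SecVBitNode: the AVL metadata is 3*8 = 24 bytes and
      VG_ROUNDUP(24, 8) stays 24, giving 48 bytes per node, so a million
      sec-V-bit nodes would add roughly 48MB on top of the SecMaps
      counted above. */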
6700 
6701    VG_(message)(Vg_DebugMsg,
6702       " memcheck: max sec V bit nodes:    %d (%ldk, %ldM)\n",
6703       max_secVBit_nodes, max_secVBit_szB / 1024,
6704                          max_secVBit_szB / (1024 * 1024));
6705    VG_(message)(Vg_DebugMsg,
6706       " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
6707       sec_vbits_new_nodes + sec_vbits_updates,
6708       sec_vbits_new_nodes, sec_vbits_updates );
6709    VG_(message)(Vg_DebugMsg,
6710       " memcheck: max shadow mem size:   %ldk, %ldM\n",
6711       max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
6712 
6713    if (MC_(clo_mc_level) >= 3) {
6714       VG_(message)(Vg_DebugMsg,
6715                    " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
6716                    stats_ocacheL1_find,
6717                    stats_ocacheL1_misses,
6718                    stats_ocacheL1_lossage );
6719       VG_(message)(Vg_DebugMsg,
6720                    " ocacheL1: %'12lu at 0   %'12lu at 1\n",
6721                    stats_ocacheL1_find - stats_ocacheL1_misses
6722                       - stats_ocacheL1_found_at_1
6723                       - stats_ocacheL1_found_at_N,
6724                    stats_ocacheL1_found_at_1 );
6725       VG_(message)(Vg_DebugMsg,
6726                    " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
6727                    stats_ocacheL1_found_at_N,
6728                    stats_ocacheL1_movefwds );
6729       VG_(message)(Vg_DebugMsg,
6730                    " ocacheL1: %'12lu sizeB  %'12u useful\n",
6731                    (UWord)sizeof(OCache),
6732                    4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
6733       VG_(message)(Vg_DebugMsg,
6734                    " ocacheL2: %'12lu refs   %'12lu misses\n",
6735                    stats__ocacheL2_refs,
6736                    stats__ocacheL2_misses );
6737       VG_(message)(Vg_DebugMsg,
6738                    " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
6739                    stats__ocacheL2_n_nodes_max,
6740                    stats__ocacheL2_n_nodes );
6741       VG_(message)(Vg_DebugMsg,
6742                    " niacache: %'12lu refs   %'12lu misses\n",
6743                    stats__nia_cache_queries, stats__nia_cache_misses);
6744    } else {
6745       tl_assert(ocacheL1 == NULL);
6746       tl_assert(ocacheL2 == NULL);
6747    }
6748 }
6749 
6750 
6751 static void mc_fini ( Int exitcode )
6752 {
6753    MC_(print_malloc_stats)();
6754 
6755    if (MC_(clo_leak_check) != LC_Off) {
6756       LeakCheckParams lcp;
6757       lcp.mode = MC_(clo_leak_check);
6758       lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
6759       lcp.heuristics = MC_(clo_leak_check_heuristics);
6760       lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
6761       lcp.deltamode = LCD_Any;
6762       lcp.max_loss_records_output = 999999999;
6763       lcp.requested_by_monitor_command = False;
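      /* Presumably: LCD_Any means "report all loss records, not just
         those that changed since a previous check", and the very large
         max_loss_records_output amounts to "no limit" for this final
         end-of-run report. */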
6764       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
6765    } else {
6766       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6767          VG_(umsg)(
6768             "For a detailed leak analysis, rerun with: --leak-check=full\n"
6769             "\n"
6770          );
6771       }
6772    }
6773 
6774    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6775       VG_(message)(Vg_UserMsg,
6776                    "For counts of detected and suppressed errors, rerun with: -v\n");
6777    }
6778 
6779    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
6780        && MC_(clo_mc_level) == 2) {
6781       VG_(message)(Vg_UserMsg,
6782                    "Use --track-origins=yes to see where "
6783                    "uninitialised values come from\n");
6784    }
6785 
6786    /* Print a warning if any client-request generated ignore-ranges
6787       still exist.  It would be reasonable to expect that a properly
6788       written program would remove any such ranges before exiting, and
6789       since they are a bit on the dangerous side, let's warn about them.  By
6790       contrast ranges which are specified on the command line normally
6791       pertain to hardware mapped into the address space, and so we
6792       can't expect the client to have got rid of them. */
6793    if (gIgnoredAddressRanges) {
6794       Word i, nBad = 0;
6795       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
6796          UWord val     = IAR_INVALID;
6797          UWord key_min = ~(UWord)0;
6798          UWord key_max = (UWord)0;
6799          VG_(indexRangeMap)( &key_min, &key_max, &val,
6800                              gIgnoredAddressRanges, i );
6801          if (val != IAR_ClientReq)
6802            continue;
6803          /* Print the offending range.  Also, if it is the first,
6804             print a banner before it. */
6805          nBad++;
6806          if (nBad == 1) {
6807             VG_(umsg)(
6808               "WARNING: exiting program has the following client-requested\n"
6809               "WARNING: address error disablement range(s) still in force,\n"
6810               "WARNING: "
6811                  "possibly as a result of some mistake in the use of the\n"
6812               "WARNING: "
6813                  "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
6814             );
6815          }
6816          VG_(umsg)("   [%ld]  0x%016llx-0x%016llx  %s\n",
6817                    i, (ULong)key_min, (ULong)key_max, showIARKind(val));
6818       }
6819    }
6820 
6821    done_prof_mem();
6822 
6823    if (VG_(clo_stats))
6824       mc_print_stats();
6825 
6826    if (0) {
6827       VG_(message)(Vg_DebugMsg,
6828         "------ Valgrind's client block stats follow ---------------\n" );
6829       show_client_block_stats();
6830    }
6831 }
6832 
6833 /* Mark the given addr/len range as unaddressable for the watchpoint
6834    implementation.  The PointKind will be handled at access time. */
6835 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
6836                                                   Addr addr, SizeT len)
6837 {
6838    /* GDBTD this is somewhat fishy.  We should probably save the previous
6839       accessibility and definedness in gdbserver, so that they can be
6840       restored properly.  Currently, we assume that the user only watches
6841       things which are properly addressable and defined. */
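   /* A concrete consequence of that assumption: if the watched range was
      merely addressable-but-undefined beforehand, removing the watchpoint
      re-marks it as defined, which could then hide genuine
      uninitialised-value errors in that range. */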
6842    if (insert)
6843       MC_(make_mem_noaccess) (addr, len);
6844    else
6845       MC_(make_mem_defined)  (addr, len);
6846    return True;
6847 }
6848 
6849 static void mc_pre_clo_init(void)
6850 {
6851    VG_(details_name)            ("Memcheck");
6852    VG_(details_version)         (NULL);
6853    VG_(details_description)     ("a memory error detector");
6854    VG_(details_copyright_author)(
6855       "Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.");
6856    VG_(details_bug_reports_to)  (VG_BUGS_TO);
6857    VG_(details_avg_translation_sizeB) ( 640 );
6858 
6859    VG_(basic_tool_funcs)          (mc_post_clo_init,
6860                                    MC_(instrument),
6861                                    mc_fini);
6862 
6863    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
6864 
6865 
6866    VG_(needs_core_errors)         ();
6867    VG_(needs_tool_errors)         (MC_(eq_Error),
6868                                    MC_(before_pp_Error),
6869                                    MC_(pp_Error),
6870                                    True,/*show TIDs for errors*/
6871                                    MC_(update_Error_extra),
6872                                    MC_(is_recognised_suppression),
6873                                    MC_(read_extra_suppression_info),
6874                                    MC_(error_matches_suppression),
6875                                    MC_(get_error_name),
6876                                    MC_(get_extra_suppression_info),
6877                                    MC_(print_extra_suppression_use),
6878                                    MC_(update_extra_suppression_use));
6879    VG_(needs_libc_freeres)        ();
6880    VG_(needs_command_line_options)(mc_process_cmd_line_options,
6881                                    mc_print_usage,
6882                                    mc_print_debug_usage);
6883    VG_(needs_client_requests)     (mc_handle_client_request);
6884    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
6885                                    mc_expensive_sanity_check);
6886    VG_(needs_print_stats)         (mc_print_stats);
6887    VG_(needs_info_location)       (MC_(pp_describe_addr));
6888    VG_(needs_malloc_replacement)  (MC_(malloc),
6889                                    MC_(__builtin_new),
6890                                    MC_(__builtin_vec_new),
6891                                    MC_(memalign),
6892                                    MC_(calloc),
6893                                    MC_(free),
6894                                    MC_(__builtin_delete),
6895                                    MC_(__builtin_vec_delete),
6896                                    MC_(realloc),
6897                                    MC_(malloc_usable_size),
6898                                    MC_MALLOC_DEFAULT_REDZONE_SZB );
6899    MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
6900 
6901    VG_(needs_xml_output)          ();
6902 
6903    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
6904 
6905    // Handling of mmap and mprotect isn't simple (well, it is simple,
6906    // but the justification isn't).  See comments above, just prior to
6907    // mc_new_mem_mmap.
6908    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
6909    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
6910 
6911    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
6912 
6913    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
6914    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
6915    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
6916 
6917    /* Defer the specification of the new_mem_stack functions to the
6918       post_clo_init function, since we need to first parse the command
6919       line before deciding which set to use. */
6920 
6921 #  ifdef PERF_FAST_STACK
6922    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
6923    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
6924    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
6925    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
6926    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
6927    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
6928    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
6929    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
6930    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
6931 #  endif
6932    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
6933 
6934    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
6935 
6936    VG_(track_pre_mem_read)        ( check_mem_is_defined );
6937    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
6938    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
6939    VG_(track_post_mem_write)      ( mc_post_mem_write );
6940 
6941    VG_(track_post_reg_write)                  ( mc_post_reg_write );
6942    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
6943 
6944    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
6945 
6946    init_shadow_memory();
6947    // MC_(chunk_poolalloc) must be allocated in post_clo_init
6948    tl_assert(MC_(chunk_poolalloc) == NULL);
6949    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
6950    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
6951    init_prof_mem();
6952 
6953    tl_assert( mc_expensive_sanity_check() );
6954 
6955    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
6956    tl_assert(sizeof(UWord) == sizeof(Addr));
6957    // Call me paranoid.  I don't care.
6958    tl_assert(sizeof(void*) == sizeof(Addr));
6959 
6960    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
6961    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
6962 
6963    /* This is small.  Always initialise it. */
6964    init_nia_to_ecu_cache();
6965 
6966    /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
6967       whether we'll need them: the command line args haven't been
6968       processed yet.  Hence defer it to mc_post_clo_init. */
6969    tl_assert(ocacheL1 == NULL);
6970    tl_assert(ocacheL2 == NULL);
6971 
6972    /* Check some important stuff.  See extensive comments above
6973       re UNALIGNED_OR_HIGH for background. */
6974 #  if VG_WORDSIZE == 4
6975    tl_assert(sizeof(void*) == 4);
6976    tl_assert(sizeof(Addr)  == 4);
6977    tl_assert(sizeof(UWord) == 4);
6978    tl_assert(sizeof(Word)  == 4);
6979    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
6980    tl_assert(MASK(1) == 0UL);
6981    tl_assert(MASK(2) == 1UL);
6982    tl_assert(MASK(4) == 3UL);
6983    tl_assert(MASK(8) == 7UL);
6984 #  else
6985    tl_assert(VG_WORDSIZE == 8);
6986    tl_assert(sizeof(void*) == 8);
6987    tl_assert(sizeof(Addr)  == 8);
6988    tl_assert(sizeof(UWord) == 8);
6989    tl_assert(sizeof(Word)  == 8);
6990    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFFULL);
6991    tl_assert(MASK(1) == 0xFFFFFFF000000000ULL);
6992    tl_assert(MASK(2) == 0xFFFFFFF000000001ULL);
6993    tl_assert(MASK(4) == 0xFFFFFFF000000003ULL);
6994    tl_assert(MASK(8) == 0xFFFFFFF000000007ULL);
6995 #  endif
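   /* The 64-bit values above are consistent with
        MASK(n) == ~MAX_PRIMARY_ADDRESS | (n-1)
      e.g. ~0xFFFFFFFFFULL | 7 == 0xFFFFFFF000000007ULL.  So (a & MASK(8))
      is nonzero exactly when a lies above the primary map or is not
      8-aligned, which is presumably what the UNALIGNED_OR_HIGH fast-path
      test relies on; see the comments earlier in this file for the
      authoritative definition. */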
6996 }
6997 
6998 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
6999 
7000 /*--------------------------------------------------------------------*/
7001 /*--- end                                                mc_main.c ---*/
7002 /*--------------------------------------------------------------------*/
7003