• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
4 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
5 /*---                                                    mc_main.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of MemCheck, a heavyweight Valgrind tool for
10    detecting memory errors.
11 
12    Copyright (C) 2000-2012 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 
33 #include "pub_tool_basics.h"
34 #include "pub_tool_aspacemgr.h"
35 #include "pub_tool_gdbserver.h"
36 #include "pub_tool_poolalloc.h"
37 #include "pub_tool_hashtable.h"     // For mc_include.h
38 #include "pub_tool_libcbase.h"
39 #include "pub_tool_libcassert.h"
40 #include "pub_tool_libcprint.h"
41 #include "pub_tool_machine.h"
42 #include "pub_tool_mallocfree.h"
43 #include "pub_tool_options.h"
44 #include "pub_tool_oset.h"
45 #include "pub_tool_replacemalloc.h"
46 #include "pub_tool_tooliface.h"
47 #include "pub_tool_threadstate.h"
48 
49 #include "mc_include.h"
50 #include "memcheck.h"   /* for client requests */
51 
52 
53 /* Set to 1 to do a little more sanity checking */
54 #define VG_DEBUG_MEMORY 0
55 
56 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
57 
58 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
59 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
60 
61 
62 /*------------------------------------------------------------*/
63 /*--- Fast-case knobs                                      ---*/
64 /*------------------------------------------------------------*/
65 
66 // Comment these out to disable the fast cases (don't just set them to zero).
67 
68 #define PERF_FAST_LOADV    1
69 #define PERF_FAST_STOREV   1
70 
71 #define PERF_FAST_SARP     1
72 
73 #define PERF_FAST_STACK    1
74 #define PERF_FAST_STACK2   1
75 
76 /* Change this to 1 to enable assertions on origin tracking cache fast
77    paths */
78 #define OC_ENABLE_ASSERTIONS 0
79 
80 
81 /*------------------------------------------------------------*/
82 /*--- Comments on the origin tracking implementation       ---*/
83 /*------------------------------------------------------------*/
84 
85 /* See detailed comment entitled
86    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
87    which is contained further on in this file. */
88 
89 
90 /*------------------------------------------------------------*/
91 /*--- V bits and A bits                                    ---*/
92 /*------------------------------------------------------------*/
93 
94 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
95    thinks the corresponding value bit is defined.  And every memory byte
96    has an A bit, which tracks whether Memcheck thinks the program can access
97    it safely (ie. it's mapped, and has at least one of the RWX permission bits
98    set).  So every N-bit register is shadowed with N V bits, and every memory
99    byte is shadowed with 8 V bits and one A bit.
100 
101    In the implementation, we use two forms of compression (compressed V bits
102    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
103    for memory.
104 
105    Memcheck also tracks extra information about each heap block that is
106    allocated, for detecting memory leaks and other purposes.
107 */
108 
109 /*------------------------------------------------------------*/
110 /*--- Basic A/V bitmap representation.                     ---*/
111 /*------------------------------------------------------------*/
112 
113 /* All reads and writes are checked against a memory map (a.k.a. shadow
114    memory), which records the state of all memory in the process.
115 
116    On 32-bit machines the memory map is organised as follows.
117    The top 16 bits of an address are used to index into a top-level
118    map table, containing 65536 entries.  Each entry is a pointer to a
   second-level map, which records the accessibility and validity
120    permissions for the 65536 bytes indexed by the lower 16 bits of the
121    address.  Each byte is represented by two bits (details are below).  So
122    each second-level map contains 16384 bytes.  This two-level arrangement
123    conveniently divides the 4G address space into 64k lumps, each size 64k
124    bytes.
125 
126    All entries in the primary (top-level) map must point to a valid
127    secondary (second-level) map.  Since many of the 64kB chunks will
128    have the same status for every bit -- ie. noaccess (for unused
129    address space) or entirely addressable and defined (for code segments) --
130    there are three distinguished secondary maps, which indicate 'noaccess',
131    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
132    map entry points to the relevant distinguished map.  In practice,
133    typically more than half of the addressable memory is represented with
134    the 'undefined' or 'defined' distinguished secondary map, so it gives a
135    good saving.  It also lets us set the V+A bits of large address regions
136    quickly in set_address_range_perms().
137 
138    On 64-bit machines it's more complicated.  If we followed the same basic
139    scheme we'd have a four-level table which would require too many memory
140    accesses.  So instead the top-level map table has 2^19 entries (indexed
141    using bits 16..34 of the address);  this covers the bottom 32GB.  Any
142    accesses above 32GB are handled with a slow, sparse auxiliary table.
143    Valgrind's address space manager tries very hard to keep things below
144    this 32GB barrier so that performance doesn't suffer too much.
145 
146    Note that this file has a lot of different functions for reading and
147    writing shadow memory.  Only a couple are strictly necessary (eg.
148    get_vabits2 and set_vabits2), most are just specialised for specific
149    common cases to improve performance.
150 
151    Aside: the V+A bits are less precise than they could be -- we have no way
152    of marking memory as read-only.  It would be great if we could add an
153    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
154    which requires 2.3 bits to hold, and there's no way to do that elegantly
155    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
156    seem worth it.
157 */
158 
159 /* --------------- Basic configuration --------------- */
160 
161 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
162 
163 #if VG_WORDSIZE == 4
164 
165 /* cover the entire address space */
166 #  define N_PRIMARY_BITS  16
167 
168 #else
169 
170 /* Just handle the first 32G fast and the rest via auxiliary
171    primaries.  If you change this, Memcheck will assert at startup.
172    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
173 #  define N_PRIMARY_BITS  19
174 
175 #endif
176 
177 
178 /* Do not change this. */
179 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
180 
181 /* Do not change this. */
182 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
183 
184 
185 /* --------------- Secondary maps --------------- */
186 
// Each byte of memory conceptually has an A bit, which indicates its
// addressability, and 8 V bits, which indicate its definedness.
189 //
190 // But because very few bytes are partially defined, we can use a nice
191 // compression scheme to reduce the size of shadow memory.  Each byte of
// memory has 2 bits which indicate its state (ie. V+A bits):
193 //
194 //   00:  noaccess    (unaddressable but treated as fully defined)
195 //   01:  undefined   (addressable and fully undefined)
196 //   10:  defined     (addressable and fully defined)
197 //   11:  partdefined (addressable and partially defined)
198 //
199 // In the "partdefined" case, we use a secondary table to store the V bits.
200 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
201 // bits.
202 //
203 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
204 // four bytes (32 bits) of memory are in each chunk.  Hence the name
205 // "vabits8".  This lets us get the V+A bits for four bytes at a time
206 // easily (without having to do any shifting and/or masking), and that is a
207 // very common operation.  (Note that although each vabits8 chunk
208 // is 8 bits in size, it represents 32 bits of memory.)
209 //
210 // The representation is "inverse" little-endian... each 4 bytes of
211 // memory is represented by a 1 byte value, where:
212 //
213 // - the status of byte (a+0) is held in bits [1..0]
214 // - the status of byte (a+1) is held in bits [3..2]
215 // - the status of byte (a+2) is held in bits [5..4]
216 // - the status of byte (a+3) is held in bits [7..6]
217 //
218 // It's "inverse" because endianness normally describes a mapping from
219 // value bits to memory addresses;  in this case the mapping is inverted.
220 // Ie. instead of particular value bits being held in certain addresses, in
221 // this case certain addresses are represented by particular value bits.
222 // See insert_vabits2_into_vabits8() for an example.
223 //
224 // But note that we don't compress the V bits stored in registers;  they
// need to be explicit to make the shadow operations possible.  Therefore
226 // when moving values between registers and memory we need to convert
227 // between the expanded in-register format and the compressed in-memory
228 // format.  This isn't so difficult, it just requires careful attention in a
229 // few places.
230 
231 // These represent eight bits of memory.
232 #define VA_BITS2_NOACCESS     0x0      // 00b
233 #define VA_BITS2_UNDEFINED    0x1      // 01b
234 #define VA_BITS2_DEFINED      0x2      // 10b
235 #define VA_BITS2_PARTDEFINED  0x3      // 11b
236 
237 // These represent 16 bits of memory.
238 #define VA_BITS4_NOACCESS     0x0      // 00_00b
239 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
240 #define VA_BITS4_DEFINED      0xa      // 10_10b
241 
242 // These represent 32 bits of memory.
243 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
244 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
245 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
246 
247 // These represent 64 bits of memory.
248 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
249 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
250 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
251 
252 
// Number of vabits8 chunks in one secondary map.  Each chunk covers 4
// bytes of memory, so 16384 chunks cover 65536 bytes.
#define SM_CHUNKS             16384
// Index, within a secondary map, of the vabits8 chunk describing address
// 'aaa': the low 16 address bits divided by 4.
#define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
// As SM_OFF, but in units of 8 bytes of memory (low 16 address bits
// divided by 8) -- presumably for indexing the map as 16-bit values
// (cf. VA_BITS16_*); confirm against the users of this macro.
#define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
256 
257 // Paranoia:  it's critical for performance that the requested inlining
258 // occurs.  So try extra hard.
259 #define INLINE    inline __attribute__((always_inline))
260 
start_of_this_sm(Addr a)261 static INLINE Addr start_of_this_sm ( Addr a ) {
262    return (a & (~SM_MASK));
263 }
is_start_of_sm(Addr a)264 static INLINE Bool is_start_of_sm ( Addr a ) {
265    return (start_of_this_sm(a) == a);
266 }
267 
// A secondary map: the compressed V+A bits for one 64KB region of
// memory, held as SM_CHUNKS vabits8 bytes (each vabits8 byte describes
// four bytes of memory).
typedef
   struct {
      UChar vabits8[SM_CHUNKS];
   }
   SecMap;
273 
274 // 3 distinguished secondary maps, one for no-access, one for
275 // accessible but undefined, and one for accessible and defined.
276 // Distinguished secondaries may never be modified.
277 #define SM_DIST_NOACCESS   0
278 #define SM_DIST_UNDEFINED  1
279 #define SM_DIST_DEFINED    2
280 
281 static SecMap sm_distinguished[3];
282 
is_distinguished_sm(SecMap * sm)283 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
284    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
285 }
286 
287 // Forward declaration
288 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
289 
290 /* dist_sm points to one of our three distinguished secondaries.  Make
291    a copy of it so that we can write to it.
292 */
copy_for_writing(SecMap * dist_sm)293 static SecMap* copy_for_writing ( SecMap* dist_sm )
294 {
295    SecMap* new_sm;
296    tl_assert(dist_sm == &sm_distinguished[0]
297           || dist_sm == &sm_distinguished[1]
298           || dist_sm == &sm_distinguished[2]);
299 
300    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
301    if (new_sm == NULL)
302       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
303                                    sizeof(SecMap) );
304    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
305    update_SM_counts(dist_sm, new_sm);
306    return new_sm;
307 }
308 
309 /* --------------- Stats --------------- */
310 
/* Secondary-map accounting, maintained by update_SM_counts().
   n_issued_SMs / n_deissued_SMs count transitions to / from a
   non-distinguished secondary; the n_*_SMs values are current
   populations per kind of secondary, and the max_*_SMs values are
   the corresponding high-water marks. */
static Int   n_issued_SMs      = 0;
static Int   n_deissued_SMs    = 0;
static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
static Int   n_undefined_SMs   = 0;
static Int   n_defined_SMs     = 0;
static Int   n_non_DSM_SMs     = 0;
static Int   max_noaccess_SMs  = 0;
static Int   max_undefined_SMs = 0;
static Int   max_defined_SMs   = 0;
static Int   max_non_DSM_SMs   = 0;

/* # searches initiated in auxmap_L1, and # base cmps required */
static ULong n_auxmap_L1_searches  = 0;
static ULong n_auxmap_L1_cmps      = 0;
/* # of searches that missed in auxmap_L1 and therefore had to
   be handed to auxmap_L2. And the number of nodes inserted. */
static ULong n_auxmap_L2_searches  = 0;
static ULong n_auxmap_L2_nodes     = 0;

/* Presumably the number of cheap / expensive sanity checks run so far;
   they are not updated anywhere in this part of the file -- confirm. */
static Int   n_sanity_cheap     = 0;
static Int   n_sanity_expensive = 0;

/* Presumably the current and peak node counts of the secondary V bit
   table; they are not updated anywhere in this part of the file --
   confirm. */
static Int   n_secVBit_nodes   = 0;
static Int   max_secVBit_nodes = 0;
335 
/* Record, in the SM statistics, that a primary-map slot which used to
   refer to 'oldSM' now refers to 'newSM'.

   The population counter for the kind of map being left is
   decremented and the one for the kind being entered is incremented.
   Any pointer that is not one of the three distinguished secondaries
   counts as a "non-DSM" map; leaving one also bumps n_deissued_SMs
   and entering one bumps n_issued_SMs.  Finally the high-water marks
   are refreshed.
*/
static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
{
   /* Account for the map being replaced. */
   if (oldSM == &sm_distinguished[SM_DIST_NOACCESS]) {
      n_noaccess_SMs--;
   } else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) {
      n_undefined_SMs--;
   } else if (oldSM == &sm_distinguished[SM_DIST_DEFINED]) {
      n_defined_SMs--;
   } else {
      n_non_DSM_SMs--;
      n_deissued_SMs++;
   }

   /* Account for the map replacing it. */
   if (newSM == &sm_distinguished[SM_DIST_NOACCESS]) {
      n_noaccess_SMs++;
   } else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) {
      n_undefined_SMs++;
   } else if (newSM == &sm_distinguished[SM_DIST_DEFINED]) {
      n_defined_SMs++;
   } else {
      n_non_DSM_SMs++;
      n_issued_SMs++;
   }

   /* Keep the peak values up to date. */
   if (max_noaccess_SMs < n_noaccess_SMs)
      max_noaccess_SMs = n_noaccess_SMs;
   if (max_undefined_SMs < n_undefined_SMs)
      max_undefined_SMs = n_undefined_SMs;
   if (max_defined_SMs < n_defined_SMs)
      max_defined_SMs = n_defined_SMs;
   if (max_non_DSM_SMs < n_non_DSM_SMs)
      max_non_DSM_SMs = n_non_DSM_SMs;
}
355 
356 /* --------------- Primary maps --------------- */
357 
358 /* The main primary map.  This covers some initial part of the address
359    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
360    handled using the auxiliary primary map.
361 */
362 static SecMap* primary_map[N_PRIMARY_MAP];
363 
364 
365 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
366    value, and sm points at the relevant secondary map.  As with the
367    main primary map, the secondary may be either a real secondary, or
368    one of the three distinguished secondaries.  DO NOT CHANGE THIS
369    LAYOUT: the first word has to be the key for OSet fast lookups.
370 */
371 typedef
372    struct {
373       Addr    base;
374       SecMap* sm;
375    }
376    AuxMapEnt;
377 
/* Tunable parameter: How big is the L1 queue? */
#define N_AUXMAP_L1 24

/* Tunable parameter: How far along the L1 queue to insert
   entries resulting from L2 lookups? */
#define AUXMAP_L1_INSERT_IX 12

/* auxmap_L1 is a small front cache over auxmap_L2, searched linearly
   by maybe_find_in_auxmap().  An unused slot has .base == 0 and
   .ent == NULL; a used slot duplicates the .base of the auxmap_L2
   node that .ent points at. */
static struct {
          Addr       base;
          AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
       }
       auxmap_L1[N_AUXMAP_L1];

/* The complete set of auxiliary primary map entries: an OSet of
   AuxMapEnt keyed on .base, created by init_auxmap_L1_L2(). */
static OSet* auxmap_L2 = NULL;
392 
init_auxmap_L1_L2(void)393 static void init_auxmap_L1_L2 ( void )
394 {
395    Int i;
396    for (i = 0; i < N_AUXMAP_L1; i++) {
397       auxmap_L1[i].base = 0;
398       auxmap_L1[i].ent  = NULL;
399    }
400 
401    tl_assert(0 == offsetof(AuxMapEnt,base));
402    tl_assert(sizeof(Addr) == sizeof(void*));
403    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
404                                     /*fastCmp*/ NULL,
405                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
406 }
407 
408 /* Check representation invariants; if OK return NULL; else a
409    descriptive bit of text.  Also return the number of
410    non-distinguished secondary maps referred to from the auxiliary
411    primary maps. */
412 
check_auxmap_L1_L2_sanity(Word * n_secmaps_found)413 static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
414 {
415    Word i, j;
416    /* On a 32-bit platform, the L2 and L1 tables should
417       both remain empty forever.
418 
419       On a 64-bit platform:
420       In the L2 table:
421        all .base & 0xFFFF == 0
422        all .base > MAX_PRIMARY_ADDRESS
423       In the L1 table:
424        all .base & 0xFFFF == 0
425        all (.base > MAX_PRIMARY_ADDRESS
426             .base & 0xFFFF == 0
427             and .ent points to an AuxMapEnt with the same .base)
428            or
429            (.base == 0 and .ent == NULL)
430    */
431    *n_secmaps_found = 0;
432    if (sizeof(void*) == 4) {
433       /* 32-bit platform */
434       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
435          return "32-bit: auxmap_L2 is non-empty";
436       for (i = 0; i < N_AUXMAP_L1; i++)
437         if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
438       return "32-bit: auxmap_L1 is non-empty";
439    } else {
440       /* 64-bit platform */
441       UWord elems_seen = 0;
442       AuxMapEnt *elem, *res;
443       AuxMapEnt key;
444       /* L2 table */
445       VG_(OSetGen_ResetIter)(auxmap_L2);
446       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
447          elems_seen++;
448          if (0 != (elem->base & (Addr)0xFFFF))
449             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
450          if (elem->base <= MAX_PRIMARY_ADDRESS)
451             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
452          if (elem->sm == NULL)
453             return "64-bit: .sm in _L2 is NULL";
454          if (!is_distinguished_sm(elem->sm))
455             (*n_secmaps_found)++;
456       }
457       if (elems_seen != n_auxmap_L2_nodes)
458          return "64-bit: disagreement on number of elems in _L2";
459       /* Check L1-L2 correspondence */
460       for (i = 0; i < N_AUXMAP_L1; i++) {
461          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
462             continue;
463          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
464             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
465          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
466             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
467          if (auxmap_L1[i].ent == NULL)
468             return "64-bit: .ent is NULL in auxmap_L1";
469          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
470             return "64-bit: _L1 and _L2 bases are inconsistent";
471          /* Look it up in auxmap_L2. */
472          key.base = auxmap_L1[i].base;
473          key.sm   = 0;
474          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
475          if (res == NULL)
476             return "64-bit: _L1 .base not found in _L2";
477          if (res != auxmap_L1[i].ent)
478             return "64-bit: _L1 .ent disagrees with _L2 entry";
479       }
480       /* Check L1 contains no duplicates */
481       for (i = 0; i < N_AUXMAP_L1; i++) {
482          if (auxmap_L1[i].base == 0)
483             continue;
484 	 for (j = i+1; j < N_AUXMAP_L1; j++) {
485             if (auxmap_L1[j].base == 0)
486                continue;
487             if (auxmap_L1[j].base == auxmap_L1[i].base)
488                return "64-bit: duplicate _L1 .base entries";
489          }
490       }
491    }
492    return NULL; /* ok */
493 }
494 
/* Install 'ent' in slot 'rank' of the auxmap_L1 front cache.  The
   slots from 'rank' onwards are each moved one place towards the end
   first, so whatever was in the last slot is dropped from the cache.
   'ent' must be non-NULL and 'rank' a valid slot index (asserted). */
static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
{
   Word i;

   tl_assert(ent);
   tl_assert(rank >= 0 && rank < N_AUXMAP_L1);

   /* Shuffle from the tail so no slot is overwritten before it has
      been copied. */
   i = N_AUXMAP_L1-1;
   while (i > rank) {
      auxmap_L1[i] = auxmap_L1[i-1];
      i--;
   }

   auxmap_L1[rank].ent  = ent;
   auxmap_L1[rank].base = ent->base;
}
505 
maybe_find_in_auxmap(Addr a)506 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
507 {
508    AuxMapEnt  key;
509    AuxMapEnt* res;
510    Word       i;
511 
512    tl_assert(a > MAX_PRIMARY_ADDRESS);
513    a &= ~(Addr)0xFFFF;
514 
515    /* First search the front-cache, which is a self-organising
516       list containing the most popular entries. */
517 
518    if (LIKELY(auxmap_L1[0].base == a))
519       return auxmap_L1[0].ent;
520    if (LIKELY(auxmap_L1[1].base == a)) {
521       Addr       t_base = auxmap_L1[0].base;
522       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
523       auxmap_L1[0].base = auxmap_L1[1].base;
524       auxmap_L1[0].ent  = auxmap_L1[1].ent;
525       auxmap_L1[1].base = t_base;
526       auxmap_L1[1].ent  = t_ent;
527       return auxmap_L1[0].ent;
528    }
529 
530    n_auxmap_L1_searches++;
531 
532    for (i = 0; i < N_AUXMAP_L1; i++) {
533       if (auxmap_L1[i].base == a) {
534          break;
535       }
536    }
537    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
538 
539    n_auxmap_L1_cmps += (ULong)(i+1);
540 
541    if (i < N_AUXMAP_L1) {
542       if (i > 0) {
543          Addr       t_base = auxmap_L1[i-1].base;
544          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
545          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
546          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
547          auxmap_L1[i-0].base = t_base;
548          auxmap_L1[i-0].ent  = t_ent;
549          i--;
550       }
551       return auxmap_L1[i].ent;
552    }
553 
554    n_auxmap_L2_searches++;
555 
556    /* First see if we already have it. */
557    key.base = a;
558    key.sm   = 0;
559 
560    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
561    if (res)
562       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
563    return res;
564 }
565 
find_or_alloc_in_auxmap(Addr a)566 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
567 {
568    AuxMapEnt *nyu, *res;
569 
570    /* First see if we already have it. */
571    res = maybe_find_in_auxmap( a );
572    if (LIKELY(res))
573       return res;
574 
575    /* Ok, there's no entry in the secondary map, so we'll have
576       to allocate one. */
577    a &= ~(Addr)0xFFFF;
578 
579    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
580    tl_assert(nyu);
581    nyu->base = a;
582    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
583    VG_(OSetGen_Insert)( auxmap_L2, nyu );
584    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
585    n_auxmap_L2_nodes++;
586    return nyu;
587 }
588 
589 /* --------------- SecMap fundamentals --------------- */
590 
591 // In all these, 'low' means it's definitely in the main primary map,
592 // 'high' means it's definitely in the auxiliary table.
593 
get_secmap_low_ptr(Addr a)594 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
595 {
596    UWord pm_off = a >> 16;
597 #  if VG_DEBUG_MEMORY >= 1
598    tl_assert(pm_off < N_PRIMARY_MAP);
599 #  endif
600    return &primary_map[ pm_off ];
601 }
602 
get_secmap_high_ptr(Addr a)603 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
604 {
605    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
606    return &am->sm;
607 }
608 
get_secmap_ptr(Addr a)609 static SecMap** get_secmap_ptr ( Addr a )
610 {
611    return ( a <= MAX_PRIMARY_ADDRESS
612           ? get_secmap_low_ptr(a)
613           : get_secmap_high_ptr(a));
614 }
615 
get_secmap_for_reading_low(Addr a)616 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
617 {
618    return *get_secmap_low_ptr(a);
619 }
620 
get_secmap_for_reading_high(Addr a)621 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
622 {
623    return *get_secmap_high_ptr(a);
624 }
625 
get_secmap_for_writing_low(Addr a)626 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
627 {
628    SecMap** p = get_secmap_low_ptr(a);
629    if (UNLIKELY(is_distinguished_sm(*p)))
630       *p = copy_for_writing(*p);
631    return *p;
632 }
633 
get_secmap_for_writing_high(Addr a)634 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
635 {
636    SecMap** p = get_secmap_high_ptr(a);
637    if (UNLIKELY(is_distinguished_sm(*p)))
638       *p = copy_for_writing(*p);
639    return *p;
640 }
641 
642 /* Produce the secmap for 'a', either from the primary map or by
643    ensuring there is an entry for it in the aux primary map.  The
644    secmap may be a distinguished one as the caller will only want to
645    be able to read it.
646 */
get_secmap_for_reading(Addr a)647 static INLINE SecMap* get_secmap_for_reading ( Addr a )
648 {
649    return ( a <= MAX_PRIMARY_ADDRESS
650           ? get_secmap_for_reading_low (a)
651           : get_secmap_for_reading_high(a) );
652 }
653 
654 /* Produce the secmap for 'a', either from the primary map or by
655    ensuring there is an entry for it in the aux primary map.  The
656    secmap may not be a distinguished one, since the caller will want
657    to be able to write it.  If it is a distinguished secondary, make a
658    writable copy of it, install it, and return the copy instead.  (COW
659    semantics).
660 */
get_secmap_for_writing(Addr a)661 static SecMap* get_secmap_for_writing ( Addr a )
662 {
663    return ( a <= MAX_PRIMARY_ADDRESS
664           ? get_secmap_for_writing_low (a)
665           : get_secmap_for_writing_high(a) );
666 }
667 
668 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
669    allocate one if one doesn't already exist.  This is used by the
670    leak checker.
671 */
maybe_get_secmap_for(Addr a)672 static SecMap* maybe_get_secmap_for ( Addr a )
673 {
674    if (a <= MAX_PRIMARY_ADDRESS) {
675       return get_secmap_for_reading_low(a);
676    } else {
677       AuxMapEnt* am = maybe_find_in_auxmap(a);
678       return am ? am->sm : NULL;
679    }
680 }
681 
682 /* --------------- Fundamental functions --------------- */
683 
684 static INLINE
insert_vabits2_into_vabits8(Addr a,UChar vabits2,UChar * vabits8)685 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
686 {
687    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
688    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
689    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
690 }
691 
692 static INLINE
insert_vabits4_into_vabits8(Addr a,UChar vabits4,UChar * vabits8)693 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
694 {
695    UInt shift;
696    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
697    shift     =  (a & 2)   << 1;        // shift by 0 or 4
698    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
699    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
700 }
701 
702 static INLINE
extract_vabits2_from_vabits8(Addr a,UChar vabits8)703 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
704 {
705    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
706    vabits8 >>= shift;                  // shift the two bits to the bottom
707    return 0x3 & vabits8;               // mask out the rest
708 }
709 
710 static INLINE
extract_vabits4_from_vabits8(Addr a,UChar vabits8)711 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
712 {
713    UInt shift;
714    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
715    shift = (a & 2) << 1;               // shift by 0 or 4
716    vabits8 >>= shift;                  // shift the four bits to the bottom
717    return 0xf & vabits8;               // mask out the rest
718 }
719 
// Note that these four are only used in slow cases.  The fast cases do
// clever things like combine the auxmap check (in
// get_secmap_for_{reading,writing}) with alignment checks.
723 
724 // *** WARNING! ***
725 // Any time this function is called, if it is possible that vabits2
726 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
727 // sec-V-bits table must also be set!
728 static INLINE
set_vabits2(Addr a,UChar vabits2)729 void set_vabits2 ( Addr a, UChar vabits2 )
730 {
731    SecMap* sm       = get_secmap_for_writing(a);
732    UWord   sm_off   = SM_OFF(a);
733    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
734 }
735 
736 static INLINE
get_vabits2(Addr a)737 UChar get_vabits2 ( Addr a )
738 {
739    SecMap* sm       = get_secmap_for_reading(a);
740    UWord   sm_off   = SM_OFF(a);
741    UChar   vabits8  = sm->vabits8[sm_off];
742    return extract_vabits2_from_vabits8(a, vabits8);
743 }
744 
745 // *** WARNING! ***
746 // Any time this function is called, if it is possible that any of the
747 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
748 // corresponding entry(s) in the sec-V-bits table must also be set!
749 static INLINE
get_vabits8_for_aligned_word32(Addr a)750 UChar get_vabits8_for_aligned_word32 ( Addr a )
751 {
752    SecMap* sm       = get_secmap_for_reading(a);
753    UWord   sm_off   = SM_OFF(a);
754    UChar   vabits8  = sm->vabits8[sm_off];
755    return vabits8;
756 }
757 
758 static INLINE
set_vabits8_for_aligned_word32(Addr a,UChar vabits8)759 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
760 {
761    SecMap* sm       = get_secmap_for_writing(a);
762    UWord   sm_off   = SM_OFF(a);
763    sm->vabits8[sm_off] = vabits8;
764 }
765 
766 
767 // Forward declarations
768 static UWord get_sec_vbits8(Addr a);
769 static void  set_sec_vbits8(Addr a, UWord vbits8);
770 
771 // Returns False if there was an addressability error.
772 static INLINE
set_vbits8(Addr a,UChar vbits8)773 Bool set_vbits8 ( Addr a, UChar vbits8 )
774 {
775    Bool  ok      = True;
776    UChar vabits2 = get_vabits2(a);
777    if ( VA_BITS2_NOACCESS != vabits2 ) {
778       // Addressable.  Convert in-register format to in-memory format.
779       // Also remove any existing sec V bit entry for the byte if no
780       // longer necessary.
781       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
782       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
783       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
784                                                 set_sec_vbits8(a, vbits8);  }
785       set_vabits2(a, vabits2);
786 
787    } else {
788       // Unaddressable!  Do nothing -- when writing to unaddressable
789       // memory it acts as a black hole, and the V bits can never be seen
790       // again.  So we don't have to write them at all.
791       ok = False;
792    }
793    return ok;
794 }
795 
796 // Returns False if there was an addressability error.  In that case, we put
797 // all defined bits into vbits8.
798 static INLINE
get_vbits8(Addr a,UChar * vbits8)799 Bool get_vbits8 ( Addr a, UChar* vbits8 )
800 {
801    Bool  ok      = True;
802    UChar vabits2 = get_vabits2(a);
803 
804    // Convert the in-memory format to in-register format.
805    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
806    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
807    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
808       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
809       ok = False;
810    } else {
811       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
812       *vbits8 = get_sec_vbits8(a);
813    }
814    return ok;
815 }
816 
817 
818 /* --------------- Secondary V bit table ------------ */
819 
820 // This table holds the full V bit pattern for partially-defined bytes
821 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
822 // memory.
823 //
824 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
825 // then overwrite the same address with a fully defined byte, the sec-V-bit
826 // node will not necessarily be removed.  This is because checking for
827 // whether removal is necessary would slow down the fast paths.
828 //
829 // To avoid the stale nodes building up too much, we periodically (once the
830 // table reaches a certain size) garbage collect (GC) the table by
831 // traversing it and evicting any nodes not having PDB.
832 // If more than a certain proportion of nodes survived, we increase the
833 // table size so that GCs occur less often.
834 //
835 // This policy is designed to avoid bad table bloat in the worst case where
836 // a program creates huge numbers of stale PDBs -- we would get this bloat
837 // if we had no GC -- while handling well the case where a node becomes
838 // stale but shortly afterwards is rewritten with a PDB and so becomes
839 // non-stale again (which happens quite often, eg. in perf/bz2).  If we just
840 // remove all stale nodes as soon as possible, we just end up re-adding a
841 // lot of them again later.  The "sufficiently stale" approach avoids
842 // this.  (If a program has many live PDBs, performance will just suck,
843 // there's no way around that.)
844 //
845 // Further comments, JRS 14 Feb 2012.  It turns out that the policy of
846 // holding on to stale entries for 2 GCs before discarding them can lead
847 // to massive space leaks.  So we're changing to an arrangement where
848 // lines are evicted as soon as they are observed to be stale during a
849 // GC.  This also has a side benefit of allowing the sufficiently_stale
850 // field to be removed from the SecVBitNode struct, reducing its size by
851 // 8 bytes, which is a substantial space saving considering that the
852 // struct was previously 32 or so bytes, on a 64 bit target.
853 //
854 // In order to try and mitigate the problem that the "sufficiently stale"
855 // heuristic was designed to avoid, the table size is allowed to drift
856 // up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
857 // means that nodes will exist in the table longer on average, and hopefully
858 // will be deleted and re-added less frequently.
859 //
860 // The previous scaling up mechanism (now called STEPUP) is retained:
861 // if residency exceeds 50%, the table is scaled up, although by a
862 // factor sqrt(2) rather than 2 as before.  This effectively doubles the
863 // frequency of GCs when there are many PDBs and reduces the tendency of
864 // stale PDBs to reside for long periods in the table.
865 
// The secondary V bit table itself.  The lookups in this file key it on
// a node's 'a' field and get SecVBitNode pointers back.
// NOTE(review): the initial assignment is not in this part of the file;
// presumably it is made with createSecVBitTable() during start-up -- confirm.
866 static OSet* secVBitTable;
867 
868 // Stats: number of nodes inserted by set_sec_vbits8(), and number of
// in-place updates it made to nodes that already existed.
869 static ULong sec_vbits_new_nodes = 0;
870 static ULong sec_vbits_updates   = 0;
871 
872 // This must be a power of two;  this is checked in mc_pre_clo_init().
873 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
874 // a larger address range) they take more space but we can get multiple
875 // partially-defined bytes in one if they are close to each other, reducing
876 // the number of total nodes.  In practice sometimes they are clustered (eg.
877 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
878 // row), but often not.  So we choose something intermediate.
879 #define BYTES_PER_SEC_VBIT_NODE     16
880 
881 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
882 // the number of nodes surviving a GC exceeds this proportion of
// secVBitLimit.
883 #define STEPUP_SURVIVOR_PROPORTION  0.5
884 #define STEPUP_GROWTH_FACTOR        1.414213562
885 
886 // If the above heuristic doesn't apply, then we may make the table
887 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if the number
888 // of nodes surviving a GC exceeds this proportion of secVBitLimit, _and_
889 // secVBitLimit is still below a fixed limit.  The numbers are somewhat
// arbitrary, but
890 // work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
891 // effectively although gradually reduces residency and increases time
892 // between GCs for programs with small numbers of PDBs.  The 80000 limit
893 // effectively limits the table size to around 2MB for programs with
894 // small numbers of PDBs, whilst giving a reasonably long lifetime to
895 // entries, to try and reduce the costs resulting from deleting and
896 // re-adding of entries.
897 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
898 #define DRIFTUP_GROWTH_FACTOR       1.015
899 #define DRIFTUP_MAX_SIZE            80000
900 
901 // We GC the table when it gets this many nodes in it, ie. it's effectively
902 // the table size.  It can change: gcSecVBitTable() may scale it up.
903 static Int  secVBitLimit = 1000;
904 
905 // The number of GCs done; incremented once per gcSecVBitTable() call.
906 // NOTE(review): no other use is visible in this part of the file, so the
907 // original "ageing nodes for eviction" role may be gone -- confirm.
// Because it's unsigned, wrapping is harmless.
908 static UInt GCs_done = 0;
909 
// One entry of the secondary V bit table: the full V bits for a group of
// BYTES_PER_SEC_VBIT_NODE consecutive bytes.
910 typedef
911    struct {
         // Start address of the group.  set_sec_vbits8() stores the accessed
         // address rounded down to a multiple of BYTES_PER_SEC_VBIT_NODE, and
         // this field is the key used for table lookups.
912       Addr  a;
         // vbits8[i] holds the 8 V bits recorded for the byte at a + i.
913       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
914    }
915    SecVBitNode;
916 
createSecVBitTable(void)917 static OSet* createSecVBitTable(void)
918 {
919    OSet* newSecVBitTable;
920    newSecVBitTable = VG_(OSetGen_Create_With_Pool)
921       ( offsetof(SecVBitNode, a),
922         NULL, // use fast comparisons
923         VG_(malloc), "mc.cSVT.1 (sec VBit table)",
924         VG_(free),
925         1000,
926         sizeof(SecVBitNode));
927    return newSecVBitTable;
928 }
929 
gcSecVBitTable(void)930 static void gcSecVBitTable(void)
931 {
932    OSet*        secVBitTable2;
933    SecVBitNode* n;
934    Int          i, n_nodes = 0, n_survivors = 0;
935 
936    GCs_done++;
937 
938    // Create the new table.
939    secVBitTable2 = createSecVBitTable();
940 
941    // Traverse the table, moving fresh nodes into the new table.
942    VG_(OSetGen_ResetIter)(secVBitTable);
943    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
944       // Keep node if any of its bytes are non-stale.  Using
945       // get_vabits2() for the lookup is not very efficient, but I don't
946       // think it matters.
947       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
948          if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
949             // Found a non-stale byte, so keep =>
950             // Insert a copy of the node into the new table.
951             SecVBitNode* n2 =
952                VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
953             *n2 = *n;
954             VG_(OSetGen_Insert)(secVBitTable2, n2);
955             break;
956          }
957       }
958    }
959 
960    // Get the before and after sizes.
961    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
962    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
963 
964    // Destroy the old table, and put the new one in its place.
965    VG_(OSetGen_Destroy)(secVBitTable);
966    secVBitTable = secVBitTable2;
967 
968    if (VG_(clo_verbosity) > 1) {
969       Char percbuf[7];
970       VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf);
971       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n",
972                    n_nodes, n_survivors, percbuf);
973    }
974 
975    // Increase table size if necessary.
976    if ((Double)n_survivors
977        > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
978       secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
979       if (VG_(clo_verbosity) > 1)
980          VG_(message)(Vg_DebugMsg,
981                       "memcheck GC: %d new table size (stepup)\n",
982                       secVBitLimit);
983    }
984    else
985    if (secVBitLimit < DRIFTUP_MAX_SIZE
986        && (Double)n_survivors
987           > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
988       secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
989       if (VG_(clo_verbosity) > 1)
990          VG_(message)(Vg_DebugMsg,
991                       "memcheck GC: %d new table size (driftup)\n",
992                       secVBitLimit);
993    }
994 }
995 
get_sec_vbits8(Addr a)996 static UWord get_sec_vbits8(Addr a)
997 {
998    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
999    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
1000    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1001    UChar        vbits8;
1002    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1003    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1004    // make it to the secondary V bits table.
1005    vbits8 = n->vbits8[amod];
1006    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1007    return vbits8;
1008 }
1009 
// Record 'vbits8' in the sec-V-bits table as the full V bits of the byte
// at 'a'.  'vbits8' must be neither fully defined nor fully undefined
// (asserted).  If a node covering 'a' already exists it is updated in
// place; otherwise a new node is created, after first garbage-collecting
// the table if it has reached secVBitLimit nodes.
static void set_sec_vbits8(Addr a, UWord vbits8)
{
   Addr         nodeAddr = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
   Int          slot     = a % BYTES_PER_SEC_VBIT_NODE;
   SecVBitNode* node     = VG_(OSetGen_Lookup)(secVBitTable, &nodeAddr);
   Int          k;

   // Fully defined and fully undefined bytes never belong in this table.
   tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);

   if (node) {
      // Existing node: just overwrite the one byte.
      node->vbits8[slot] = vbits8;
      sec_vbits_updates++;
      return;
   }

   // GC first if the table is full.  Nb: this has to happen before the
   // new node is created and inserted, so the GC cannot throw it away.
   if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
      gcSecVBitTable();
   }

   // New node: the requested byte gets its value, every other byte is
   // filled with V_BITS8_UNDEFINED (they should never be read as-is, but
   // be cautious).
   node    = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
   node->a = nodeAddr;
   for (k = 0; k < BYTES_PER_SEC_VBIT_NODE; k++) {
      node->vbits8[k] = (k == slot) ? vbits8 : V_BITS8_UNDEFINED;
   }

   VG_(OSetGen_Insert)(secVBitTable, node);
   sec_vbits_new_nodes++;

   // Keep the current / high-water node counts up to date.
   n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
   if (n_secVBit_nodes > max_secVBit_nodes)
      max_secVBit_nodes = n_secVBit_nodes;
}
1046 
1047 /* --------------- Endianness helpers --------------- */
1048 
1049 /* Returns the offset in memory of the byteno-th most significant byte
1050    in a wordszB-sized word, given the specified endianness. */
byte_offset_w(UWord wordszB,Bool bigendian,UWord byteno)1051 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1052                                     UWord byteno ) {
1053    return bigendian ? (wordszB-1-byteno) : byteno;
1054 }
1055 
1056 
1057 /* --------------- Ignored address ranges --------------- */
1058 
// Capacity of the ignore-range table.  parse_ignore_ranges() fails when
// it is given more ranges than this.
1059 #define M_IGNORE_RANGES 4
1060 
// Fixed-capacity list of address ranges.  MC_(in_ignored_range) treats
// entry i as the half-open interval [start[i], end[i]).
1061 typedef
1062    struct {
         // Number of valid entries in start[] / end[].
1063       Int  used;
         // First address of each range.
1064       Addr start[M_IGNORE_RANGES];
         // One-past-the-last address of each range.
1065       Addr end[M_IGNORE_RANGES];
1066    }
1067    IgnoreRanges;
1068 
// The single table instance.  parse_ignore_ranges() fills it and
// MC_(in_ignored_range) queries it.
1069 static IgnoreRanges ignoreRanges;
1070 
MC_(in_ignored_range)1071 INLINE Bool MC_(in_ignored_range) ( Addr a )
1072 {
1073    Int i;
1074    if (LIKELY(ignoreRanges.used == 0))
1075       return False;
1076    for (i = 0; i < ignoreRanges.used; i++) {
1077       if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i])
1078          return True;
1079    }
1080    return False;
1081 }
1082 
1083 /* Parse two Addr separated by a dash, or fail. */
1084 
parse_range(UChar ** ppc,Addr * result1,Addr * result2)1085 static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 )
1086 {
1087    Bool ok = VG_(parse_Addr) (ppc, result1);
1088    if (!ok)
1089       return False;
1090    if (**ppc != '-')
1091       return False;
1092    (*ppc)++;
1093    ok = VG_(parse_Addr) (ppc, result2);
1094    if (!ok)
1095       return False;
1096    return True;
1097 }
1098 
1099 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1100    fail. */
1101 
parse_ignore_ranges(UChar * str0)1102 static Bool parse_ignore_ranges ( UChar* str0 )
1103 {
1104    Addr start, end;
1105    Bool ok;
1106    UChar*  str = str0;
1107    UChar** ppc = &str;
1108    ignoreRanges.used = 0;
1109    while (1) {
1110       ok = parse_range(ppc, &start, &end);
1111       if (!ok)
1112          return False;
1113       if (ignoreRanges.used >= M_IGNORE_RANGES)
1114          return False;
1115       ignoreRanges.start[ignoreRanges.used] = start;
1116       ignoreRanges.end[ignoreRanges.used] = end;
1117       ignoreRanges.used++;
1118       if (**ppc == 0)
1119          return True;
1120       if (**ppc != ',')
1121          return False;
1122       (*ppc)++;
1123    }
1124    /*NOTREACHED*/
1125    return False;
1126 }
1127 
1128 
1129 /* --------------- Load/store slow cases. --------------- */
1130 
1131 static
1132 __attribute__((noinline))
mc_LOADVn_slow(Addr a,SizeT nBits,Bool bigendian)1133 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1134 {
1135    PROF_EVENT(30, "mc_LOADVn_slow");
1136 
1137    /* ------------ BEGIN semi-fast cases ------------ */
1138    /* These deal quickly-ish with the common auxiliary primary map
1139       cases on 64-bit platforms.  Are merely a speedup hack; can be
1140       omitted without loss of correctness/functionality.  Note that in
1141       both cases the "sizeof(void*) == 8" causes these cases to be
1142       folded out by compilers on 32-bit platforms.  These are derived
1143       from LOADV64 and LOADV32.
1144    */
1145    if (LIKELY(sizeof(void*) == 8
1146                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1147       SecMap* sm       = get_secmap_for_reading(a);
1148       UWord   sm_off16 = SM_OFF_16(a);
1149       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1150       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1151          return V_BITS64_DEFINED;
1152       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1153          return V_BITS64_UNDEFINED;
1154       /* else fall into the slow case */
1155    }
1156    if (LIKELY(sizeof(void*) == 8
1157                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1158       SecMap* sm = get_secmap_for_reading(a);
1159       UWord sm_off = SM_OFF(a);
1160       UWord vabits8 = sm->vabits8[sm_off];
1161       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1162          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1163       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1164          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1165       /* else fall into slow case */
1166    }
1167    /* ------------ END semi-fast cases ------------ */
1168 
1169    ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
1170    ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
1171    SSizeT szB         = nBits / 8;
1172    SSizeT i;          /* Must be signed. */
1173    SizeT  n_addrs_bad = 0;
1174    Addr   ai;
1175    UChar  vbits8;
1176    Bool   ok;
1177 
1178    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1179 
1180    /* Make up a 64-bit result V word, which contains the loaded data
1181       for valid addresses and Defined for invalid addresses.  Iterate
1182       over the bytes in the word, from the most significant down to
1183       the least.  The vbits to return are calculated into vbits64.
1184       Also compute the pessimising value to be used when
1185       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
1186       info can be gleaned from pessim64) but is used as a
1187       cross-check. */
1188    for (i = szB-1; i >= 0; i--) {
1189       PROF_EVENT(31, "mc_LOADVn_slow(loop)");
1190       ai = a + byte_offset_w(szB, bigendian, i);
1191       ok = get_vbits8(ai, &vbits8);
1192       vbits64 <<= 8;
1193       vbits64 |= vbits8;
1194       if (!ok) n_addrs_bad++;
1195       pessim64 <<= 8;
1196       pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1197    }
1198 
1199    /* In the common case, all the addresses involved are valid, so we
1200       just return the computed V bits and have done. */
1201    if (LIKELY(n_addrs_bad == 0))
1202       return vbits64;
1203 
1204    /* If there's no possibility of getting a partial-loads-ok
1205       exemption, report the error and quit. */
1206    if (!MC_(clo_partial_loads_ok)) {
1207       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1208       return vbits64;
1209    }
1210 
1211    /* The partial-loads-ok excemption might apply.  Find out if it
1212       does.  If so, don't report an addressing error, but do return
1213       Undefined for the bytes that are out of range, so as to avoid
1214       false negatives.  If it doesn't apply, just report an addressing
1215       error in the usual way. */
1216 
1217    /* Some code steps along byte strings in aligned word-sized chunks
1218       even when there is only a partially defined word at the end (eg,
1219       optimised strlen).  This is allowed by the memory model of
1220       modern machines, since an aligned load cannot span two pages and
1221       thus cannot "partially fault".  Despite such behaviour being
1222       declared undefined by ANSI C/C++.
1223 
1224       Therefore, a load from a partially-addressible place is allowed
1225       if all of the following hold:
1226       - the command-line flag is set [by default, it isn't]
1227       - it's a word-sized, word-aligned load
1228       - at least one of the addresses in the word *is* valid
1229 
1230       Since this suppresses the addressing error, we avoid false
1231       negatives by marking bytes undefined when they come from an
1232       invalid address.
1233    */
1234 
1235    /* "at least one of the addresses is invalid" */
1236    tl_assert(pessim64 != V_BITS64_DEFINED);
1237 
1238    if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1239        && n_addrs_bad < VG_WORDSIZE) {
1240       /* Exemption applies.  Use the previously computed pessimising
1241          value for vbits64 and return the combined result, but don't
1242          flag an addressing error.  The pessimising value is Defined
1243          for valid addresses and Undefined for invalid addresses. */
1244       /* for assumption that doing bitwise or implements UifU */
1245       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1246       /* (really need "UifU" here...)
1247          vbits64 UifU= pessim64  (is pessimised by it, iow) */
1248       vbits64 |= pessim64;
1249       return vbits64;
1250    }
1251 
1252    /* Exemption doesn't apply.  Flag an addressing error in the normal
1253       way. */
1254    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1255 
1256    return vbits64;
1257 }
1258 
1259 
1260 static
1261 __attribute__((noinline))
mc_STOREVn_slow(Addr a,SizeT nBits,ULong vbytes,Bool bigendian)1262 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1263 {
1264    SizeT szB = nBits / 8;
1265    SizeT i, n_addrs_bad = 0;
1266    UChar vbits8;
1267    Addr  ai;
1268    Bool  ok;
1269 
1270    PROF_EVENT(35, "mc_STOREVn_slow");
1271 
1272    /* ------------ BEGIN semi-fast cases ------------ */
1273    /* These deal quickly-ish with the common auxiliary primary map
1274       cases on 64-bit platforms.  Are merely a speedup hack; can be
1275       omitted without loss of correctness/functionality.  Note that in
1276       both cases the "sizeof(void*) == 8" causes these cases to be
1277       folded out by compilers on 32-bit platforms.  These are derived
1278       from STOREV64 and STOREV32.
1279    */
1280    if (LIKELY(sizeof(void*) == 8
1281                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1282       SecMap* sm       = get_secmap_for_reading(a);
1283       UWord   sm_off16 = SM_OFF_16(a);
1284       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1285       if (LIKELY( !is_distinguished_sm(sm) &&
1286                           (VA_BITS16_DEFINED   == vabits16 ||
1287                            VA_BITS16_UNDEFINED == vabits16) )) {
1288          /* Handle common case quickly: a is suitably aligned, */
1289          /* is mapped, and is addressible. */
1290          // Convert full V-bits in register to compact 2-bit form.
1291          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1292             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
1293             return;
1294          } else if (V_BITS64_UNDEFINED == vbytes) {
1295             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
1296             return;
1297          }
1298          /* else fall into the slow case */
1299       }
1300       /* else fall into the slow case */
1301    }
1302    if (LIKELY(sizeof(void*) == 8
1303                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1304       SecMap* sm      = get_secmap_for_reading(a);
1305       UWord   sm_off  = SM_OFF(a);
1306       UWord   vabits8 = sm->vabits8[sm_off];
1307       if (LIKELY( !is_distinguished_sm(sm) &&
1308                           (VA_BITS8_DEFINED   == vabits8 ||
1309                            VA_BITS8_UNDEFINED == vabits8) )) {
1310          /* Handle common case quickly: a is suitably aligned, */
1311          /* is mapped, and is addressible. */
1312          // Convert full V-bits in register to compact 2-bit form.
1313          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1314             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1315             return;
1316          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1317             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1318             return;
1319          }
1320          /* else fall into the slow case */
1321       }
1322       /* else fall into the slow case */
1323    }
1324    /* ------------ END semi-fast cases ------------ */
1325 
1326    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1327 
1328    /* Dump vbytes in memory, iterating from least to most significant
1329       byte.  At the same time establish addressibility of the location. */
1330    for (i = 0; i < szB; i++) {
1331       PROF_EVENT(36, "mc_STOREVn_slow(loop)");
1332       ai     = a + byte_offset_w(szB, bigendian, i);
1333       vbits8 = vbytes & 0xff;
1334       ok     = set_vbits8(ai, vbits8);
1335       if (!ok) n_addrs_bad++;
1336       vbytes >>= 8;
1337    }
1338 
1339    /* If an address error has happened, report it. */
1340    if (n_addrs_bad > 0)
1341       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1342 }
1343 
1344 
1345 /*------------------------------------------------------------*/
1346 /*--- Setting permissions over address ranges.             ---*/
1347 /*------------------------------------------------------------*/
1348 
/* Set the V+A state of every byte in [a, a+lenT) to one uniform state.

   vabits16 is the 16-bit pattern stored for each aligned group of 8
   bytes; it must be one of VA_BITS16_NOACCESS, VA_BITS16_UNDEFINED or
   VA_BITS16_DEFINED (asserted).  Its low two bits give the per-byte
   state used for the unaligned head and tail.  dsm_num is the index in
   sm_distinguished[] of the distinguished sec-map holding that state
   throughout; whole sec-maps in the range are simply pointed at it.

   A warning is printed for ranges larger than 256MB (unless quiet or in
   XML mode). */
static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
                                      UWord dsm_num )
{
   UWord    vabits2 = vabits16 & 0x3;
   SizeT    lenA, lenB, len_to_next_secmap;
   Addr     aNext;
   SecMap*  sm;
   SecMap** sm_ptr;
   SecMap*  example_dsm;

   PROF_EVENT(150, "set_address_range_perms");

   /* Check the V+A bits make sense. */
   tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
             VA_BITS16_UNDEFINED == vabits16 ||
             VA_BITS16_DEFINED   == vabits16);

   // PDBs must never be written by this code, because the sec-V-bits
   // table is not touched here.
   tl_assert(VA_BITS2_PARTDEFINED != vabits2);

   if (lenT == 0)
      return;

   if (lenT > 256 * 1024 * 1024
       && VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
      Char* s = vabits16 == VA_BITS16_DEFINED   ? "defined"
              : vabits16 == VA_BITS16_UNDEFINED ? "undefined"
              : vabits16 == VA_BITS16_NOACCESS  ? "noaccess"
              :                                   "unknown???";
      VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
                               "large range [0x%lx, 0x%lx) (%s)\n",
                               a, a + lenT, s);
   }

#ifndef PERF_FAST_SARP
   /*------------------ debug-only case ------------------ */
   {
      // Every byte gets the same value, so endianness is irrelevant.
      // Nb: the sec-V-bits table needs no update after these
      // set_vabits2() calls, because VA_BITS2_PARTDEFINED is never
      // written here.
      SizeT done;
      for (done = 0; done < lenT; done++) {
         set_vabits2(a + done, vabits2);
      }
      return;
   }
#endif

   /*------------------ standard handling ------------------ */

   /* The distinguished secondary we may be able to share (part of the
      space-compression scheme). */
   example_dsm = &sm_distinguished[dsm_num];

   // The range can cover any mix of partial and whole sec-maps.  The
   // table shows which of parts 1, 2 and 3 below handle each shape.
   // Shapes marked '*' are the common ones.
   //
   //   TYPE                                             PARTS USED
   //   ----                                             ----------
   // * one partial sec-map                  (p)         1
   // - one whole sec-map                    (P)         2
   //
   // * two partial sec-maps                 (pp)        1,3
   // - one partial, one whole sec-map       (pP)        1,2
   // - one whole, one partial sec-map       (Pp)        2,3
   // - two whole sec-maps                   (PP)        2,2
   //
   // * one partial, one whole, one partial  (pPp)       1,2,3
   // - one partial, two whole               (pPP)       1,2,2
   // - two whole, one partial               (PPp)       2,2,3
   // - three whole                          (PPP)       2,2,2
   //
   // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
   // - one partial, N-1 whole               (pP...PP)   1,2...2,2
   // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
   // - N whole                              (PP...PP)   2,2...2,2

   // Split lenT into the piece inside the first sec-map (lenA) and the
   // remainder (lenB), so that lenT == lenA + lenB.
   aNext = start_of_this_sm(a) + SM_SIZE;
   len_to_next_secmap = aNext - a;
   if ( lenT <= len_to_next_secmap ) {
      // Everything fits in one sec-map.  By far the most common case.
      PROF_EVENT(151, "set_address_range_perms-single-secmap");
      lenA = lenT;
      lenB = 0;
   } else if (is_start_of_sm(a)) {
      // More than one sec-map's worth, starting exactly on a sec-map
      // boundary: there is no partial head, so go straight to Part 2.
      PROF_EVENT(152, "set_address_range_perms-startof-secmap");
      lenA = 0;
      lenB = lenT;
      goto part2;
   } else {
      // At least two sec-maps are touched and the first is partial.
      PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
      lenA = len_to_next_secmap;
      lenB = lenT - lenA;
   }

   //------------------------------------------------------------------------
   // Part 1: the first sec-map.  Usually the range lies wholly within one
   // sec-map and this part is all that runs.  Handling it separately also
   // means the loops below never need to test for crossing a sec-map
   // boundary.
   //------------------------------------------------------------------------

   // A distinguished sec-map must be made writable before it is changed,
   // unless it already holds exactly the state we want.
   sm_ptr = get_secmap_ptr(a);
   if (is_distinguished_sm(*sm_ptr)) {
      if (*sm_ptr != example_dsm) {
         PROF_EVENT(155, "set_address_range_perms-dist-sm1");
         *sm_ptr = copy_for_writing(*sm_ptr);
      } else {
         // Already has the wanted V+A bits: nothing to do in this
         // sec-map, so skip to its end.
         PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
         a    = aNext;
         lenA = 0;
      }
   }
   sm = *sm_ptr;

   // Head: single bytes until 'a' is 8-aligned (or lenA runs out).
   while (!VG_IS_8_ALIGNED(a) && lenA >= 1) {
      PROF_EVENT(156, "set_address_range_perms-loop1a");
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[SM_OFF(a)]) );
      a    += 1;
      lenA -= 1;
   }
   // Middle: 8-aligned groups of 8 bytes, one 16-bit store each.
   while (lenA >= 8) {
      PROF_EVENT(157, "set_address_range_perms-loop8a");
      ((UShort*)(sm->vabits8))[SM_OFF_16(a)] = vabits16;
      a    += 8;
      lenA -= 8;
   }
   // Tail: whatever single bytes are left.
   while (lenA >= 1) {
      PROF_EVENT(158, "set_address_range_perms-loop1b");
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[SM_OFF(a)]) );
      a    += 1;
      lenA -= 1;
   }

   // First sec-map done.  Anything left?
   if (lenB == 0)
      return;

   //------------------------------------------------------------------------
   // Part 2: whole sec-maps, handled one sec-map at a time.
   //------------------------------------------------------------------------
  part2:
   // 64KB-aligned, 64KB steps.
   // Nb: lenB can be smaller than SM_SIZE on arrival here.
   tl_assert(0 == lenA);
   while (lenB >= SM_SIZE) {
      tl_assert(is_start_of_sm(a));
      PROF_EVENT(159, "set_address_range_perms-loop64K");
      sm_ptr = get_secmap_ptr(a);
      if (!is_distinguished_sm(*sm_ptr)) {
         PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
         // The non-distinguished sec-map being replaced is released.
         // This happens moderately often, enough to be worthwhile.
         SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
         tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
      }
      update_SM_counts(*sm_ptr, example_dsm);
      // Point the entry at the shared distinguished sec-map.
      *sm_ptr = example_dsm;
      lenB -= SM_SIZE;
      a    += SM_SIZE;
   }

   // Whole sec-maps done.  Anything left?
   if (lenB == 0)
      return;

   //------------------------------------------------------------------------
   // Part 3: the final, partial sec-map, if there is one.
   //------------------------------------------------------------------------

   tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);

   // As in Part 1: make a distinguished sec-map writable, unless it is
   // already the one we want, in which case we are finished.
   sm_ptr = get_secmap_ptr(a);
   if (is_distinguished_sm(*sm_ptr)) {
      if (*sm_ptr == example_dsm) {
         PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
         return;
      }
      PROF_EVENT(162, "set_address_range_perms-dist-sm2");
      *sm_ptr = copy_for_writing(*sm_ptr);
   }
   sm = *sm_ptr;

   // 8-aligned groups of 8 bytes ('a' starts on a sec-map boundary).
   while (lenB >= 8) {
      PROF_EVENT(163, "set_address_range_perms-loop8b");
      ((UShort*)(sm->vabits8))[SM_OFF_16(a)] = vabits16;
      a    += 8;
      lenB -= 8;
   }
   // Remaining single bytes.
   while (lenB >= 1) {
      PROF_EVENT(164, "set_address_range_perms-loop1c");
      insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[SM_OFF(a)]) );
      a    += 1;
      lenB -= 1;
   }
}
1579 
1580 
1581 /* --- Set permissions for arbitrary address ranges --- */
1582 
MC_(make_mem_noaccess)1583 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1584 {
1585    PROF_EVENT(40, "MC_(make_mem_noaccess)");
1586    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1587    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1588    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1589       ocache_sarp_Clear_Origins ( a, len );
1590 }
1591 
/* Paint the range [a, a+len) with the UNDEFINED permission pattern.
   This variant leaves origin tags untouched. */
static void make_mem_undefined ( Addr a, SizeT len )
{
   PROF_EVENT(41, "make_mem_undefined");
   DEBUG("make_mem_undefined(%p, %lu)\n", a, len);

   set_address_range_perms( a, len,
                            VA_BITS16_UNDEFINED,
                            SM_DIST_UNDEFINED );
}
1598 
MC_(make_mem_undefined_w_otag)1599 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1600 {
1601    PROF_EVENT(41, "MC_(make_mem_undefined)");
1602    DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1603    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1604    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1605       ocache_sarp_Set_Origins ( a, len, otag );
1606 }
1607 
1608 static
make_mem_undefined_w_tid_and_okind(Addr a,SizeT len,ThreadId tid,UInt okind)1609 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1610                                           ThreadId tid, UInt okind )
1611 {
1612    UInt        ecu;
1613    ExeContext* here;
1614    /* VG_(record_ExeContext) checks for validity of tid, and asserts
1615       if it is invalid.  So no need to do it here. */
1616    tl_assert(okind <= 3);
1617    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1618    tl_assert(here);
1619    ecu = VG_(get_ECU_from_ExeContext)(here);
1620    tl_assert(VG_(is_plausible_ECU)(ecu));
1621    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1622 }
1623 
1624 static
make_mem_undefined_w_tid(Addr a,SizeT len,ThreadId tid)1625 void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) {
1626    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1627 }
1628 
1629 
MC_(make_mem_defined)1630 void MC_(make_mem_defined) ( Addr a, SizeT len )
1631 {
1632    PROF_EVENT(42, "MC_(make_mem_defined)");
1633    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1634    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1635    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1636       ocache_sarp_Clear_Origins ( a, len );
1637 }
1638 
1639 /* For each byte in [a,a+len), if the byte is addressable, make it be
1640    defined, but if it isn't addressible, leave it alone.  In other
1641    words a version of MC_(make_mem_defined) that doesn't mess with
1642    addressibility.  Low-performance implementation. */
make_mem_defined_if_addressable(Addr a,SizeT len)1643 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1644 {
1645    SizeT i;
1646    UChar vabits2;
1647    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1648    for (i = 0; i < len; i++) {
1649       vabits2 = get_vabits2( a+i );
1650       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1651          set_vabits2(a+i, VA_BITS2_DEFINED);
1652          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1653             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1654          }
1655       }
1656    }
1657 }
1658 
1659 /* Similarly (needed for mprotect handling ..) */
make_mem_defined_if_noaccess(Addr a,SizeT len)1660 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1661 {
1662    SizeT i;
1663    UChar vabits2;
1664    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1665    for (i = 0; i < len; i++) {
1666       vabits2 = get_vabits2( a+i );
1667       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1668          set_vabits2(a+i, VA_BITS2_DEFINED);
1669          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1670             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1671          }
1672       }
1673    }
1674 }
1675 
1676 /* --- Block-copy permissions (needed for implementing realloc() and
1677        sys_mremap). --- */
1678 
MC_(copy_address_range_state)1679 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1680 {
1681    SizeT i, j;
1682    UChar vabits2, vabits8;
1683    Bool  aligned, nooverlap;
1684 
1685    DEBUG("MC_(copy_address_range_state)\n");
1686    PROF_EVENT(50, "MC_(copy_address_range_state)");
1687 
1688    if (len == 0 || src == dst)
1689       return;
1690 
1691    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1692    nooverlap = src+len <= dst || dst+len <= src;
1693 
1694    if (nooverlap && aligned) {
1695 
1696       /* Vectorised fast case, when no overlap and suitably aligned */
1697       /* vector loop */
1698       i = 0;
1699       while (len >= 4) {
1700          vabits8 = get_vabits8_for_aligned_word32( src+i );
1701          set_vabits8_for_aligned_word32( dst+i, vabits8 );
1702          if (LIKELY(VA_BITS8_DEFINED == vabits8
1703                             || VA_BITS8_UNDEFINED == vabits8
1704                             || VA_BITS8_NOACCESS == vabits8)) {
1705             /* do nothing */
1706          } else {
1707             /* have to copy secondary map info */
1708             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1709                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1710             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1711                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1712             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1713                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1714             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1715                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1716          }
1717          i += 4;
1718          len -= 4;
1719       }
1720       /* fixup loop */
1721       while (len >= 1) {
1722          vabits2 = get_vabits2( src+i );
1723          set_vabits2( dst+i, vabits2 );
1724          if (VA_BITS2_PARTDEFINED == vabits2) {
1725             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1726          }
1727          i++;
1728          len--;
1729       }
1730 
1731    } else {
1732 
1733       /* We have to do things the slow way */
1734       if (src < dst) {
1735          for (i = 0, j = len-1; i < len; i++, j--) {
1736             PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
1737             vabits2 = get_vabits2( src+j );
1738             set_vabits2( dst+j, vabits2 );
1739             if (VA_BITS2_PARTDEFINED == vabits2) {
1740                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1741             }
1742          }
1743       }
1744 
1745       if (src > dst) {
1746          for (i = 0; i < len; i++) {
1747             PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
1748             vabits2 = get_vabits2( src+i );
1749             set_vabits2( dst+i, vabits2 );
1750             if (VA_BITS2_PARTDEFINED == vabits2) {
1751                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1752             }
1753          }
1754       }
1755    }
1756 
1757 }
1758 
1759 
1760 /*------------------------------------------------------------*/
1761 /*--- Origin tracking stuff - cache basics                 ---*/
1762 /*------------------------------------------------------------*/
1763 
1764 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1765    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1766 
1767    Note that this implementation draws inspiration from the "origin
1768    tracking by value piggybacking" scheme described in "Tracking Bad
1769    Apples: Reporting the Origin of Null and Undefined Value Errors"
1770    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1771    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1772    implemented completely differently.
1773 
1774    Origin tags and ECUs -- about the shadow values
1775    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1776 
1777    This implementation tracks the defining point of all uninitialised
1778    values using so called "origin tags", which are 32-bit integers,
1779    rather than using the values themselves to encode the origins.  The
1780    latter, so-called "value piggybacking", is what the OOPSLA07 paper
1781    describes.
1782 
1783    Origin tags, as tracked by the machinery below, are 32-bit unsigned
1784    ints (UInts), regardless of the machine's word size.  Each tag
1785    comprises an upper 30-bit ECU field and a lower 2-bit
1786    'kind' field.  The ECU field is a number given out by m_execontext
1787    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
1788    directly as an origin tag (otag), but in fact we want to put
1789    additional information in the 'kind' field to indicate roughly where the
1790    tag came from.  This helps print more understandable error messages
1791    for the user -- it has no other purpose.  In summary:
1792 
1793    * Both ECUs and origin tags are represented as 32-bit words
1794 
1795    * m_execontext and the core-tool interface deal purely in ECUs.
1796      They have no knowledge of origin tags - that is a purely
1797      Memcheck-internal matter.
1798 
1799    * all valid ECUs have the lowest 2 bits zero and at least
1800      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
1801 
1802    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
1803      constants defined in mc_include.h.
1804 
1805    * to convert an otag back to an ECU, AND it with ~3
1806 
1807    One important fact is that no valid otag is zero.  A zero otag is
1808    used by the implementation to indicate "no origin", which could
1809    mean that either the value is defined, or it is undefined but the
1810    implementation somehow managed to lose the origin.
1811 
1812    The ECU used for memory created by malloc etc is derived from the
1813    stack trace at the time the malloc etc happens.  This means the
1814    mechanism can show the exact allocation point for heap-created
1815    uninitialised values.
1816 
1817    In contrast, it is simply too expensive to create a complete
1818    backtrace for each stack allocation.  Therefore we merely use a
1819    depth-1 backtrace for stack allocations, which can be done once at
1820    translation time, rather than N times at run time.  The result of
1821    this is that, for stack created uninitialised values, Memcheck can
1822    only show the allocating function, and not what called it.
1823    Furthermore, compilers tend to move the stack pointer just once at
1824    the start of the function, to allocate all locals, and so in fact
1825    the stack origin almost always simply points to the opening brace
1826    of the function.  Net result is, for stack origins, the mechanism
1827    can tell you in which function the undefined value was created, but
1828    that's all.  Users will need to carefully check all locals in the
1829    specified function.
1830 
1831    Shadowing registers and memory
1832    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1833 
1834    Memory is shadowed using a two level cache structure (ocacheL1 and
1835    ocacheL2).  Memory references are first directed to ocacheL1.  This
1836    is a traditional 2-way set associative cache with 32-byte lines and
1837    approximate LRU replacement within each set.
1838 
1839    A naive implementation would require storing one 32 bit otag for
1840    each byte of memory covered, a 4:1 space overhead.  Instead, there
1841    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
1842    that shows which of the 4 bytes have that shadow value and which
1843    have a shadow value of zero (indicating no origin).  Hence a lot of
1844    space is saved, but the cost is that only one different origin per
1845    4 bytes of address space can be represented.  This is a source of
1846    imprecision, but how much of a problem it really is remains to be
1847    seen.
1848 
1849    A cache line that contains all zeroes ("no origins") contains no
1850    useful information, and can be ejected from the L1 cache "for
1851    free", in the sense that a read miss on the L1 causes a line of
1852    zeroes to be installed.  However, ejecting a line containing
1853    nonzeroes risks losing origin information permanently.  In order to
1854    prevent such lossage, ejected nonzero lines are placed in a
1855    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
1856    lines.  This can grow arbitrarily large, and so should ensure that
1857    Memcheck runs out of memory in preference to losing useful origin
1858    info due to cache size limitations.
1859 
1860    Shadowing registers is a bit tricky, because the shadow values are
1861    32 bits, regardless of the size of the register.  That gives a
1862    problem for registers smaller than 32 bits.  The solution is to
1863    find spaces in the guest state that are unused, and use those to
1864    shadow guest state fragments smaller than 32 bits.  For example, on
1865    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
1866    shadow are allocated for the register's otag, then there are still
1867    12 bytes left over which could be used to shadow 3 other values.
1868 
1869    This implies there is some non-obvious mapping from guest state
1870    (start,length) pairs to the relevant shadow offset (for the origin
1871    tags).  And it is unfortunately guest-architecture specific.  The
1872    mapping is contained in mc_machine.c, which is quite lengthy but
1873    straightforward.
1874 
1875    Instrumenting the IR
1876    ~~~~~~~~~~~~~~~~~~~~
1877 
1878    Instrumentation is largely straightforward, and done by the
1879    functions schemeE and schemeS in mc_translate.c.  These generate
1880    code for handling the origin tags of expressions (E) and statements
1881    (S) respectively.  The rather strange names are a reference to the
1882    "compilation schemes" shown in Simon Peyton Jones' book "The
1883    Implementation of Functional Programming Languages" (Prentice Hall,
1884    1987, see
1885    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
1886 
1887    schemeS merely arranges to move shadow values around the guest
1888    state to track the incoming IR.  schemeE is largely trivial too.
1889    The only significant point is how to compute the otag corresponding
1890    to binary (or ternary, quaternary, etc) operator applications.  The
1891    rule is simple: just take whichever value is larger (32-bit
1892    unsigned max).  Constants get the special value zero.  Hence this
1893    rule always propagates a nonzero (known) otag in preference to a
1894    zero (unknown, or more likely, value-is-defined) tag, as we want.
1895    If two different undefined values are inputs to a binary operator
1896    application, then which is propagated is arbitrary, but that
1897    doesn't matter, since the program is erroneous in using either of
1898    the values, and so there's no point in attempting to propagate
1899    both.
1900 
1901    Since constants are abstracted to (otag) zero, much of the
1902    instrumentation code can be folded out without difficulty by the
1903    generic post-instrumentation IR cleanup pass, using these rules:
1904    Max32U(0,x) -> x, Max32U(x,0) -> x, Max32U(x,y) where x and y are
1905    constants is evaluated at JIT time.  And the resulting dead code
1906    removal.  In practice this causes surprisingly few Max32Us to
1907    survive through to backend code generation.
1908 
1909    Integration with the V-bits machinery
1910    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1911 
1912    This is again largely straightforward.  Mostly the otag and V bits
1913    stuff are independent.  The only point of interaction is when the V
1914    bits instrumenter creates a call to a helper function to report an
1915    uninitialised value error -- in that case it must first use schemeE
1916    to get hold of the origin tag expression for the value, and pass
1917    that to the helper too.
1918 
1919    There is the usual stuff to do with setting address range
1920    permissions.  When memory is painted undefined, we must also know
1921    the origin tag to paint with, which involves some tedious plumbing,
1922    particularly to do with the fast case stack handlers.  When memory
1923    is painted defined or noaccess then the origin tags must be forced
1924    to zero.
1925 
1926    One of the goals of the implementation was to ensure that the
1927    non-origin tracking mode isn't slowed down at all.  To do this,
1928    various functions to do with memory permissions setting (again,
1929    mostly pertaining to the stack) are duplicated for the with- and
1930    without-otag case.
1931 
1932    Dealing with stack redzones, and the NIA cache
1933    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1934 
1935    This is one of the few non-obvious parts of the implementation.
1936 
1937    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
1938    reserved area below the stack pointer, that can be used as scratch
1939    space by compiler generated code for functions.  In the Memcheck
1940    sources this is referred to as the "stack redzone".  The important
1941    thing here is that such redzones are considered volatile across
1942    function calls and returns.  So Memcheck takes care to mark them as
1943    undefined for each call and return, on the afflicted platforms.
1944    Past experience shows this is essential in order to get reliable
1945    messages about uninitialised values that come from the stack.
1946 
1947    So the question is, when we paint a redzone undefined, what origin
1948    tag should we use for it?  Consider a function f() calling g().  If
1949    we paint the redzone using an otag derived from the ExeContext of
1950    the CALL/BL instruction in f, then any errors in g causing it to
1951    use uninitialised values that happen to lie in the redzone, will be
1952    reported as having their origin in f.  Which is highly confusing.
1953 
1954    The same applies for returns: if, on a return, we paint the redzone
1955    using an origin tag derived from the ExeContext of the RET/BLR
1956    instruction in g, then any later errors in f causing it to use
1957    uninitialised values in the redzone, will be reported as having
1958    their origin in g.  Which is just as confusing.
1959 
1960    To do it right, in both cases we need to use an origin tag which
1961    pertains to the instruction which dynamically follows the CALL/BL
1962    or RET/BLR.  In short, one derived from the NIA - the "next
1963    instruction address".
1964 
1965    To make this work, Memcheck's redzone-painting helper,
1966    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
1967    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
1968    ExeContext's ECU as the basis for the otag used to paint the
1969    redzone.  The expensive part of this is converting an NIA into an
1970    ECU, since this happens once for every call and every return.  So
1971    we use a simple 511-line, 2-way set associative cache
1972    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
1973    the cost out.
1974 
1975    Further background comments
1976    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1977 
1978    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
1979    > it really just the address of the relevant ExeContext?
1980 
1981    Well, it's not the address, but a value which has a 1-1 mapping
1982    with ExeContexts, and is guaranteed not to be zero, since zero
1983    denotes (to memcheck) "unknown origin or defined value".  So these
1984    UInts are just numbers starting at 4 and incrementing by 4; each
1985    ExeContext is given a number when it is created.  (*** NOTE this
1986    confuses otags and ECUs; see comments above ***).
1987 
1988    Making these otags 32-bit regardless of the machine's word size
1989    makes the 64-bit implementation easier (next para).  And it doesn't
1990    really limit us in any way, since for the tags to overflow would
1991    require that the program somehow caused 2^30-1 different
1992    ExeContexts to be created, in which case it is probably in deep
1993    trouble.  Not to mention V will have soaked up many tens of
1994    gigabytes of memory merely to store them all.
1995 
1996    So having 64-bit origins doesn't really buy you anything, and has
1997    the following downsides:
1998 
1999    Suppose that instead, an otag is a UWord.  This would mean that, on
2000    a 64-bit target,
2001 
2002    1. It becomes hard to shadow any element of guest state which is
2003       smaller than 8 bytes.  To do so means you'd need to find some
2004       8-byte-sized hole in the guest state which you don't want to
2005       shadow, and use that instead to hold the otag.  On ppc64, the
2006       condition code register(s) are split into 20 UChar sized pieces,
2007       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2008       and so that would entail finding 160 bytes somewhere else in the
2009       guest state.
2010 
2011       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2012       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2013       same) and so I had to look for 4 untracked otag-sized areas in
2014       the guest state to make that possible.
2015 
2016       The same problem exists of course when origin tags are only 32
2017       bits, but it's less extreme.
2018 
2019    2. (More compelling) it doubles the size of the origin shadow
2020       memory.  Given that the shadow memory is organised as a fixed
2021       size cache, and that accuracy of tracking is limited by origins
2022       falling out of the cache due to space conflicts, this isn't good.
2023 
2024    > Another question: is the origin tracking perfect, or are there
2025    > cases where it fails to determine an origin?
2026 
2027    It is imperfect for at least the following reasons, and
2028    probably more:
2029 
2030    * Insufficient capacity in the origin cache.  When a line is
2031      evicted from the cache it is gone forever, and so subsequent
2032      queries for the line produce zero, indicating no origin
2033      information.  Interestingly, a line containing all zeroes can be
2034      evicted "free" from the cache, since it contains no useful
2035      information, so there is scope perhaps for some cleverer cache
2036      management schemes.  (*** NOTE, with the introduction of the
2037      second level origin tag cache, ocacheL2, this is no longer a
2038      problem. ***)
2039 
2040    * The origin cache only stores one otag per 32-bits of address
2041      space, plus 4 bits indicating which of the 4 bytes has that tag
2042      and which are considered defined.  The result is that if two
2043      undefined bytes in the same word are stored in memory, the first
2044      stored byte's origin will be lost and replaced by the origin for
2045      the second byte.
2046 
2047    * Nonzero origin tags for defined values.  Consider a binary
2048      operator application op(x,y).  Suppose y is undefined (and so has
2049      a valid nonzero origin tag), and x is defined, but erroneously
2050      has a nonzero origin tag (defined values should have tag zero).
2051      If the erroneous tag has a numeric value greater than y's tag,
2052      then the rule for propagating origin tags through binary
2053      operations, which is simply to take the unsigned max of the two
2054      tags, will erroneously propagate x's tag rather than y's.
2055 
2056    * Some obscure uses of x86/amd64 byte registers can cause lossage
2057      or confusion of origins.  %AH .. %DH are treated as different
2058      from, and unrelated to, their parent registers, %EAX .. %EDX.
2059      So some weird sequences like
2060 
2061         movb undefined-value, %AH
2062         movb defined-value, %AL
2063         .. use %AX or %EAX ..
2064 
2065      will cause the origin attributed to %AH to be ignored, since %AL,
2066      %AX, %EAX are treated as the same register, and %AH as a
2067      completely separate one.
2068 
2069    But having said all that, it actually seems to work fairly well in
2070    practice.
2071 */
2072 
2073 static UWord stats_ocacheL1_find           = 0;
2074 static UWord stats_ocacheL1_found_at_1     = 0;
2075 static UWord stats_ocacheL1_found_at_N     = 0;
2076 static UWord stats_ocacheL1_misses         = 0;
2077 static UWord stats_ocacheL1_lossage        = 0;
2078 static UWord stats_ocacheL1_movefwds       = 0;
2079 
2080 static UWord stats__ocacheL2_refs          = 0;
2081 static UWord stats__ocacheL2_misses        = 0;
2082 static UWord stats__ocacheL2_n_nodes_max   = 0;
2083 
2084 /* Cache of 32-bit values, one every 32 bits of address space */
2085 
2086 #define OC_BITS_PER_LINE 5
2087 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2088 
oc_line_offset(Addr a)2089 static INLINE UWord oc_line_offset ( Addr a ) {
2090    return (a >> 2) & (OC_W32S_PER_LINE - 1);
2091 }
is_valid_oc_tag(Addr tag)2092 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2093    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2094 }
2095 
2096 #define OC_LINES_PER_SET 2
2097 
2098 #define OC_N_SET_BITS    20
2099 #define OC_N_SETS        (1 << OC_N_SET_BITS)
2100 
2101 /* These settings give:
2102    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
2103    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
2104 */
2105 
2106 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2107 
2108 
2109 typedef
2110    struct {
2111       Addr  tag;
2112       UInt  w32[OC_W32S_PER_LINE];
2113       UChar descr[OC_W32S_PER_LINE];
2114    }
2115    OCacheLine;
2116 
2117 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
2118    in use, 'n' (nonzero) if it contains at least one valid origin tag,
2119    and 'z' if all the represented tags are zero. */
classify_OCacheLine(OCacheLine * line)2120 static UChar classify_OCacheLine ( OCacheLine* line )
2121 {
2122    UWord i;
2123    if (line->tag == 1/*invalid*/)
2124       return 'e'; /* EMPTY */
2125    tl_assert(is_valid_oc_tag(line->tag));
2126    for (i = 0; i < OC_W32S_PER_LINE; i++) {
2127       tl_assert(0 == ((~0xF) & line->descr[i]));
2128       if (line->w32[i] > 0 && line->descr[i] > 0)
2129          return 'n'; /* NONZERO - contains useful info */
2130    }
2131    return 'z'; /* ZERO - no useful info */
2132 }
2133 
2134 typedef
2135    struct {
2136       OCacheLine line[OC_LINES_PER_SET];
2137    }
2138    OCacheSet;
2139 
2140 typedef
2141    struct {
2142       OCacheSet set[OC_N_SETS];
2143    }
2144    OCache;
2145 
2146 static OCache* ocacheL1 = NULL;
2147 static UWord   ocacheL1_event_ctr = 0;
2148 
2149 static void init_ocacheL2 ( void ); /* fwds */
init_OCache(void)2150 static void init_OCache ( void )
2151 {
2152    UWord line, set;
2153    tl_assert(MC_(clo_mc_level) >= 3);
2154    tl_assert(ocacheL1 == NULL);
2155    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2156    if (ocacheL1 == NULL) {
2157       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2158                                    sizeof(OCache) );
2159    }
2160    tl_assert(ocacheL1 != NULL);
2161    for (set = 0; set < OC_N_SETS; set++) {
2162       for (line = 0; line < OC_LINES_PER_SET; line++) {
2163          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2164       }
2165    }
2166    init_ocacheL2();
2167 }
2168 
/* Swap line 'lineno' of 'set' with the line immediately before it.
   'lineno' must be in 1 .. OC_LINES_PER_SET-1. */
static void moveLineForwards ( OCacheSet* set, UWord lineno )
{
   OCacheLine* earlier;
   OCacheLine* later;
   OCacheLine  saved;

   stats_ocacheL1_movefwds++;
   tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);

   earlier = &set->line[lineno-1];
   later   = &set->line[lineno];

   saved    = *earlier;
   *earlier = *later;
   *later   = saved;
}
2178 
/* Reset 'line' so that every word has a zero value and a zero
   descriptor, and give it the tag 'tag'. */
static void zeroise_OCacheLine ( OCacheLine* line, Addr tag )
{
   UWord w;

   for (w = 0; w < OC_W32S_PER_LINE; w++) {
      line->w32[w]   = 0; /* NO ORIGIN */
      line->descr[w] = 0; /* REALLY REALLY NO ORIGIN! */
   }

   line->tag = tag;
}
2187 
2188 //////////////////////////////////////////////////////////////
2189 //// OCache backing store
2190 
2191 static OSet* ocacheL2 = NULL;
2192 
ocacheL2_malloc(HChar * cc,SizeT szB)2193 static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) {
2194    return VG_(malloc)(cc, szB);
2195 }
ocacheL2_free(void * v)2196 static void ocacheL2_free ( void* v ) {
2197    VG_(free)( v );
2198 }
2199 
2200 /* Stats: # nodes currently in tree */
2201 static UWord stats__ocacheL2_n_nodes = 0;
2202 
init_ocacheL2(void)2203 static void init_ocacheL2 ( void )
2204 {
2205    tl_assert(!ocacheL2);
2206    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2207    tl_assert(0 == offsetof(OCacheLine,tag));
2208    ocacheL2
2209       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2210                              NULL, /* fast cmp */
2211                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2212    tl_assert(ocacheL2);
2213    stats__ocacheL2_n_nodes = 0;
2214 }
2215 
2216 /* Find line with the given tag in the tree, or NULL if not found. */
ocacheL2_find_tag(Addr tag)2217 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2218 {
2219    OCacheLine* line;
2220    tl_assert(is_valid_oc_tag(tag));
2221    stats__ocacheL2_refs++;
2222    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2223    return line;
2224 }
2225 
2226 /* Delete the line with the given tag from the tree, if it is present, and
2227    free up the associated memory. */
ocacheL2_del_tag(Addr tag)2228 static void ocacheL2_del_tag ( Addr tag )
2229 {
2230    OCacheLine* line;
2231    tl_assert(is_valid_oc_tag(tag));
2232    stats__ocacheL2_refs++;
2233    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2234    if (line) {
2235       VG_(OSetGen_FreeNode)(ocacheL2, line);
2236       tl_assert(stats__ocacheL2_n_nodes > 0);
2237       stats__ocacheL2_n_nodes--;
2238    }
2239 }
2240 
2241 /* Add a copy of the given line to the tree.  It must not already be
2242    present. */
ocacheL2_add_line(OCacheLine * line)2243 static void ocacheL2_add_line ( OCacheLine* line )
2244 {
2245    OCacheLine* copy;
2246    tl_assert(is_valid_oc_tag(line->tag));
2247    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2248    tl_assert(copy);
2249    *copy = *line;
2250    stats__ocacheL2_refs++;
2251    VG_(OSetGen_Insert)( ocacheL2, copy );
2252    stats__ocacheL2_n_nodes++;
2253    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2254       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2255 }
2256 
2257 ////
2258 //////////////////////////////////////////////////////////////
2259 
2260 __attribute__((noinline))
find_OCacheLine_SLOW(Addr a)2261 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2262 {
2263    OCacheLine *victim, *inL2;
2264    UChar c;
2265    UWord line;
2266    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2267    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2268    UWord tag     = a & tagmask;
2269    tl_assert(setno >= 0 && setno < OC_N_SETS);
2270 
2271    /* we already tried line == 0; skip therefore. */
2272    for (line = 1; line < OC_LINES_PER_SET; line++) {
2273       if (ocacheL1->set[setno].line[line].tag == tag) {
2274          if (line == 1) {
2275             stats_ocacheL1_found_at_1++;
2276          } else {
2277             stats_ocacheL1_found_at_N++;
2278          }
2279          if (UNLIKELY(0 == (ocacheL1_event_ctr++
2280                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2281             moveLineForwards( &ocacheL1->set[setno], line );
2282             line--;
2283          }
2284          return &ocacheL1->set[setno].line[line];
2285       }
2286    }
2287 
2288    /* A miss.  Use the last slot.  Implicitly this means we're
2289       ejecting the line in the last slot. */
2290    stats_ocacheL1_misses++;
2291    tl_assert(line == OC_LINES_PER_SET);
2292    line--;
2293    tl_assert(line > 0);
2294 
2295    /* First, move the to-be-ejected line to the L2 cache. */
2296    victim = &ocacheL1->set[setno].line[line];
2297    c = classify_OCacheLine(victim);
2298    switch (c) {
2299       case 'e':
2300          /* the line is empty (has invalid tag); ignore it. */
2301          break;
2302       case 'z':
2303          /* line contains zeroes.  We must ensure the backing store is
2304             updated accordingly, either by copying the line there
2305             verbatim, or by ensuring it isn't present there.  We
2306             chosse the latter on the basis that it reduces the size of
2307             the backing store. */
2308          ocacheL2_del_tag( victim->tag );
2309          break;
2310       case 'n':
2311          /* line contains at least one real, useful origin.  Copy it
2312             to the backing store. */
2313          stats_ocacheL1_lossage++;
2314          inL2 = ocacheL2_find_tag( victim->tag );
2315          if (inL2) {
2316             *inL2 = *victim;
2317          } else {
2318             ocacheL2_add_line( victim );
2319          }
2320          break;
2321       default:
2322          tl_assert(0);
2323    }
2324 
2325    /* Now we must reload the L1 cache from the backing tree, if
2326       possible. */
2327    tl_assert(tag != victim->tag); /* stay sane */
2328    inL2 = ocacheL2_find_tag( tag );
2329    if (inL2) {
2330       /* We're in luck.  It's in the L2. */
2331       ocacheL1->set[setno].line[line] = *inL2;
2332    } else {
2333       /* Missed at both levels of the cache hierarchy.  We have to
2334          declare it as full of zeroes (unknown origins). */
2335       stats__ocacheL2_misses++;
2336       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2337    }
2338 
2339    /* Move it one forwards */
2340    moveLineForwards( &ocacheL1->set[setno], line );
2341    line--;
2342 
2343    return &ocacheL1->set[setno].line[line];
2344 }
2345 
find_OCacheLine(Addr a)2346 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2347 {
2348    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2349    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2350    UWord tag     = a & tagmask;
2351 
2352    stats_ocacheL1_find++;
2353 
2354    if (OC_ENABLE_ASSERTIONS) {
2355       tl_assert(setno >= 0 && setno < OC_N_SETS);
2356       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2357    }
2358 
2359    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2360       return &ocacheL1->set[setno].line[0];
2361    }
2362 
2363    return find_OCacheLine_SLOW( a );
2364 }
2365 
set_aligned_word64_Origin_to_undef(Addr a,UInt otag)2366 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2367 {
2368    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2369    //// Set the origins for a+0 .. a+7
2370    { OCacheLine* line;
2371      UWord lineoff = oc_line_offset(a);
2372      if (OC_ENABLE_ASSERTIONS) {
2373         tl_assert(lineoff >= 0
2374                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2375      }
2376      line = find_OCacheLine( a );
2377      line->descr[lineoff+0] = 0xF;
2378      line->descr[lineoff+1] = 0xF;
2379      line->w32[lineoff+0]   = otag;
2380      line->w32[lineoff+1]   = otag;
2381    }
2382    //// END inlined, specialised version of MC_(helperc_b_store8)
2383 }
2384 
2385 
2386 /*------------------------------------------------------------*/
2387 /*--- Aligned fast case permission setters,                ---*/
2388 /*--- for dealing with stacks                              ---*/
2389 /*------------------------------------------------------------*/
2390 
2391 /*--------------------- 32-bit ---------------------*/
2392 
2393 /* Nb: by "aligned" here we mean 4-byte aligned */
2394 
make_aligned_word32_undefined(Addr a)2395 static INLINE void make_aligned_word32_undefined ( Addr a )
2396 {
2397    PROF_EVENT(300, "make_aligned_word32_undefined");
2398 
2399 #ifndef PERF_FAST_STACK2
2400    make_mem_undefined(a, 4);
2401 #else
2402    {
2403       UWord   sm_off;
2404       SecMap* sm;
2405 
2406       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2407          PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
2408          make_mem_undefined(a, 4);
2409          return;
2410       }
2411 
2412       sm                  = get_secmap_for_writing_low(a);
2413       sm_off              = SM_OFF(a);
2414       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2415    }
2416 #endif
2417 }
2418 
2419 static INLINE
make_aligned_word32_undefined_w_otag(Addr a,UInt otag)2420 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2421 {
2422    make_aligned_word32_undefined(a);
2423    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2424    //// Set the origins for a+0 .. a+3
2425    { OCacheLine* line;
2426      UWord lineoff = oc_line_offset(a);
2427      if (OC_ENABLE_ASSERTIONS) {
2428         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2429      }
2430      line = find_OCacheLine( a );
2431      line->descr[lineoff] = 0xF;
2432      line->w32[lineoff]   = otag;
2433    }
2434    //// END inlined, specialised version of MC_(helperc_b_store4)
2435 }
2436 
2437 static INLINE
make_aligned_word32_noaccess(Addr a)2438 void make_aligned_word32_noaccess ( Addr a )
2439 {
2440    PROF_EVENT(310, "make_aligned_word32_noaccess");
2441 
2442 #ifndef PERF_FAST_STACK2
2443    MC_(make_mem_noaccess)(a, 4);
2444 #else
2445    {
2446       UWord   sm_off;
2447       SecMap* sm;
2448 
2449       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2450          PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
2451          MC_(make_mem_noaccess)(a, 4);
2452          return;
2453       }
2454 
2455       sm                  = get_secmap_for_writing_low(a);
2456       sm_off              = SM_OFF(a);
2457       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2458 
2459       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2460       //// Set the origins for a+0 .. a+3.
2461       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2462          OCacheLine* line;
2463          UWord lineoff = oc_line_offset(a);
2464          if (OC_ENABLE_ASSERTIONS) {
2465             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2466          }
2467          line = find_OCacheLine( a );
2468          line->descr[lineoff] = 0;
2469       }
2470       //// END inlined, specialised version of MC_(helperc_b_store4)
2471    }
2472 #endif
2473 }
2474 
2475 /*--------------------- 64-bit ---------------------*/
2476 
2477 /* Nb: by "aligned" here we mean 8-byte aligned */
2478 
make_aligned_word64_undefined(Addr a)2479 static INLINE void make_aligned_word64_undefined ( Addr a )
2480 {
2481    PROF_EVENT(320, "make_aligned_word64_undefined");
2482 
2483 #ifndef PERF_FAST_STACK2
2484    make_mem_undefined(a, 8);
2485 #else
2486    {
2487       UWord   sm_off16;
2488       SecMap* sm;
2489 
2490       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2491          PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
2492          make_mem_undefined(a, 8);
2493          return;
2494       }
2495 
2496       sm       = get_secmap_for_writing_low(a);
2497       sm_off16 = SM_OFF_16(a);
2498       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2499    }
2500 #endif
2501 }
2502 
2503 static INLINE
make_aligned_word64_undefined_w_otag(Addr a,UInt otag)2504 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2505 {
2506    make_aligned_word64_undefined(a);
2507    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2508    //// Set the origins for a+0 .. a+7
2509    { OCacheLine* line;
2510      UWord lineoff = oc_line_offset(a);
2511      tl_assert(lineoff >= 0
2512                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2513      line = find_OCacheLine( a );
2514      line->descr[lineoff+0] = 0xF;
2515      line->descr[lineoff+1] = 0xF;
2516      line->w32[lineoff+0]   = otag;
2517      line->w32[lineoff+1]   = otag;
2518    }
2519    //// END inlined, specialised version of MC_(helperc_b_store8)
2520 }
2521 
2522 static INLINE
make_aligned_word64_noaccess(Addr a)2523 void make_aligned_word64_noaccess ( Addr a )
2524 {
2525    PROF_EVENT(330, "make_aligned_word64_noaccess");
2526 
2527 #ifndef PERF_FAST_STACK2
2528    MC_(make_mem_noaccess)(a, 8);
2529 #else
2530    {
2531       UWord   sm_off16;
2532       SecMap* sm;
2533 
2534       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2535          PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
2536          MC_(make_mem_noaccess)(a, 8);
2537          return;
2538       }
2539 
2540       sm       = get_secmap_for_writing_low(a);
2541       sm_off16 = SM_OFF_16(a);
2542       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2543 
2544       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2545       //// Clear the origins for a+0 .. a+7.
2546       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2547          OCacheLine* line;
2548          UWord lineoff = oc_line_offset(a);
2549          tl_assert(lineoff >= 0
2550                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2551          line = find_OCacheLine( a );
2552          line->descr[lineoff+0] = 0;
2553          line->descr[lineoff+1] = 0;
2554       }
2555       //// END inlined, specialised version of MC_(helperc_b_store8)
2556    }
2557 #endif
2558 }
2559 
2560 
2561 /*------------------------------------------------------------*/
2562 /*--- Stack pointer adjustment                             ---*/
2563 /*------------------------------------------------------------*/
2564 
/* MAYBE_USED decorates the stack-adjustment handlers below.  When
   PERF_FAST_STACK is not defined it expands to the 'unused'
   attribute; when it is defined it expands to nothing. */
#ifndef PERF_FAST_STACK
#  define MAYBE_USED __attribute__((unused))
#else
#  define MAYBE_USED
#endif
2570 
2571 /*--------------- adjustment by 4 bytes ---------------*/
2572 
2573 MAYBE_USED
mc_new_mem_stack_4_w_ECU(Addr new_SP,UInt ecu)2574 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2575 {
2576    UInt otag = ecu | MC_OKIND_STACK;
2577    PROF_EVENT(110, "new_mem_stack_4");
2578    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2579       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2580    } else {
2581       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2582    }
2583 }
2584 
2585 MAYBE_USED
mc_new_mem_stack_4(Addr new_SP)2586 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2587 {
2588    PROF_EVENT(110, "new_mem_stack_4");
2589    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2590       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2591    } else {
2592       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2593    }
2594 }
2595 
2596 MAYBE_USED
mc_die_mem_stack_4(Addr new_SP)2597 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2598 {
2599    PROF_EVENT(120, "die_mem_stack_4");
2600    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2601       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2602    } else {
2603       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2604    }
2605 }
2606 
2607 /*--------------- adjustment by 8 bytes ---------------*/
2608 
2609 MAYBE_USED
mc_new_mem_stack_8_w_ECU(Addr new_SP,UInt ecu)2610 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2611 {
2612    UInt otag = ecu | MC_OKIND_STACK;
2613    PROF_EVENT(111, "new_mem_stack_8");
2614    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2615       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2616    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2617       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2618       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2619    } else {
2620       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2621    }
2622 }
2623 
2624 MAYBE_USED
mc_new_mem_stack_8(Addr new_SP)2625 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2626 {
2627    PROF_EVENT(111, "new_mem_stack_8");
2628    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2629       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2630    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2631       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2632       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2633    } else {
2634       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2635    }
2636 }
2637 
2638 MAYBE_USED
mc_die_mem_stack_8(Addr new_SP)2639 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2640 {
2641    PROF_EVENT(121, "die_mem_stack_8");
2642    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2643       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2644    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2645       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2646       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2647    } else {
2648       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2649    }
2650 }
2651 
2652 /*--------------- adjustment by 12 bytes ---------------*/
2653 
2654 MAYBE_USED
mc_new_mem_stack_12_w_ECU(Addr new_SP,UInt ecu)2655 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2656 {
2657    UInt otag = ecu | MC_OKIND_STACK;
2658    PROF_EVENT(112, "new_mem_stack_12");
2659    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2660       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2661       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2662    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2663       /* from previous test we don't have 8-alignment at offset +0,
2664          hence must have 8 alignment at offsets +4/-4.  Hence safe to
2665          do 4 at +0 and then 8 at +4/. */
2666       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2667       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2668    } else {
2669       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2670    }
2671 }
2672 
2673 MAYBE_USED
mc_new_mem_stack_12(Addr new_SP)2674 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2675 {
2676    PROF_EVENT(112, "new_mem_stack_12");
2677    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2678       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2679       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2680    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2681       /* from previous test we don't have 8-alignment at offset +0,
2682          hence must have 8 alignment at offsets +4/-4.  Hence safe to
2683          do 4 at +0 and then 8 at +4/. */
2684       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2685       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2686    } else {
2687       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2688    }
2689 }
2690 
2691 MAYBE_USED
mc_die_mem_stack_12(Addr new_SP)2692 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2693 {
2694    PROF_EVENT(122, "die_mem_stack_12");
2695    /* Note the -12 in the test */
2696    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2697       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2698          -4. */
2699       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2700       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2701    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2702       /* We have 4-alignment at +0, but we don't have 8-alignment at
2703          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
2704          and then 8 at -8. */
2705       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2706       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2707    } else {
2708       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2709    }
2710 }
2711 
2712 /*--------------- adjustment by 16 bytes ---------------*/
2713 
2714 MAYBE_USED
mc_new_mem_stack_16_w_ECU(Addr new_SP,UInt ecu)2715 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2716 {
2717    UInt otag = ecu | MC_OKIND_STACK;
2718    PROF_EVENT(113, "new_mem_stack_16");
2719    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2720       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2721       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2722       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2723    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2724       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2725          Hence do 4 at +0, 8 at +4, 4 at +12. */
2726       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2727       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2728       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2729    } else {
2730       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2731    }
2732 }
2733 
2734 MAYBE_USED
mc_new_mem_stack_16(Addr new_SP)2735 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2736 {
2737    PROF_EVENT(113, "new_mem_stack_16");
2738    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2739       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2740       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2741       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2742    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2743       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2744          Hence do 4 at +0, 8 at +4, 4 at +12. */
2745       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2746       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
2747       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2748    } else {
2749       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2750    }
2751 }
2752 
2753 MAYBE_USED
mc_die_mem_stack_16(Addr new_SP)2754 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2755 {
2756    PROF_EVENT(123, "die_mem_stack_16");
2757    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2758       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2759       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2760       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2761    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2762       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
2763       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2764       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2765       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2766    } else {
2767       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2768    }
2769 }
2770 
2771 /*--------------- adjustment by 32 bytes ---------------*/
2772 
2773 MAYBE_USED
mc_new_mem_stack_32_w_ECU(Addr new_SP,UInt ecu)2774 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
2775 {
2776    UInt otag = ecu | MC_OKIND_STACK;
2777    PROF_EVENT(114, "new_mem_stack_32");
2778    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2779       /* Straightforward */
2780       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2781       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2782       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2783       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2784    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2785       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
2786          +0,+28. */
2787       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2788       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2789       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2790       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
2791       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
2792    } else {
2793       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
2794    }
2795 }
2796 
2797 MAYBE_USED
mc_new_mem_stack_32(Addr new_SP)2798 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
2799 {
2800    PROF_EVENT(114, "new_mem_stack_32");
2801    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2802       /* Straightforward */
2803       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2804       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2805       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2806       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2807    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2808       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
2809          +0,+28. */
2810       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2811       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2812       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2813       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
2814       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
2815    } else {
2816       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
2817    }
2818 }
2819 
2820 MAYBE_USED
mc_die_mem_stack_32(Addr new_SP)2821 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
2822 {
2823    PROF_EVENT(124, "die_mem_stack_32");
2824    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2825       /* Straightforward */
2826       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2827       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2828       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2829       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2830    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2831       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
2832          4 at -32,-4. */
2833       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2834       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
2835       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
2836       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2837       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2838    } else {
2839       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
2840    }
2841 }
2842 
2843 /*--------------- adjustment by 112 bytes ---------------*/
2844 
2845 MAYBE_USED
mc_new_mem_stack_112_w_ECU(Addr new_SP,UInt ecu)2846 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
2847 {
2848    UInt otag = ecu | MC_OKIND_STACK;
2849    PROF_EVENT(115, "new_mem_stack_112");
2850    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2851       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2852       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2853       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2854       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2855       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2856       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2857       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2858       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2859       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2860       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2861       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2862       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2863       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
2864       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
2865    } else {
2866       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
2867    }
2868 }
2869 
2870 MAYBE_USED
mc_new_mem_stack_112(Addr new_SP)2871 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
2872 {
2873    PROF_EVENT(115, "new_mem_stack_112");
2874    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2875       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2876       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2877       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2878       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2879       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
2880       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
2881       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
2882       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
2883       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
2884       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
2885       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
2886       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
2887       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
2888       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
2889    } else {
2890       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
2891    }
2892 }
2893 
2894 MAYBE_USED
mc_die_mem_stack_112(Addr new_SP)2895 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
2896 {
2897    PROF_EVENT(125, "die_mem_stack_112");
2898    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2899       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
2900       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
2901       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
2902       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
2903       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
2904       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
2905       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
2906       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
2907       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
2908       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
2909       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2910       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2911       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2912       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2913    } else {
2914       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
2915    }
2916 }
2917 
2918 /*--------------- adjustment by 128 bytes ---------------*/
2919 
2920 MAYBE_USED
mc_new_mem_stack_128_w_ECU(Addr new_SP,UInt ecu)2921 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
2922 {
2923    UInt otag = ecu | MC_OKIND_STACK;
2924    PROF_EVENT(116, "new_mem_stack_128");
2925    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2926       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2927       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2928       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2929       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2930       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2931       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2932       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2933       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2934       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2935       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2936       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2937       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2938       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
2939       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
2940       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
2941       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
2942    } else {
2943       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
2944    }
2945 }
2946 
2947 MAYBE_USED
mc_new_mem_stack_128(Addr new_SP)2948 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
2949 {
2950    PROF_EVENT(116, "new_mem_stack_128");
2951    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2952       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2953       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2954       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2955       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2956       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
2957       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
2958       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
2959       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
2960       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
2961       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
2962       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
2963       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
2964       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
2965       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
2966       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
2967       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
2968    } else {
2969       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
2970    }
2971 }
2972 
2973 MAYBE_USED
mc_die_mem_stack_128(Addr new_SP)2974 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
2975 {
2976    PROF_EVENT(126, "die_mem_stack_128");
2977    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2978       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
2979       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
2980       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
2981       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
2982       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
2983       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
2984       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
2985       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
2986       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
2987       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
2988       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
2989       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
2990       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2991       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2992       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2993       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2994    } else {
2995       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
2996    }
2997 }
2998 
2999 /*--------------- adjustment by 144 bytes ---------------*/
3000 
3001 MAYBE_USED
mc_new_mem_stack_144_w_ECU(Addr new_SP,UInt ecu)3002 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3003 {
3004    UInt otag = ecu | MC_OKIND_STACK;
3005    PROF_EVENT(117, "new_mem_stack_144");
3006    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3007       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3008       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3009       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3010       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3011       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3012       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3013       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3014       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3015       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3016       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3017       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3018       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3019       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3020       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3021       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3022       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3023       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3024       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3025    } else {
3026       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3027    }
3028 }
3029 
3030 MAYBE_USED
mc_new_mem_stack_144(Addr new_SP)3031 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3032 {
3033    PROF_EVENT(117, "new_mem_stack_144");
3034    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3035       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3036       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3037       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3038       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3039       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3040       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3041       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3042       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3043       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3044       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3045       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3046       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3047       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3048       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3049       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3050       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3051       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3052       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3053    } else {
3054       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3055    }
3056 }
3057 
3058 MAYBE_USED
mc_die_mem_stack_144(Addr new_SP)3059 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3060 {
3061    PROF_EVENT(127, "die_mem_stack_144");
3062    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3063       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3064       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3065       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3066       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3067       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3068       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3069       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3070       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3071       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3072       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3073       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3074       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3075       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3076       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3077       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3078       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3079       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3080       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3081    } else {
3082       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3083    }
3084 }
3085 
3086 /*--------------- adjustment by 160 bytes ---------------*/
3087 
3088 MAYBE_USED
mc_new_mem_stack_160_w_ECU(Addr new_SP,UInt ecu)3089 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3090 {
3091    UInt otag = ecu | MC_OKIND_STACK;
3092    PROF_EVENT(118, "new_mem_stack_160");
3093    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3094       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3095       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3096       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3097       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3098       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3099       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3100       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3101       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3102       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3103       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3104       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3105       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3106       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3107       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3108       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3109       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3110       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3111       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3112       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3113       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3114    } else {
3115       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3116    }
3117 }
3118 
3119 MAYBE_USED
mc_new_mem_stack_160(Addr new_SP)3120 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3121 {
3122    PROF_EVENT(118, "new_mem_stack_160");
3123    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3124       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3125       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3126       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3127       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3128       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3129       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3130       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3131       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3132       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3133       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3134       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3135       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3136       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3137       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3138       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3139       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3140       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3141       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3142       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3143       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3144    } else {
3145       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3146    }
3147 }
3148 
3149 MAYBE_USED
mc_die_mem_stack_160(Addr new_SP)3150 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3151 {
3152    PROF_EVENT(128, "die_mem_stack_160");
3153    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3154       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3155       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3156       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3157       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3158       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3159       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3160       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3161       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3162       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3163       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3164       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3165       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3166       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3167       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3168       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3169       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3170       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3171       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3172       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3173       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3174    } else {
3175       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3176    }
3177 }
3178 
3179 /*--------------- adjustment by N bytes ---------------*/
3180 
/* General (any length) new-stack handler, origin-tagged variant:
   passes the len bytes starting at (a - VG_STACK_REDZONE_SZB) to
   MC_(make_mem_undefined_w_otag) with origin tag
   (ecu | MC_OKIND_STACK). */
static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
{
   const Addr start = -VG_STACK_REDZONE_SZB + a;
   const UInt otag  = ecu | MC_OKIND_STACK;

   PROF_EVENT(115, "new_mem_stack_w_otag");
   MC_(make_mem_undefined_w_otag) ( start, len, otag );
}
3187 
/* General (any length) new-stack handler without origin tag: passes
   the len bytes starting at (a - VG_STACK_REDZONE_SZB) to
   make_mem_undefined. */
static void mc_new_mem_stack ( Addr a, SizeT len )
{
   const Addr start = -VG_STACK_REDZONE_SZB + a;

   PROF_EVENT(115, "new_mem_stack");
   make_mem_undefined ( start, len );
}
3193 
/* General (any length) die-stack handler: passes the len bytes
   starting at (a - VG_STACK_REDZONE_SZB) to MC_(make_mem_noaccess). */
static void mc_die_mem_stack ( Addr a, SizeT len )
{
   const Addr start = -VG_STACK_REDZONE_SZB + a;

   PROF_EVENT(125, "die_mem_stack");
   MC_(make_mem_noaccess) ( start, len );
}
3199 
3200 
3201 /* The AMD64 ABI says:
3202 
3203    "The 128-byte area beyond the location pointed to by %rsp is considered
3204     to be reserved and shall not be modified by signal or interrupt
3205     handlers.  Therefore, functions may use this area for temporary data
3206     that is not needed across function calls.  In particular, leaf functions
3207     may use this area for their entire stack frame, rather than adjusting
3208     the stack pointer in the prologue and epilogue.  This area is known as
3209     red zone [sic]."
3210 
3211    So after any call or return we need to mark this redzone as containing
3212    undefined values.
3213 
3214    Consider this:  we're in function f.  f calls g.  g moves rsp down
3215    modestly (say 16 bytes) and writes stuff all over the red zone, making it
3216    defined.  g returns.  f is buggy and reads from parts of the red zone
3217    that it didn't write on.  But because g filled that area in, f is going
3218    to be picking up defined V bits and so any errors from reading bits of
3219    the red zone it didn't write, will be missed.  The only solution I could
3220    think of was to make the red zone undefined when g returns to f.
3221 
3222    This is in accordance with the ABI, which makes it clear the redzone
3223    is volatile across function calls.
3224 
3225    The problem occurs the other way round too: f could fill the RZ up
3226    with defined values and g could mistakenly read them.  So the RZ
3227    also needs to be nuked on function calls.
3228 */
3229 
3230 
3231 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
3232    improved so as to have a lower miss rate. */
3233 
3234 static UWord stats__nia_cache_queries = 0;
3235 static UWord stats__nia_cache_misses  = 0;
3236 
3237 typedef
3238    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
3239             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3240    WCacheEnt;
3241 
3242 #define N_NIA_TO_ECU_CACHE 511
3243 
3244 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3245 
init_nia_to_ecu_cache(void)3246 static void init_nia_to_ecu_cache ( void )
3247 {
3248    UWord       i;
3249    Addr        zero_addr = 0;
3250    ExeContext* zero_ec;
3251    UInt        zero_ecu;
3252    /* Fill all the slots with an entry for address zero, and the
3253       relevant otags accordingly.  Hence the cache is initially filled
3254       with valid data. */
3255    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3256    tl_assert(zero_ec);
3257    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3258    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3259    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3260       nia_to_ecu_cache[i].nia0 = zero_addr;
3261       nia_to_ecu_cache[i].ecu0 = zero_ecu;
3262       nia_to_ecu_cache[i].nia1 = zero_addr;
3263       nia_to_ecu_cache[i].ecu1 = zero_ecu;
3264    }
3265 }
3266 
convert_nia_to_ecu(Addr nia)3267 static inline UInt convert_nia_to_ecu ( Addr nia )
3268 {
3269    UWord i;
3270    UInt        ecu;
3271    ExeContext* ec;
3272 
3273    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3274 
3275    stats__nia_cache_queries++;
3276    i = nia % N_NIA_TO_ECU_CACHE;
3277    tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3278 
3279    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3280       return nia_to_ecu_cache[i].ecu0;
3281 
3282    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3283 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3284       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3285       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3286 #     undef SWAP
3287       return nia_to_ecu_cache[i].ecu0;
3288    }
3289 
3290    stats__nia_cache_misses++;
3291    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3292    tl_assert(ec);
3293    ecu = VG_(get_ECU_from_ExeContext)(ec);
3294    tl_assert(VG_(is_plausible_ECU)(ecu));
3295 
3296    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3297    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3298 
3299    nia_to_ecu_cache[i].nia0 = nia;
3300    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3301    return ecu;
3302 }
3303 
3304 
3305 /* Note that this serves both the origin-tracking and
3306    no-origin-tracking modes.  We assume that calls to it are
3307    sufficiently infrequent that it isn't worth specialising for the
3308    with/without origin-tracking cases. */
MC_(helperc_MAKE_STACK_UNINIT)3309 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
3310 {
3311    UInt otag;
3312    tl_assert(sizeof(UWord) == sizeof(SizeT));
3313    if (0)
3314       VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
3315                   base, len, nia );
3316 
3317    if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3318       UInt ecu = convert_nia_to_ecu ( nia );
3319       tl_assert(VG_(is_plausible_ECU)(ecu));
3320       otag = ecu | MC_OKIND_STACK;
3321    } else {
3322       tl_assert(nia == 0);
3323       otag = 0;
3324    }
3325 
3326 #  if 0
3327    /* Really slow version */
3328    MC_(make_mem_undefined)(base, len, otag);
3329 #  endif
3330 
3331 #  if 0
3332    /* Slow(ish) version, which is fairly easily seen to be correct.
3333    */
3334    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3335       make_aligned_word64_undefined(base +   0, otag);
3336       make_aligned_word64_undefined(base +   8, otag);
3337       make_aligned_word64_undefined(base +  16, otag);
3338       make_aligned_word64_undefined(base +  24, otag);
3339 
3340       make_aligned_word64_undefined(base +  32, otag);
3341       make_aligned_word64_undefined(base +  40, otag);
3342       make_aligned_word64_undefined(base +  48, otag);
3343       make_aligned_word64_undefined(base +  56, otag);
3344 
3345       make_aligned_word64_undefined(base +  64, otag);
3346       make_aligned_word64_undefined(base +  72, otag);
3347       make_aligned_word64_undefined(base +  80, otag);
3348       make_aligned_word64_undefined(base +  88, otag);
3349 
3350       make_aligned_word64_undefined(base +  96, otag);
3351       make_aligned_word64_undefined(base + 104, otag);
3352       make_aligned_word64_undefined(base + 112, otag);
3353       make_aligned_word64_undefined(base + 120, otag);
3354    } else {
3355       MC_(make_mem_undefined)(base, len, otag);
3356    }
3357 #  endif
3358 
3359    /* Idea is: go fast when
3360          * 8-aligned and length is 128
3361          * the sm is available in the main primary map
3362          * the address range falls entirely with a single secondary map
3363       If all those conditions hold, just update the V+A bits by writing
3364       directly into the vabits array.  (If the sm was distinguished, this
3365       will make a copy and then write to it.)
3366    */
3367 
3368    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3369       /* Now we know the address range is suitably sized and aligned. */
3370       UWord a_lo = (UWord)(base);
3371       UWord a_hi = (UWord)(base + 128 - 1);
3372       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3373       if (a_hi <= MAX_PRIMARY_ADDRESS) {
3374          // Now we know the entire range is within the main primary map.
3375          SecMap* sm    = get_secmap_for_writing_low(a_lo);
3376          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3377          /* Now we know that the entire address range falls within a
3378             single secondary map, and that that secondary 'lives' in
3379             the main primary map. */
3380          if (LIKELY(sm == sm_hi)) {
3381             // Finally, we know that the range is entirely within one secmap.
3382             UWord   v_off = SM_OFF(a_lo);
3383             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
3384             p[ 0] = VA_BITS16_UNDEFINED;
3385             p[ 1] = VA_BITS16_UNDEFINED;
3386             p[ 2] = VA_BITS16_UNDEFINED;
3387             p[ 3] = VA_BITS16_UNDEFINED;
3388             p[ 4] = VA_BITS16_UNDEFINED;
3389             p[ 5] = VA_BITS16_UNDEFINED;
3390             p[ 6] = VA_BITS16_UNDEFINED;
3391             p[ 7] = VA_BITS16_UNDEFINED;
3392             p[ 8] = VA_BITS16_UNDEFINED;
3393             p[ 9] = VA_BITS16_UNDEFINED;
3394             p[10] = VA_BITS16_UNDEFINED;
3395             p[11] = VA_BITS16_UNDEFINED;
3396             p[12] = VA_BITS16_UNDEFINED;
3397             p[13] = VA_BITS16_UNDEFINED;
3398             p[14] = VA_BITS16_UNDEFINED;
3399             p[15] = VA_BITS16_UNDEFINED;
3400             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3401                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3402                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3403                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3404                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3405                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3406                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3407                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3408                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3409                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3410                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3411                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3412                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3413                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3414                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3415                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3416                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3417             }
3418             return;
3419          }
3420       }
3421    }
3422 
3423    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3424    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3425       /* Now we know the address range is suitably sized and aligned. */
3426       UWord a_lo = (UWord)(base);
3427       UWord a_hi = (UWord)(base + 288 - 1);
3428       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3429       if (a_hi <= MAX_PRIMARY_ADDRESS) {
3430          // Now we know the entire range is within the main primary map.
3431          SecMap* sm    = get_secmap_for_writing_low(a_lo);
3432          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3433          /* Now we know that the entire address range falls within a
3434             single secondary map, and that that secondary 'lives' in
3435             the main primary map. */
3436          if (LIKELY(sm == sm_hi)) {
3437             // Finally, we know that the range is entirely within one secmap.
3438             UWord   v_off = SM_OFF(a_lo);
3439             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
3440             p[ 0] = VA_BITS16_UNDEFINED;
3441             p[ 1] = VA_BITS16_UNDEFINED;
3442             p[ 2] = VA_BITS16_UNDEFINED;
3443             p[ 3] = VA_BITS16_UNDEFINED;
3444             p[ 4] = VA_BITS16_UNDEFINED;
3445             p[ 5] = VA_BITS16_UNDEFINED;
3446             p[ 6] = VA_BITS16_UNDEFINED;
3447             p[ 7] = VA_BITS16_UNDEFINED;
3448             p[ 8] = VA_BITS16_UNDEFINED;
3449             p[ 9] = VA_BITS16_UNDEFINED;
3450             p[10] = VA_BITS16_UNDEFINED;
3451             p[11] = VA_BITS16_UNDEFINED;
3452             p[12] = VA_BITS16_UNDEFINED;
3453             p[13] = VA_BITS16_UNDEFINED;
3454             p[14] = VA_BITS16_UNDEFINED;
3455             p[15] = VA_BITS16_UNDEFINED;
3456             p[16] = VA_BITS16_UNDEFINED;
3457             p[17] = VA_BITS16_UNDEFINED;
3458             p[18] = VA_BITS16_UNDEFINED;
3459             p[19] = VA_BITS16_UNDEFINED;
3460             p[20] = VA_BITS16_UNDEFINED;
3461             p[21] = VA_BITS16_UNDEFINED;
3462             p[22] = VA_BITS16_UNDEFINED;
3463             p[23] = VA_BITS16_UNDEFINED;
3464             p[24] = VA_BITS16_UNDEFINED;
3465             p[25] = VA_BITS16_UNDEFINED;
3466             p[26] = VA_BITS16_UNDEFINED;
3467             p[27] = VA_BITS16_UNDEFINED;
3468             p[28] = VA_BITS16_UNDEFINED;
3469             p[29] = VA_BITS16_UNDEFINED;
3470             p[30] = VA_BITS16_UNDEFINED;
3471             p[31] = VA_BITS16_UNDEFINED;
3472             p[32] = VA_BITS16_UNDEFINED;
3473             p[33] = VA_BITS16_UNDEFINED;
3474             p[34] = VA_BITS16_UNDEFINED;
3475             p[35] = VA_BITS16_UNDEFINED;
3476             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3477                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3478                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3479                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3480                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3481                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3482                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3483                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3484                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3485                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3486                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3487                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3488                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3489                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3490                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3491                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3492                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3493                set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3494                set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3495                set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3496                set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3497                set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3498                set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3499                set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3500                set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3501                set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3502                set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3503                set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3504                set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3505                set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3506                set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3507                set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3508                set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3509                set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3510                set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3511                set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3512                set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3513             }
3514             return;
3515          }
3516       }
3517    }
3518 
3519    /* else fall into slow case */
3520    MC_(make_mem_undefined_w_otag)(base, len, otag);
3521 }
3522 
3523 
3524 /*------------------------------------------------------------*/
3525 /*--- Checking memory                                      ---*/
3526 /*------------------------------------------------------------*/
3527 
/* Outcome of checking a range of memory:
     MC_Ok       -- nothing to report
     MC_AddrErr  -- an addressability problem was found
     MC_ValueErr -- a definedness problem was found
   The values start at 5 rather than 0 (presumably so that a result
   cannot be confused with a Bool -- TODO confirm). */
typedef enum {
   MC_Ok       = 5,
   MC_AddrErr  = 6,
   MC_ValueErr = 7
} MC_ReadResult;
3535 
3536 
3537 /* Check permissions for address range.  If inadequate permissions
3538    exist, *bad_addr is set to the offending address, so the caller can
3539    know what it is. */
3540 
3541 /* Returns True if [a .. a+len) is not addressible.  Otherwise,
3542    returns False, and if bad_addr is non-NULL, sets *bad_addr to
3543    indicate the lowest failing address.  Functions below are
3544    similar. */
MC_(check_mem_is_noaccess)3545 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
3546 {
3547    SizeT i;
3548    UWord vabits2;
3549 
3550    PROF_EVENT(60, "check_mem_is_noaccess");
3551    for (i = 0; i < len; i++) {
3552       PROF_EVENT(61, "check_mem_is_noaccess(loop)");
3553       vabits2 = get_vabits2(a);
3554       if (VA_BITS2_NOACCESS != vabits2) {
3555          if (bad_addr != NULL) *bad_addr = a;
3556          return False;
3557       }
3558       a++;
3559    }
3560    return True;
3561 }
3562 
is_mem_addressable(Addr a,SizeT len,Addr * bad_addr)3563 static Bool is_mem_addressable ( Addr a, SizeT len,
3564                                  /*OUT*/Addr* bad_addr )
3565 {
3566    SizeT i;
3567    UWord vabits2;
3568 
3569    PROF_EVENT(62, "is_mem_addressable");
3570    for (i = 0; i < len; i++) {
3571       PROF_EVENT(63, "is_mem_addressable(loop)");
3572       vabits2 = get_vabits2(a);
3573       if (VA_BITS2_NOACCESS == vabits2) {
3574          if (bad_addr != NULL) *bad_addr = a;
3575          return False;
3576       }
3577       a++;
3578    }
3579    return True;
3580 }
3581 
is_mem_defined(Addr a,SizeT len,Addr * bad_addr,UInt * otag)3582 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
3583                                       /*OUT*/Addr* bad_addr,
3584                                       /*OUT*/UInt* otag )
3585 {
3586    SizeT i;
3587    UWord vabits2;
3588 
3589    PROF_EVENT(64, "is_mem_defined");
3590    DEBUG("is_mem_defined\n");
3591 
3592    if (otag)     *otag = 0;
3593    if (bad_addr) *bad_addr = 0;
3594    for (i = 0; i < len; i++) {
3595       PROF_EVENT(65, "is_mem_defined(loop)");
3596       vabits2 = get_vabits2(a);
3597       if (VA_BITS2_DEFINED != vabits2) {
3598          // Error!  Nb: Report addressability errors in preference to
3599          // definedness errors.  And don't report definedeness errors unless
3600          // --undef-value-errors=yes.
3601          if (bad_addr) {
3602             *bad_addr = a;
3603          }
3604          if (VA_BITS2_NOACCESS == vabits2) {
3605             return MC_AddrErr;
3606          }
3607          if (MC_(clo_mc_level) >= 2) {
3608             if (otag && MC_(clo_mc_level) == 3) {
3609                *otag = MC_(helperc_b_load1)( a );
3610             }
3611             return MC_ValueErr;
3612          }
3613       }
3614       a++;
3615    }
3616    return MC_Ok;
3617 }
3618 
3619 
3620 /* Like is_mem_defined but doesn't give up at the first uninitialised
3621    byte -- the entire range is always checked.  This is important for
3622    detecting errors in the case where a checked range strays into
3623    invalid memory, but that fact is not detected by the ordinary
3624    is_mem_defined(), because of an undefined section that precedes the
3625    out of range section, possibly as a result of an alignment hole in
3626    the checked data.  This version always checks the entire range and
3627    can report both a definedness and an accessbility error, if
3628    necessary. */
is_mem_defined_comprehensive(Addr a,SizeT len,Bool * errorV,Addr * bad_addrV,UInt * otagV,Bool * errorA,Addr * bad_addrA)3629 static void is_mem_defined_comprehensive (
3630                Addr a, SizeT len,
3631                /*OUT*/Bool* errorV,    /* is there a definedness err? */
3632                /*OUT*/Addr* bad_addrV, /* if so where? */
3633                /*OUT*/UInt* otagV,     /* and what's its otag? */
3634                /*OUT*/Bool* errorA,    /* is there an addressability err? */
3635                /*OUT*/Addr* bad_addrA  /* if so where? */
3636             )
3637 {
3638    SizeT i;
3639    UWord vabits2;
3640    Bool  already_saw_errV = False;
3641 
3642    PROF_EVENT(64, "is_mem_defined"); // fixme
3643    DEBUG("is_mem_defined_comprehensive\n");
3644 
3645    tl_assert(!(*errorV || *errorA));
3646 
3647    for (i = 0; i < len; i++) {
3648       PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
3649       vabits2 = get_vabits2(a);
3650       switch (vabits2) {
3651          case VA_BITS2_DEFINED:
3652             a++;
3653             break;
3654          case VA_BITS2_UNDEFINED:
3655          case VA_BITS2_PARTDEFINED:
3656             if (!already_saw_errV) {
3657                *errorV    = True;
3658                *bad_addrV = a;
3659                if (MC_(clo_mc_level) == 3) {
3660                   *otagV = MC_(helperc_b_load1)( a );
3661                } else {
3662                   *otagV = 0;
3663                }
3664                already_saw_errV = True;
3665             }
3666             a++; /* keep going */
3667             break;
3668          case VA_BITS2_NOACCESS:
3669             *errorA    = True;
3670             *bad_addrA = a;
3671             return; /* give up now. */
3672          default:
3673             tl_assert(0);
3674       }
3675    }
3676 }
3677 
3678 
3679 /* Check a zero-terminated ascii string.  Tricky -- don't want to
3680    examine the actual bytes, to find the end, until we're sure it is
3681    safe to do so. */
3682 
mc_is_defined_asciiz(Addr a,Addr * bad_addr,UInt * otag)3683 static Bool mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
3684 {
3685    UWord vabits2;
3686 
3687    PROF_EVENT(66, "mc_is_defined_asciiz");
3688    DEBUG("mc_is_defined_asciiz\n");
3689 
3690    if (otag)     *otag = 0;
3691    if (bad_addr) *bad_addr = 0;
3692    while (True) {
3693       PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
3694       vabits2 = get_vabits2(a);
3695       if (VA_BITS2_DEFINED != vabits2) {
3696          // Error!  Nb: Report addressability errors in preference to
3697          // definedness errors.  And don't report definedeness errors unless
3698          // --undef-value-errors=yes.
3699          if (bad_addr) {
3700             *bad_addr = a;
3701          }
3702          if (VA_BITS2_NOACCESS == vabits2) {
3703             return MC_AddrErr;
3704          }
3705          if (MC_(clo_mc_level) >= 2) {
3706             if (otag && MC_(clo_mc_level) == 3) {
3707                *otag = MC_(helperc_b_load1)( a );
3708             }
3709             return MC_ValueErr;
3710          }
3711       }
3712       /* Ok, a is safe to read. */
3713       if (* ((UChar*)a) == 0) {
3714          return MC_Ok;
3715       }
3716       a++;
3717    }
3718 }
3719 
3720 
3721 /*------------------------------------------------------------*/
3722 /*--- Memory event handlers                                ---*/
3723 /*------------------------------------------------------------*/
3724 
3725 static
check_mem_is_addressable(CorePart part,ThreadId tid,Char * s,Addr base,SizeT size)3726 void check_mem_is_addressable ( CorePart part, ThreadId tid, Char* s,
3727                                 Addr base, SizeT size )
3728 {
3729    Addr bad_addr;
3730    Bool ok = is_mem_addressable ( base, size, &bad_addr );
3731 
3732    if (!ok) {
3733       switch (part) {
3734       case Vg_CoreSysCall:
3735          MC_(record_memparam_error) ( tid, bad_addr,
3736                                       /*isAddrErr*/True, s, 0/*otag*/ );
3737          break;
3738 
3739       case Vg_CoreSignal:
3740          MC_(record_core_mem_error)( tid, s );
3741          break;
3742 
3743       default:
3744          VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
3745       }
3746    }
3747 }
3748 
3749 static
check_mem_is_defined(CorePart part,ThreadId tid,Char * s,Addr base,SizeT size)3750 void check_mem_is_defined ( CorePart part, ThreadId tid, Char* s,
3751                             Addr base, SizeT size )
3752 {
3753    UInt otag = 0;
3754    Addr bad_addr;
3755    MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
3756 
3757    if (MC_Ok != res) {
3758       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3759 
3760       switch (part) {
3761       case Vg_CoreSysCall:
3762          MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3763                                       isAddrErr ? 0 : otag );
3764          break;
3765 
3766       case Vg_CoreSysCallArgInMem:
3767          MC_(record_regparam_error) ( tid, s, otag );
3768          break;
3769 
3770       /* If we're being asked to jump to a silly address, record an error
3771          message before potentially crashing the entire system. */
3772       case Vg_CoreTranslate:
3773          MC_(record_jump_error)( tid, bad_addr );
3774          break;
3775 
3776       default:
3777          VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
3778       }
3779    }
3780 }
3781 
3782 static
check_mem_is_defined_asciiz(CorePart part,ThreadId tid,Char * s,Addr str)3783 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
3784                                    Char* s, Addr str )
3785 {
3786    MC_ReadResult res;
3787    Addr bad_addr = 0;   // shut GCC up
3788    UInt otag = 0;
3789 
3790    tl_assert(part == Vg_CoreSysCall);
3791    res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
3792    if (MC_Ok != res) {
3793       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3794       MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3795                                    isAddrErr ? 0 : otag );
3796    }
3797 }
3798 
3799 /* Handling of mmap and mprotect is not as simple as it seems.
3800 
3801    The underlying semantics are that memory obtained from mmap is
3802    always initialised, but may be inaccessible.  And changes to the
3803    protection of memory do not change its contents and hence not its
3804    definedness state.  Problem is we can't model
3805    inaccessible-but-with-some-definedness state; once we mark memory
3806    as inaccessible we lose all info about definedness, and so can't
3807    restore that if it is later made accessible again.
3808 
3809    One obvious thing to do is this:
3810 
3811       mmap/mprotect NONE  -> noaccess
3812       mmap/mprotect other -> defined
3813 
3814    The problem case here is: taking accessible memory, writing
3815    uninitialised data to it, mprotecting it NONE and later mprotecting
3816    it back to some accessible state causes the undefinedness to be
3817    lost.
3818 
3819    A better proposal is:
3820 
3821      (1) mmap NONE       ->  make noaccess
3822      (2) mmap other      ->  make defined
3823 
3824      (3) mprotect NONE   ->  # no change
3825      (4) mprotect other  ->  change any "noaccess" to "defined"
3826 
3827    (2) is OK because memory newly obtained from mmap really is defined
3828        (zeroed out by the kernel -- doing anything else would
3829        constitute a massive security hole.)
3830 
3831    (1) is OK because the only way to make the memory usable is via
3832        (4), in which case we also wind up correctly marking it all as
3833        defined.
3834 
3835    (3) is the weak case.  We choose not to change memory state.
3836        (presumably the range is in some mixture of "defined" and
3837        "undefined", viz, accessible but with arbitrary V bits).  Doing
3838        nothing means we retain the V bits, so that if the memory is
3839        later mprotected "other", the V bits remain unchanged, so there
3840        can be no false negatives.  The bad effect is that if there's
3841        an access in the area, then MC cannot warn; but at least we'll
3842        get a SEGV to show, so it's better than nothing.
3843 
3844    Consider the sequence (3) followed by (4).  Any memory that was
3845    "defined" or "undefined" previously retains its state (as
3846    required).  Any memory that was "noaccess" before can only have
3847    been made that way by (1), and so it's OK to change it to
3848    "defined".
3849 
3850    See https://bugs.kde.org/show_bug.cgi?id=205541
3851    and https://bugs.kde.org/show_bug.cgi?id=210268
3852 */
3853 static
mc_new_mem_mmap(Addr a,SizeT len,Bool rr,Bool ww,Bool xx,ULong di_handle)3854 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
3855                        ULong di_handle )
3856 {
3857    if (rr || ww || xx) {
3858       /* (2) mmap/mprotect other -> defined */
3859       MC_(make_mem_defined)(a, len);
3860    } else {
3861       /* (1) mmap/mprotect NONE  -> noaccess */
3862       MC_(make_mem_noaccess)(a, len);
3863    }
3864 }
3865 
3866 static
mc_new_mem_mprotect(Addr a,SizeT len,Bool rr,Bool ww,Bool xx)3867 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
3868 {
3869    if (rr || ww || xx) {
3870       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
3871       make_mem_defined_if_noaccess(a, len);
3872    } else {
3873       /* (3) mprotect NONE   ->  # no change */
3874       /* do nothing */
3875    }
3876 }
3877 
3878 
3879 static
mc_new_mem_startup(Addr a,SizeT len,Bool rr,Bool ww,Bool xx,ULong di_handle)3880 void mc_new_mem_startup( Addr a, SizeT len,
3881                          Bool rr, Bool ww, Bool xx, ULong di_handle )
3882 {
3883    // Because code is defined, initialised variables get put in the data
3884    // segment and are defined, and uninitialised variables get put in the
3885    // bss segment and are auto-zeroed (and so defined).
3886    //
3887    // It's possible that there will be padding between global variables.
3888    // This will also be auto-zeroed, and marked as defined by Memcheck.  If
3889    // a program uses it, Memcheck will not complain.  This is arguably a
3890    // false negative, but it's a grey area -- the behaviour is defined (the
3891    // padding is zeroed) but it's probably not what the user intended.  And
3892    // we can't avoid it.
3893    //
3894    // Note: we generally ignore RWX permissions, because we can't track them
3895    // without requiring more than one A bit which would slow things down a
3896    // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
3897    // So we mark any such pages as "unaddressable".
3898    DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
3899          a, (ULong)len, rr, ww, xx);
3900    mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
3901 }
3902 
3903 static
mc_post_mem_write(CorePart part,ThreadId tid,Addr a,SizeT len)3904 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
3905 {
3906    MC_(make_mem_defined)(a, len);
3907 }
3908 
3909 
3910 /*------------------------------------------------------------*/
3911 /*--- Register event handlers                              ---*/
3912 /*------------------------------------------------------------*/
3913 
3914 /* Try and get a nonzero origin for the guest state section of thread
3915    tid characterised by (offset,size).  Return 0 if nothing to show
3916    for it. */
mb_get_origin_for_guest_offset(ThreadId tid,Int offset,SizeT size)3917 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
3918                                              Int offset, SizeT size )
3919 {
3920    Int   sh2off;
3921    UInt  area[3];
3922    UInt  otag;
3923    sh2off = MC_(get_otrack_shadow_offset)( offset, size );
3924    if (sh2off == -1)
3925       return 0;  /* This piece of guest state is not tracked */
3926    tl_assert(sh2off >= 0);
3927    tl_assert(0 == (sh2off % 4));
3928    area[0] = 0x31313131;
3929    area[2] = 0x27272727;
3930    VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
3931    tl_assert(area[0] == 0x31313131);
3932    tl_assert(area[2] == 0x27272727);
3933    otag = area[1];
3934    return otag;
3935 }
3936 
3937 
3938 /* When some chunk of guest state is written, mark the corresponding
3939    shadow area as valid.  This is used to initialise arbitrarily large
3940    chunks of guest state, hence the _SIZE value, which has to be as
3941    big as the biggest guest state.
3942 */
mc_post_reg_write(CorePart part,ThreadId tid,PtrdiffT offset,SizeT size)3943 static void mc_post_reg_write ( CorePart part, ThreadId tid,
3944                                 PtrdiffT offset, SizeT size)
3945 {
3946 #  define MAX_REG_WRITE_SIZE 1696
3947    UChar area[MAX_REG_WRITE_SIZE];
3948    tl_assert(size <= MAX_REG_WRITE_SIZE);
3949    VG_(memset)(area, V_BITS8_DEFINED, size);
3950    VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
3951 #  undef MAX_REG_WRITE_SIZE
3952 }
3953 
3954 static
mc_post_reg_write_clientcall(ThreadId tid,PtrdiffT offset,SizeT size,Addr f)3955 void mc_post_reg_write_clientcall ( ThreadId tid,
3956                                     PtrdiffT offset, SizeT size, Addr f)
3957 {
3958    mc_post_reg_write(/*dummy*/0, tid, offset, size);
3959 }
3960 
3961 /* Look at the definedness of the guest's shadow state for
3962    [offset, offset+len).  If any part of that is undefined, record
3963    a parameter error.
3964 */
mc_pre_reg_read(CorePart part,ThreadId tid,Char * s,PtrdiffT offset,SizeT size)3965 static void mc_pre_reg_read ( CorePart part, ThreadId tid, Char* s,
3966                               PtrdiffT offset, SizeT size)
3967 {
3968    Int   i;
3969    Bool  bad;
3970    UInt  otag;
3971 
3972    UChar area[16];
3973    tl_assert(size <= 16);
3974 
3975    VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
3976 
3977    bad = False;
3978    for (i = 0; i < size; i++) {
3979       if (area[i] != V_BITS8_DEFINED) {
3980          bad = True;
3981          break;
3982       }
3983    }
3984 
3985    if (!bad)
3986       return;
3987 
3988    /* We've found some undefinedness.  See if we can also find an
3989       origin for it. */
3990    otag = mb_get_origin_for_guest_offset( tid, offset, size );
3991    MC_(record_regparam_error) ( tid, s, otag );
3992 }
3993 
3994 
3995 /*------------------------------------------------------------*/
3996 /*--- Functions called directly from generated code:       ---*/
3997 /*--- Load/store handlers.                                 ---*/
3998 /*------------------------------------------------------------*/
3999 
4000 /* Types:  LOADV32, LOADV16, LOADV8 are:
4001                UWord fn ( Addr a )
4002    so they return 32-bits on 32-bit machines and 64-bits on
4003    64-bit machines.  Addr has the same size as a host word.
4004 
4005    LOADV64 is always  ULong fn ( Addr a )
4006 
4007    Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4008    are a UWord, and for STOREV64 they are a ULong.
4009 */
4010 
/* If any part of '_a' indicated by the mask is 1, either '_a' is not
   naturally '_szInBytes'-aligned, or it exceeds the range covered by
   the primary map.  This is all very tricky (and important!), so let's
   work through the maths by hand (below), *and* assert for these
   values at startup.

   '_szInBytes' must be a power of two no larger than 0x10000. */
#define MASK(_szInBytes) \
   ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )

/* MASK only exists so as to define this macro.  Evaluates to nonzero
   iff '_a' is misaligned for an access of '_szInBits' bits or lies
   beyond the primary map.  Both arguments are fully parenthesised so
   that compound expressions can be passed safely. */
#define UNALIGNED_OR_HIGH(_a,_szInBits) \
   ((_a) & MASK(((_szInBits)>>3)))
4022 
4023 /* On a 32-bit machine:
4024 
4025    N_PRIMARY_BITS          == 16, so
4026    N_PRIMARY_MAP           == 0x10000, so
4027    N_PRIMARY_MAP-1         == 0xFFFF, so
4028    (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4029 
4030    MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4031            = ~ ( 0xFFFF | 0xFFFF0000 )
4032            = ~ 0xFFFF'FFFF
4033            = 0
4034 
4035    MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4036            = ~ ( 0xFFFE | 0xFFFF0000 )
4037            = ~ 0xFFFF'FFFE
4038            = 1
4039 
4040    MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4041            = ~ ( 0xFFFC | 0xFFFF0000 )
4042            = ~ 0xFFFF'FFFC
4043            = 3
4044 
4045    MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4046            = ~ ( 0xFFF8 | 0xFFFF0000 )
4047            = ~ 0xFFFF'FFF8
4048            = 7
4049 
4050    Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4051    precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
4052    the 1-byte alignment case, it is always a zero value, since MASK(1)
4053    is zero.  All as expected.
4054 
4055    On a 64-bit machine, it's more complex, since we're testing
4056    simultaneously for misalignment and for the address being at or
4057    above 32G:
4058 
4059    N_PRIMARY_BITS          == 19, so
4060    N_PRIMARY_MAP           == 0x80000, so
4061    N_PRIMARY_MAP-1         == 0x7FFFF, so
4062    (N_PRIMARY_MAP-1) << 16 == 0x7FFFF'0000, and so
4063 
4064    MASK(1) = ~ ( (0x10000 - 1) | 0x7FFFF'0000 )
4065            = ~ ( 0xFFFF | 0x7FFFF'0000 )
4066            = ~ 0x7FFFF'FFFF
4067            = 0xFFFF'FFF8'0000'0000
4068 
4069    MASK(2) = ~ ( (0x10000 - 2) | 0x7FFFF'0000 )
4070            = ~ ( 0xFFFE | 0x7FFFF'0000 )
4071            = ~ 0x7FFFF'FFFE
4072            = 0xFFFF'FFF8'0000'0001
4073 
4074    MASK(4) = ~ ( (0x10000 - 4) | 0x7FFFF'0000 )
4075            = ~ ( 0xFFFC | 0x7FFFF'0000 )
4076            = ~ 0x7FFFF'FFFC
4077            = 0xFFFF'FFF8'0000'0003
4078 
4079    MASK(8) = ~ ( (0x10000 - 8) | 0x7FFFF'0000 )
4080            = ~ ( 0xFFF8 | 0x7FFFF'0000 )
4081            = ~ 0x7FFFF'FFF8
4082            = 0xFFFF'FFF8'0000'0007
4083 */
4084 
4085 
4086 /* ------------------------ Size = 8 ------------------------ */
4087 
4088 static INLINE
mc_LOADV64(Addr a,Bool isBigEndian)4089 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4090 {
4091    PROF_EVENT(200, "mc_LOADV64");
4092 
4093 #ifndef PERF_FAST_LOADV
4094    return mc_LOADVn_slow( a, 64, isBigEndian );
4095 #else
4096    {
4097       UWord   sm_off16, vabits16;
4098       SecMap* sm;
4099 
4100       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4101          PROF_EVENT(201, "mc_LOADV64-slow1");
4102          return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4103       }
4104 
4105       sm       = get_secmap_for_reading_low(a);
4106       sm_off16 = SM_OFF_16(a);
4107       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4108 
4109       // Handle common case quickly: a is suitably aligned, is mapped, and
4110       // addressible.
4111       // Convert V bits from compact memory form to expanded register form.
4112       if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4113          return V_BITS64_DEFINED;
4114       } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4115          return V_BITS64_UNDEFINED;
4116       } else {
4117          /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4118          PROF_EVENT(202, "mc_LOADV64-slow2");
4119          return mc_LOADVn_slow( a, 64, isBigEndian );
4120       }
4121    }
4122 #endif
4123 }
4124 
MC_(helperc_LOADV64be)4125 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4126 {
4127    return mc_LOADV64(a, True);
4128 }
MC_(helperc_LOADV64le)4129 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4130 {
4131    return mc_LOADV64(a, False);
4132 }
4133 
4134 
4135 static INLINE
mc_STOREV64(Addr a,ULong vbits64,Bool isBigEndian)4136 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4137 {
4138    PROF_EVENT(210, "mc_STOREV64");
4139 
4140 #ifndef PERF_FAST_STOREV
4141    // XXX: this slow case seems to be marginally faster than the fast case!
4142    // Investigate further.
4143    mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4144 #else
4145    {
4146       UWord   sm_off16, vabits16;
4147       SecMap* sm;
4148 
4149       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4150          PROF_EVENT(211, "mc_STOREV64-slow1");
4151          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4152          return;
4153       }
4154 
4155       sm       = get_secmap_for_reading_low(a);
4156       sm_off16 = SM_OFF_16(a);
4157       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4158 
4159       if (LIKELY( !is_distinguished_sm(sm) &&
4160                           (VA_BITS16_DEFINED   == vabits16 ||
4161                            VA_BITS16_UNDEFINED == vabits16) ))
4162       {
4163          /* Handle common case quickly: a is suitably aligned, */
4164          /* is mapped, and is addressible. */
4165          // Convert full V-bits in register to compact 2-bit form.
4166          if (V_BITS64_DEFINED == vbits64) {
4167             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
4168          } else if (V_BITS64_UNDEFINED == vbits64) {
4169             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
4170          } else {
4171             /* Slow but general case -- writing partially defined bytes. */
4172             PROF_EVENT(212, "mc_STOREV64-slow2");
4173             mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4174          }
4175       } else {
4176          /* Slow but general case. */
4177          PROF_EVENT(213, "mc_STOREV64-slow3");
4178          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4179       }
4180    }
4181 #endif
4182 }
4183 
MC_(helperc_STOREV64be)4184 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4185 {
4186    mc_STOREV64(a, vbits64, True);
4187 }
MC_(helperc_STOREV64le)4188 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
4189 {
4190    mc_STOREV64(a, vbits64, False);
4191 }
4192 
4193 
4194 /* ------------------------ Size = 4 ------------------------ */
4195 
4196 static INLINE
mc_LOADV32(Addr a,Bool isBigEndian)4197 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
4198 {
4199    PROF_EVENT(220, "mc_LOADV32");
4200 
4201 #ifndef PERF_FAST_LOADV
4202    return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4203 #else
4204    {
4205       UWord   sm_off, vabits8;
4206       SecMap* sm;
4207 
4208       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4209          PROF_EVENT(221, "mc_LOADV32-slow1");
4210          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4211       }
4212 
4213       sm      = get_secmap_for_reading_low(a);
4214       sm_off  = SM_OFF(a);
4215       vabits8 = sm->vabits8[sm_off];
4216 
4217       // Handle common case quickly: a is suitably aligned, is mapped, and the
4218       // entire word32 it lives in is addressible.
4219       // Convert V bits from compact memory form to expanded register form.
4220       // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
4221       // Almost certainly not necessary, but be paranoid.
4222       if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4223          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
4224       } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
4225          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
4226       } else {
4227          /* Slow case: the 4 bytes are not all-defined or all-undefined. */
4228          PROF_EVENT(222, "mc_LOADV32-slow2");
4229          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4230       }
4231    }
4232 #endif
4233 }
4234 
MC_(helperc_LOADV32be)4235 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
4236 {
4237    return mc_LOADV32(a, True);
4238 }
MC_(helperc_LOADV32le)4239 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
4240 {
4241    return mc_LOADV32(a, False);
4242 }
4243 
4244 
4245 static INLINE
mc_STOREV32(Addr a,UWord vbits32,Bool isBigEndian)4246 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
4247 {
4248    PROF_EVENT(230, "mc_STOREV32");
4249 
4250 #ifndef PERF_FAST_STOREV
4251    mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4252 #else
4253    {
4254       UWord   sm_off, vabits8;
4255       SecMap* sm;
4256 
4257       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4258          PROF_EVENT(231, "mc_STOREV32-slow1");
4259          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4260          return;
4261       }
4262 
4263       sm      = get_secmap_for_reading_low(a);
4264       sm_off  = SM_OFF(a);
4265       vabits8 = sm->vabits8[sm_off];
4266 
4267       // Cleverness:  sometimes we don't have to write the shadow memory at
4268       // all, if we can tell that what we want to write is the same as what is
4269       // already there.  The 64/16/8 bit cases also have cleverness at this
4270       // point, but it works a little differently to the code below.
4271       if (V_BITS32_DEFINED == vbits32) {
4272          if (vabits8 == (UInt)VA_BITS8_DEFINED) {
4273             return;
4274          } else if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
4275             sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
4276          } else {
4277             // not defined/undefined, or distinguished and changing state
4278             PROF_EVENT(232, "mc_STOREV32-slow2");
4279             mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4280          }
4281       } else if (V_BITS32_UNDEFINED == vbits32) {
4282          if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
4283             return;
4284          } else if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
4285             sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
4286          } else {
4287             // not defined/undefined, or distinguished and changing state
4288             PROF_EVENT(233, "mc_STOREV32-slow3");
4289             mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4290          }
4291       } else {
4292          // Partially defined word
4293          PROF_EVENT(234, "mc_STOREV32-slow4");
4294          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4295       }
4296    }
4297 #endif
4298 }
4299 
MC_(helperc_STOREV32be)4300 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
4301 {
4302    mc_STOREV32(a, vbits32, True);
4303 }
MC_(helperc_STOREV32le)4304 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
4305 {
4306    mc_STOREV32(a, vbits32, False);
4307 }
4308 
4309 
4310 /* ------------------------ Size = 2 ------------------------ */
4311 
4312 static INLINE
mc_LOADV16(Addr a,Bool isBigEndian)4313 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
4314 {
4315    PROF_EVENT(240, "mc_LOADV16");
4316 
4317 #ifndef PERF_FAST_LOADV
4318    return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4319 #else
4320    {
4321       UWord   sm_off, vabits8;
4322       SecMap* sm;
4323 
4324       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4325          PROF_EVENT(241, "mc_LOADV16-slow1");
4326          return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4327       }
4328 
4329       sm      = get_secmap_for_reading_low(a);
4330       sm_off  = SM_OFF(a);
4331       vabits8 = sm->vabits8[sm_off];
4332       // Handle common case quickly: a is suitably aligned, is mapped, and is
4333       // addressible.
4334       // Convert V bits from compact memory form to expanded register form
4335       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
4336       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
4337       else {
4338          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4339          // the two sub-bytes.
4340          UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
4341          if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
4342          else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
4343          else {
4344             /* Slow case: the two bytes are not all-defined or all-undefined. */
4345             PROF_EVENT(242, "mc_LOADV16-slow2");
4346             return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4347          }
4348       }
4349    }
4350 #endif
4351 }
4352 
MC_(helperc_LOADV16be)4353 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
4354 {
4355    return mc_LOADV16(a, True);
4356 }
MC_(helperc_LOADV16le)4357 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
4358 {
4359    return mc_LOADV16(a, False);
4360 }
4361 
4362 
4363 static INLINE
mc_STOREV16(Addr a,UWord vbits16,Bool isBigEndian)4364 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
4365 {
4366    PROF_EVENT(250, "mc_STOREV16");
4367 
4368 #ifndef PERF_FAST_STOREV
4369    mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4370 #else
4371    {
4372       UWord   sm_off, vabits8;
4373       SecMap* sm;
4374 
4375       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4376          PROF_EVENT(251, "mc_STOREV16-slow1");
4377          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4378          return;
4379       }
4380 
4381       sm      = get_secmap_for_reading_low(a);
4382       sm_off  = SM_OFF(a);
4383       vabits8 = sm->vabits8[sm_off];
4384       if (LIKELY( !is_distinguished_sm(sm) &&
4385                           (VA_BITS8_DEFINED   == vabits8 ||
4386                            VA_BITS8_UNDEFINED == vabits8) ))
4387       {
4388          /* Handle common case quickly: a is suitably aligned, */
4389          /* is mapped, and is addressible. */
4390          // Convert full V-bits in register to compact 2-bit form.
4391          if (V_BITS16_DEFINED == vbits16) {
4392             insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED ,
4393                                          &(sm->vabits8[sm_off]) );
4394          } else if (V_BITS16_UNDEFINED == vbits16) {
4395             insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
4396                                          &(sm->vabits8[sm_off]) );
4397          } else {
4398             /* Slow but general case -- writing partially defined bytes. */
4399             PROF_EVENT(252, "mc_STOREV16-slow2");
4400             mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4401          }
4402       } else {
4403          /* Slow but general case. */
4404          PROF_EVENT(253, "mc_STOREV16-slow3");
4405          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4406       }
4407    }
4408 #endif
4409 }
4410 
MC_(helperc_STOREV16be)4411 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
4412 {
4413    mc_STOREV16(a, vbits16, True);
4414 }
MC_(helperc_STOREV16le)4415 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
4416 {
4417    mc_STOREV16(a, vbits16, False);
4418 }
4419 
4420 
4421 /* ------------------------ Size = 1 ------------------------ */
4422 /* Note: endianness is irrelevant for size == 1 */
4423 
4424 VG_REGPARM(1)
MC_(helperc_LOADV8)4425 UWord MC_(helperc_LOADV8) ( Addr a )
4426 {
4427    PROF_EVENT(260, "mc_LOADV8");
4428 
4429 #ifndef PERF_FAST_LOADV
4430    return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4431 #else
4432    {
4433       UWord   sm_off, vabits8;
4434       SecMap* sm;
4435 
4436       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4437          PROF_EVENT(261, "mc_LOADV8-slow1");
4438          return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4439       }
4440 
4441       sm      = get_secmap_for_reading_low(a);
4442       sm_off  = SM_OFF(a);
4443       vabits8 = sm->vabits8[sm_off];
4444       // Convert V bits from compact memory form to expanded register form
4445       // Handle common case quickly: a is mapped, and the entire
4446       // word32 it lives in is addressible.
4447       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
4448       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
4449       else {
4450          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4451          // the single byte.
4452          UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
4453          if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
4454          else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
4455          else {
4456             /* Slow case: the byte is not all-defined or all-undefined. */
4457             PROF_EVENT(262, "mc_LOADV8-slow2");
4458             return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4459          }
4460       }
4461    }
4462 #endif
4463 }
4464 
4465 
4466 VG_REGPARM(2)
MC_(helperc_STOREV8)4467 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
4468 {
4469    PROF_EVENT(270, "mc_STOREV8");
4470 
4471 #ifndef PERF_FAST_STOREV
4472    mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4473 #else
4474    {
4475       UWord   sm_off, vabits8;
4476       SecMap* sm;
4477 
4478       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4479          PROF_EVENT(271, "mc_STOREV8-slow1");
4480          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4481          return;
4482       }
4483 
4484       sm      = get_secmap_for_reading_low(a);
4485       sm_off  = SM_OFF(a);
4486       vabits8 = sm->vabits8[sm_off];
4487       if (LIKELY
4488             ( !is_distinguished_sm(sm) &&
4489               ( (VA_BITS8_DEFINED == vabits8 || VA_BITS8_UNDEFINED == vabits8)
4490              || (VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8))
4491               )
4492             )
4493          )
4494       {
4495          /* Handle common case quickly: a is mapped, the entire word32 it
4496             lives in is addressible. */
4497          // Convert full V-bits in register to compact 2-bit form.
4498          if (V_BITS8_DEFINED == vbits8) {
4499             insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
4500                                           &(sm->vabits8[sm_off]) );
4501          } else if (V_BITS8_UNDEFINED == vbits8) {
4502             insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
4503                                           &(sm->vabits8[sm_off]) );
4504          } else {
4505             /* Slow but general case -- writing partially defined bytes. */
4506             PROF_EVENT(272, "mc_STOREV8-slow2");
4507             mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4508          }
4509       } else {
4510          /* Slow but general case. */
4511          PROF_EVENT(273, "mc_STOREV8-slow3");
4512          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4513       }
4514    }
4515 #endif
4516 }
4517 
4518 
4519 /*------------------------------------------------------------*/
4520 /*--- Functions called directly from generated code:       ---*/
4521 /*--- Value-check failure handlers.                        ---*/
4522 /*------------------------------------------------------------*/
4523 
4524 /* Call these ones when an origin is available ... */
4525 VG_REGPARM(1)
MC_(helperc_value_check0_fail_w_o)4526 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
4527    MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
4528 }
4529 
4530 VG_REGPARM(1)
MC_(helperc_value_check1_fail_w_o)4531 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
4532    MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
4533 }
4534 
4535 VG_REGPARM(1)
MC_(helperc_value_check4_fail_w_o)4536 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
4537    MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
4538 }
4539 
4540 VG_REGPARM(1)
MC_(helperc_value_check8_fail_w_o)4541 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
4542    MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
4543 }
4544 
4545 VG_REGPARM(2)
MC_(helperc_value_checkN_fail_w_o)4546 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
4547    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
4548 }
4549 
4550 /* ... and these when an origin isn't available. */
4551 
4552 VG_REGPARM(0)
MC_(helperc_value_check0_fail_no_o)4553 void MC_(helperc_value_check0_fail_no_o) ( void ) {
4554    MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
4555 }
4556 
4557 VG_REGPARM(0)
MC_(helperc_value_check1_fail_no_o)4558 void MC_(helperc_value_check1_fail_no_o) ( void ) {
4559    MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
4560 }
4561 
4562 VG_REGPARM(0)
MC_(helperc_value_check4_fail_no_o)4563 void MC_(helperc_value_check4_fail_no_o) ( void ) {
4564    MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
4565 }
4566 
4567 VG_REGPARM(0)
MC_(helperc_value_check8_fail_no_o)4568 void MC_(helperc_value_check8_fail_no_o) ( void ) {
4569    MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
4570 }
4571 
4572 VG_REGPARM(1)
MC_(helperc_value_checkN_fail_no_o)4573 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
4574    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
4575 }
4576 
4577 
4578 /*------------------------------------------------------------*/
4579 /*--- Metadata get/set functions, for client requests.     ---*/
4580 /*------------------------------------------------------------*/
4581 
4582 // Nb: this expands the V+A bits out into register-form V bits, even though
4583 // they're in memory.  This is for backward compatibility, and because it's
4584 // probably what the user wants.
4585 
4586 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
4587    error [no longer used], 3 == addressing error. */
4588 /* Nb: We used to issue various definedness/addressability errors from here,
4589    but we took them out because they ranged from not-very-helpful to
4590    downright annoying, and they complicated the error data structures. */
mc_get_or_set_vbits_for_client(Addr a,Addr vbits,SizeT szB,Bool setting,Bool is_client_request)4591 static Int mc_get_or_set_vbits_for_client (
4592    Addr a,
4593    Addr vbits,
4594    SizeT szB,
4595    Bool setting, /* True <=> set vbits,  False <=> get vbits */
4596    Bool is_client_request /* True <=> real user request
4597                              False <=> internal call from gdbserver */
4598 )
4599 {
4600    SizeT i;
4601    Bool  ok;
4602    UChar vbits8;
4603 
4604    /* Check that arrays are addressible before doing any getting/setting.
4605       vbits to be checked only for real user request. */
4606    for (i = 0; i < szB; i++) {
4607       if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
4608           (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
4609          return 3;
4610       }
4611    }
4612 
4613    /* Do the copy */
4614    if (setting) {
4615       /* setting */
4616       for (i = 0; i < szB; i++) {
4617          ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
4618          tl_assert(ok);
4619       }
4620    } else {
4621       /* getting */
4622       for (i = 0; i < szB; i++) {
4623          ok = get_vbits8(a + i, &vbits8);
4624          tl_assert(ok);
4625          ((UChar*)vbits)[i] = vbits8;
4626       }
4627       if (is_client_request)
4628         // The bytes in vbits[] have now been set, so mark them as such.
4629         MC_(make_mem_defined)(vbits, szB);
4630    }
4631 
4632    return 1;
4633 }
4634 
4635 
4636 /*------------------------------------------------------------*/
4637 /*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
4638 /*------------------------------------------------------------*/
4639 
4640 /* For the memory leak detector, say whether an entire 64k chunk of
4641    address space is possibly in use, or not.  If in doubt return
4642    True.
4643 */
MC_(is_within_valid_secondary)4644 Bool MC_(is_within_valid_secondary) ( Addr a )
4645 {
4646    SecMap* sm = maybe_get_secmap_for ( a );
4647    if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
4648       /* Definitely not in use. */
4649       return False;
4650    } else {
4651       return True;
4652    }
4653 }
4654 
4655 
4656 /* For the memory leak detector, say whether or not a given word
4657    address is to be regarded as valid. */
MC_(is_valid_aligned_word)4658 Bool MC_(is_valid_aligned_word) ( Addr a )
4659 {
4660    tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
4661    tl_assert(VG_IS_WORD_ALIGNED(a));
4662    if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
4663       return False;
4664    if (sizeof(UWord) == 8) {
4665       if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
4666          return False;
4667    }
4668    if (UNLIKELY(MC_(in_ignored_range)(a)))
4669       return False;
4670    else
4671       return True;
4672 }
4673 
4674 
4675 /*------------------------------------------------------------*/
4676 /*--- Initialisation                                       ---*/
4677 /*------------------------------------------------------------*/
4678 
init_shadow_memory(void)4679 static void init_shadow_memory ( void )
4680 {
4681    Int     i;
4682    SecMap* sm;
4683 
4684    tl_assert(V_BIT_UNDEFINED   == 1);
4685    tl_assert(V_BIT_DEFINED     == 0);
4686    tl_assert(V_BITS8_UNDEFINED == 0xFF);
4687    tl_assert(V_BITS8_DEFINED   == 0);
4688 
4689    /* Build the 3 distinguished secondaries */
4690    sm = &sm_distinguished[SM_DIST_NOACCESS];
4691    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
4692 
4693    sm = &sm_distinguished[SM_DIST_UNDEFINED];
4694    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
4695 
4696    sm = &sm_distinguished[SM_DIST_DEFINED];
4697    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
4698 
4699    /* Set up the primary map. */
4700    /* These entries gradually get overwritten as the used address
4701       space expands. */
4702    for (i = 0; i < N_PRIMARY_MAP; i++)
4703       primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
4704 
4705    /* Auxiliary primary maps */
4706    init_auxmap_L1_L2();
4707 
4708    /* auxmap_size = auxmap_used = 0;
4709       no ... these are statically initialised */
4710 
4711    /* Secondary V bit table */
4712    secVBitTable = createSecVBitTable();
4713 }
4714 
4715 
4716 /*------------------------------------------------------------*/
4717 /*--- Sanity check machinery (permanently engaged)         ---*/
4718 /*------------------------------------------------------------*/
4719 
mc_cheap_sanity_check(void)4720 static Bool mc_cheap_sanity_check ( void )
4721 {
4722    n_sanity_cheap++;
4723    PROF_EVENT(490, "cheap_sanity_check");
4724    /* Check for sane operating level */
4725    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
4726       return False;
4727    /* nothing else useful we can rapidly check */
4728    return True;
4729 }
4730 
mc_expensive_sanity_check(void)4731 static Bool mc_expensive_sanity_check ( void )
4732 {
4733    Int     i;
4734    Word    n_secmaps_found;
4735    SecMap* sm;
4736    HChar*  errmsg;
4737    Bool    bad = False;
4738 
4739    if (0) VG_(printf)("expensive sanity check\n");
4740    if (0) return True;
4741 
4742    n_sanity_expensive++;
4743    PROF_EVENT(491, "expensive_sanity_check");
4744 
4745    /* Check for sane operating level */
4746    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
4747       return False;
4748 
4749    /* Check that the 3 distinguished SMs are still as they should be. */
4750 
4751    /* Check noaccess DSM. */
4752    sm = &sm_distinguished[SM_DIST_NOACCESS];
4753    for (i = 0; i < SM_CHUNKS; i++)
4754       if (sm->vabits8[i] != VA_BITS8_NOACCESS)
4755          bad = True;
4756 
4757    /* Check undefined DSM. */
4758    sm = &sm_distinguished[SM_DIST_UNDEFINED];
4759    for (i = 0; i < SM_CHUNKS; i++)
4760       if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
4761          bad = True;
4762 
4763    /* Check defined DSM. */
4764    sm = &sm_distinguished[SM_DIST_DEFINED];
4765    for (i = 0; i < SM_CHUNKS; i++)
4766       if (sm->vabits8[i] != VA_BITS8_DEFINED)
4767          bad = True;
4768 
4769    if (bad) {
4770       VG_(printf)("memcheck expensive sanity: "
4771                   "distinguished_secondaries have changed\n");
4772       return False;
4773    }
4774 
4775    /* If we're not checking for undefined value errors, the secondary V bit
4776     * table should be empty. */
4777    if (MC_(clo_mc_level) == 1) {
4778       if (0 != VG_(OSetGen_Size)(secVBitTable))
4779          return False;
4780    }
4781 
4782    /* check the auxiliary maps, very thoroughly */
4783    n_secmaps_found = 0;
4784    errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
4785    if (errmsg) {
4786       VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
4787       return False;
4788    }
4789 
4790    /* n_secmaps_found is now the number referred to by the auxiliary
4791       primary map.  Now add on the ones referred to by the main
4792       primary map. */
4793    for (i = 0; i < N_PRIMARY_MAP; i++) {
4794       if (primary_map[i] == NULL) {
4795          bad = True;
4796       } else {
4797          if (!is_distinguished_sm(primary_map[i]))
4798             n_secmaps_found++;
4799       }
4800    }
4801 
4802    /* check that the number of secmaps issued matches the number that
4803       are reachable (iow, no secmap leaks) */
4804    if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
4805       bad = True;
4806 
4807    if (bad) {
4808       VG_(printf)("memcheck expensive sanity: "
4809                   "apparent secmap leakage\n");
4810       return False;
4811    }
4812 
4813    if (bad) {
4814       VG_(printf)("memcheck expensive sanity: "
4815                   "auxmap covers wrong address space\n");
4816       return False;
4817    }
4818 
4819    /* there is only one pointer to each secmap (expensive) */
4820 
4821    return True;
4822 }
4823 
4824 /*------------------------------------------------------------*/
4825 /*--- Command line args                                    ---*/
4826 /*------------------------------------------------------------*/
4827 
4828 Bool          MC_(clo_partial_loads_ok)       = False;
4829 Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
4830 Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
4831 LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
4832 VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
4833 Bool          MC_(clo_show_reachable)         = False;
4834 Bool          MC_(clo_show_possibly_lost)     = True;
4835 Bool          MC_(clo_workaround_gcc296_bugs) = False;
4836 Int           MC_(clo_malloc_fill)            = -1;
4837 Int           MC_(clo_free_fill)              = -1;
4838 Int           MC_(clo_mc_level)               = 2;
4839 
mc_process_cmd_line_options(Char * arg)4840 static Bool mc_process_cmd_line_options(Char* arg)
4841 {
4842    Char* tmp_str;
4843 
4844    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
4845 
4846    /* Set MC_(clo_mc_level):
4847          1 = A bit tracking only
4848          2 = A and V bit tracking, but no V bit origins
4849          3 = A and V bit tracking, and V bit origins
4850 
4851       Do this by inspecting --undef-value-errors= and
4852       --track-origins=.  Reject the case --undef-value-errors=no
4853       --track-origins=yes as meaningless.
4854    */
4855    if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
4856       if (MC_(clo_mc_level) == 3) {
4857          goto bad_level;
4858       } else {
4859          MC_(clo_mc_level) = 1;
4860          return True;
4861       }
4862    }
4863    if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
4864       if (MC_(clo_mc_level) == 1)
4865          MC_(clo_mc_level) = 2;
4866       return True;
4867    }
4868    if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
4869       if (MC_(clo_mc_level) == 3)
4870          MC_(clo_mc_level) = 2;
4871       return True;
4872    }
4873    if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
4874       if (MC_(clo_mc_level) == 1) {
4875          goto bad_level;
4876       } else {
4877          MC_(clo_mc_level) = 3;
4878          return True;
4879       }
4880    }
4881 
4882 	if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
4883    else if VG_BOOL_CLO(arg, "--show-reachable",   MC_(clo_show_reachable))   {}
4884    else if VG_BOOL_CLO(arg, "--show-possibly-lost",
4885                                             MC_(clo_show_possibly_lost))     {}
4886    else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
4887                                             MC_(clo_workaround_gcc296_bugs)) {}
4888 
4889    else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
4890                                                0, 10*1000*1000*1000LL) {}
4891 
4892    else if VG_BINT_CLO(arg, "--freelist-big-blocks",
4893                        MC_(clo_freelist_big_blocks),
4894                        0, 10*1000*1000*1000LL) {}
4895 
4896    else if VG_XACT_CLO(arg, "--leak-check=no",
4897                             MC_(clo_leak_check), LC_Off) {}
4898    else if VG_XACT_CLO(arg, "--leak-check=summary",
4899                             MC_(clo_leak_check), LC_Summary) {}
4900    else if VG_XACT_CLO(arg, "--leak-check=yes",
4901                             MC_(clo_leak_check), LC_Full) {}
4902    else if VG_XACT_CLO(arg, "--leak-check=full",
4903                             MC_(clo_leak_check), LC_Full) {}
4904 
4905    else if VG_XACT_CLO(arg, "--leak-resolution=low",
4906                             MC_(clo_leak_resolution), Vg_LowRes) {}
4907    else if VG_XACT_CLO(arg, "--leak-resolution=med",
4908                             MC_(clo_leak_resolution), Vg_MedRes) {}
4909    else if VG_XACT_CLO(arg, "--leak-resolution=high",
4910                             MC_(clo_leak_resolution), Vg_HighRes) {}
4911 
4912    else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
4913       Int  i;
4914       Bool ok  = parse_ignore_ranges(tmp_str);
4915       if (!ok)
4916         return False;
4917       tl_assert(ignoreRanges.used >= 0);
4918       tl_assert(ignoreRanges.used < M_IGNORE_RANGES);
4919       for (i = 0; i < ignoreRanges.used; i++) {
4920          Addr s = ignoreRanges.start[i];
4921          Addr e = ignoreRanges.end[i];
4922          Addr limit = 0x4000000; /* 64M - entirely arbitrary limit */
4923          if (e <= s) {
4924             VG_(message)(Vg_DebugMsg,
4925                "ERROR: --ignore-ranges: end <= start in range:\n");
4926             VG_(message)(Vg_DebugMsg,
4927                "       0x%lx-0x%lx\n", s, e);
4928             return False;
4929          }
4930          if (e - s > limit) {
4931             VG_(message)(Vg_DebugMsg,
4932                "ERROR: --ignore-ranges: suspiciously large range:\n");
4933             VG_(message)(Vg_DebugMsg,
4934                "       0x%lx-0x%lx (size %ld)\n", s, e, (UWord)(e-s));
4935             return False;
4936 	 }
4937       }
4938    }
4939 
4940    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
4941    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
4942 
4943    else
4944       return VG_(replacement_malloc_process_cmd_line_option)(arg);
4945 
4946    return True;
4947 
4948 
4949   bad_level:
4950    VG_(fmsg_bad_option)(arg,
4951       "--track-origins=yes has no effect when --undef-value-errors=no.\n");
4952 }
4953 
mc_print_usage(void)4954 static void mc_print_usage(void)
4955 {
4956    VG_(printf)(
4957 "    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
4958 "    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
4959 "    --show-reachable=no|yes          show reachable blocks in leak check? [no]\n"
4960 "    --show-possibly-lost=no|yes      show possibly lost blocks in leak check?\n"
4961 "                                     [yes]\n"
4962 "    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
4963 "    --track-origins=no|yes           show origins of undefined values? [no]\n"
4964 "    --partial-loads-ok=no|yes        too hard to explain here; see manual [no]\n"
4965 "    --freelist-vol=<number>          volume of freed blocks queue      [20000000]\n"
4966 "    --freelist-big-blocks=<number>   releases first blocks with size >= [1000000]\n"
4967 "    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
4968 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
4969 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
4970 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
4971    );
4972 }
4973 
mc_print_debug_usage(void)4974 static void mc_print_debug_usage(void)
4975 {
4976    VG_(printf)(
4977 "    (none)\n"
4978    );
4979 }
4980 
4981 
4982 /*------------------------------------------------------------*/
4983 /*--- Client blocks                                        ---*/
4984 /*------------------------------------------------------------*/
4985 
4986 /* Client block management:
4987 
4988    This is managed as an expanding array of client block descriptors.
4989    Indices of live descriptors are issued to the client, so it can ask
4990    to free them later.  Therefore we cannot slide live entries down
4991    over dead ones.  Instead we must use free/inuse flags and scan for
4992    an empty slot at allocation time.  This in turn means allocation is
4993    relatively expensive, so we hope this does not happen too often.
4994 
4995    An unused block has start == size == 0
4996 */
4997 
4998 /* type CGenBlock is defined in mc_include.h */
4999 
5000 /* This subsystem is self-initialising. */
5001 static UWord      cgb_size = 0;
5002 static UWord      cgb_used = 0;
5003 static CGenBlock* cgbs     = NULL;
5004 
5005 /* Stats for this subsystem. */
5006 static ULong cgb_used_MAX = 0;   /* Max in use. */
5007 static ULong cgb_allocs   = 0;   /* Number of allocs. */
5008 static ULong cgb_discards = 0;   /* Number of discards. */
5009 static ULong cgb_search   = 0;   /* Number of searches. */
5010 
5011 
5012 /* Get access to the client block array. */
MC_(get_ClientBlock_array)5013 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
5014                                  /*OUT*/UWord* nBlocks )
5015 {
5016    *blocks  = cgbs;
5017    *nBlocks = cgb_used;
5018 }
5019 
5020 
5021 static
alloc_client_block(void)5022 Int alloc_client_block ( void )
5023 {
5024    UWord      i, sz_new;
5025    CGenBlock* cgbs_new;
5026 
5027    cgb_allocs++;
5028 
5029    for (i = 0; i < cgb_used; i++) {
5030       cgb_search++;
5031       if (cgbs[i].start == 0 && cgbs[i].size == 0)
5032          return i;
5033    }
5034 
5035    /* Not found.  Try to allocate one at the end. */
5036    if (cgb_used < cgb_size) {
5037       cgb_used++;
5038       return cgb_used-1;
5039    }
5040 
5041    /* Ok, we have to allocate a new one. */
5042    tl_assert(cgb_used == cgb_size);
5043    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
5044 
5045    cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
5046    for (i = 0; i < cgb_used; i++)
5047       cgbs_new[i] = cgbs[i];
5048 
5049    if (cgbs != NULL)
5050       VG_(free)( cgbs );
5051    cgbs = cgbs_new;
5052 
5053    cgb_size = sz_new;
5054    cgb_used++;
5055    if (cgb_used > cgb_used_MAX)
5056       cgb_used_MAX = cgb_used;
5057    return cgb_used-1;
5058 }
5059 
5060 
show_client_block_stats(void)5061 static void show_client_block_stats ( void )
5062 {
5063    VG_(message)(Vg_DebugMsg,
5064       "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
5065       cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
5066    );
5067 }
print_monitor_help(void)5068 static void print_monitor_help ( void )
5069 {
5070    VG_(gdb_printf)
5071       (
5072 "\n"
5073 "memcheck monitor commands:\n"
5074 "  get_vbits <addr> [<len>]\n"
5075 "        returns validity bits for <len> (or 1) bytes at <addr>\n"
5076 "            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
5077 "        Example: get_vbits 0x8049c78 10\n"
5078 "  make_memory [noaccess|undefined\n"
5079 "                     |defined|Definedifaddressable] <addr> [<len>]\n"
5080 "        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
5081 "  check_memory [addressable|defined] <addr> [<len>]\n"
5082 "        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
5083 "            and outputs a description of <addr>\n"
5084 "  leak_check [full*|summary] [reachable|possibleleak*|definiteleak]\n"
5085 "                [increased*|changed|any]\n"
5086 "                [unlimited*|limited <max_loss_records_output>]\n"
5087 "            * = defaults\n"
5088 "        Examples: leak_check\n"
5089 "                  leak_check summary any\n"
5090 "                  leak_check full reachable any limited 100\n"
5091 "  block_list <loss_record_nr>\n"
5092 "        after a leak search, shows the list of blocks of <loss_record_nr>\n"
5093 "  who_points_at <addr> [<len>]\n"
5094 "        shows places pointing inside <len> (default 1) bytes at <addr>\n"
5095 "        (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
5096 "         with len > 1, will also show \"interior pointers\")\n"
5097 "\n");
5098 }
5099 
5100 /* return True if request recognised, False otherwise */
handle_gdb_monitor_command(ThreadId tid,Char * req)5101 static Bool handle_gdb_monitor_command (ThreadId tid, Char *req)
5102 {
5103    Char* wcmd;
5104    Char s[VG_(strlen(req))]; /* copy for strtok_r */
5105    Char *ssaveptr;
5106 
5107    VG_(strcpy) (s, req);
5108 
5109    wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
5110    /* NB: if possible, avoid introducing a new command below which
5111       starts with the same first letter(s) as an already existing
5112       command. This ensures a shorter abbreviation for the user. */
5113    switch (VG_(keyword_id)
5114            ("help get_vbits leak_check make_memory check_memory "
5115             "block_list who_points_at",
5116             wcmd, kwd_report_duplicated_matches)) {
5117    case -2: /* multiple matches */
5118       return True;
5119    case -1: /* not found */
5120       return False;
5121    case  0: /* help */
5122       print_monitor_help();
5123       return True;
5124    case  1: { /* get_vbits */
5125       Addr address;
5126       SizeT szB = 1;
5127       VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5128       if (szB != 0) {
5129          UChar vbits;
5130          Int i;
5131          Int unaddressable = 0;
5132          for (i = 0; i < szB; i++) {
5133             Int res = mc_get_or_set_vbits_for_client
5134                (address+i, (Addr) &vbits, 1,
5135                 False, /* get them */
5136                 False  /* is client request */ );
5137             /* we are before the first character on next line, print a \n. */
5138             if ((i % 32) == 0 && i != 0)
5139                VG_(gdb_printf) ("\n");
5140             /* we are before the next block of 4 starts, print a space. */
5141             else if ((i % 4) == 0 && i != 0)
5142                VG_(gdb_printf) (" ");
5143             if (res == 1) {
5144                VG_(gdb_printf) ("%02x", vbits);
5145             } else {
5146                tl_assert(3 == res);
5147                unaddressable++;
5148                VG_(gdb_printf) ("__");
5149             }
5150          }
5151          VG_(gdb_printf) ("\n");
5152          if (unaddressable) {
5153             VG_(gdb_printf)
5154                ("Address %p len %ld has %d bytes unaddressable\n",
5155                 (void *)address, szB, unaddressable);
5156          }
5157       }
5158       return True;
5159    }
5160    case  2: { /* leak_check */
5161       Int err = 0;
5162       LeakCheckParams lcp;
5163       Char* kw;
5164 
5165       lcp.mode               = LC_Full;
5166       lcp.show_reachable     = False;
5167       lcp.show_possibly_lost = True;
5168       lcp.deltamode          = LCD_Increased;
5169       lcp.max_loss_records_output = 999999999;
5170       lcp.requested_by_monitor_command = True;
5171 
5172       for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
5173            kw != NULL;
5174            kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
5175          switch (VG_(keyword_id)
5176                  ("full summary "
5177                   "reachable possibleleak definiteleak "
5178                   "increased changed any "
5179                   "unlimited limited ",
5180                   kw, kwd_report_all)) {
5181          case -2: err++; break;
5182          case -1: err++; break;
5183          case  0: /* full */
5184             lcp.mode = LC_Full; break;
5185          case  1: /* summary */
5186             lcp.mode = LC_Summary; break;
5187          case  2: /* reachable */
5188             lcp.show_reachable = True;
5189             lcp.show_possibly_lost = True; break;
5190          case  3: /* possibleleak */
5191             lcp.show_reachable = False;
5192             lcp.show_possibly_lost = True; break;
5193          case  4: /* definiteleak */
5194             lcp.show_reachable = False;
5195             lcp.show_possibly_lost = False; break;
5196          case  5: /* increased */
5197             lcp.deltamode = LCD_Increased; break;
5198          case  6: /* changed */
5199             lcp.deltamode = LCD_Changed; break;
5200          case  7: /* any */
5201             lcp.deltamode = LCD_Any; break;
5202          case  8: /* unlimited */
5203             lcp.max_loss_records_output = 999999999; break;
5204          case  9: { /* limited */
5205             int int_value;
5206             char* endptr;
5207 
5208             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5209             if (wcmd == NULL) {
5210                int_value = 0;
5211                endptr = "empty"; /* to report an error below */
5212             } else {
5213                int_value = VG_(strtoll10) (wcmd, (Char **)&endptr);
5214             }
5215             if (*endptr != '\0')
5216                VG_(gdb_printf) ("missing or malformed integer value\n");
5217             else if (int_value > 0)
5218                lcp.max_loss_records_output = (UInt) int_value;
5219             else
5220                VG_(gdb_printf) ("max_loss_records_output must be >= 1, got %d\n",
5221                                 int_value);
5222             break;
5223          }
5224          default:
5225             tl_assert (0);
5226          }
5227       }
5228       if (!err)
5229          MC_(detect_memory_leaks)(tid, &lcp);
5230       return True;
5231    }
5232 
5233    case  3: { /* make_memory */
5234       Addr address;
5235       SizeT szB = 1;
5236       int kwdid = VG_(keyword_id)
5237          ("noaccess undefined defined Definedifaddressable",
5238           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5239       VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5240       if (address == (Addr) 0 && szB == 0) return True;
5241       switch (kwdid) {
5242       case -2: break;
5243       case -1: break;
5244       case  0: MC_(make_mem_noaccess) (address, szB); break;
5245       case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
5246                                                     MC_OKIND_USER ); break;
5247       case  2: MC_(make_mem_defined) ( address, szB ); break;
5248       case  3: make_mem_defined_if_addressable ( address, szB ); break;;
5249       default: tl_assert(0);
5250       }
5251       return True;
5252    }
5253 
5254    case  4: { /* check_memory */
5255       Addr address;
5256       SizeT szB = 1;
5257       Addr bad_addr;
5258       UInt okind;
5259       char* src;
5260       UInt otag;
5261       UInt ecu;
5262       ExeContext* origin_ec;
5263       MC_ReadResult res;
5264 
5265       int kwdid = VG_(keyword_id)
5266          ("addressable defined",
5267           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5268       VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5269       if (address == (Addr) 0 && szB == 0) return True;
5270       switch (kwdid) {
5271       case -2: break;
5272       case -1: break;
5273       case  0:
5274          if (is_mem_addressable ( address, szB, &bad_addr ))
5275             VG_(gdb_printf) ("Address %p len %ld addressable\n",
5276                              (void *)address, szB);
5277          else
5278             VG_(gdb_printf)
5279                ("Address %p len %ld not addressable:\nbad address %p\n",
5280                 (void *)address, szB, (void *) bad_addr);
5281          MC_(pp_describe_addr) (address);
5282          break;
5283       case  1: res = is_mem_defined ( address, szB, &bad_addr, &otag );
5284          if (MC_AddrErr == res)
5285             VG_(gdb_printf)
5286                ("Address %p len %ld not addressable:\nbad address %p\n",
5287                 (void *)address, szB, (void *) bad_addr);
5288          else if (MC_ValueErr == res) {
5289             okind = otag & 3;
5290             switch (okind) {
5291             case MC_OKIND_STACK:
5292                src = " was created by a stack allocation"; break;
5293             case MC_OKIND_HEAP:
5294                src = " was created by a heap allocation"; break;
5295             case MC_OKIND_USER:
5296                src = " was created by a client request"; break;
5297             case MC_OKIND_UNKNOWN:
5298                src = ""; break;
5299             default: tl_assert(0);
5300             }
5301             VG_(gdb_printf)
5302                ("Address %p len %ld not defined:\n"
5303                 "Uninitialised value at %p%s\n",
5304                 (void *)address, szB, (void *) bad_addr, src);
5305             ecu = otag & ~3;
5306             if (VG_(is_plausible_ECU)(ecu)) {
5307                origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
5308                VG_(pp_ExeContext)( origin_ec );
5309             }
5310          }
5311          else
5312             VG_(gdb_printf) ("Address %p len %ld defined\n",
5313                              (void *)address, szB);
5314          MC_(pp_describe_addr) (address);
5315          break;
5316       default: tl_assert(0);
5317       }
5318       return True;
5319    }
5320 
5321    case  5: { /* block_list */
5322       Char* wl;
5323       Char *endptr;
5324       UInt lr_nr = 0;
5325       wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
5326       lr_nr = VG_(strtoull10) (wl, &endptr);
5327       if (wl != NULL && *endptr != '\0') {
5328          VG_(gdb_printf) ("malformed integer\n");
5329       } else {
5330          // lr_nr-1 as what is shown to the user is 1 more than the index in lr_array.
5331          if (lr_nr == 0 || ! MC_(print_block_list) (lr_nr-1))
5332             VG_(gdb_printf) ("invalid loss record nr\n");
5333       }
5334       return True;
5335    }
5336 
5337    case  6: { /* who_points_at */
5338       Addr address;
5339       SizeT szB = 1;
5340 
5341       VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5342       if (address == (Addr) 0) {
5343          VG_(gdb_printf) ("Cannot search who points at 0x0\n");
5344          return True;
5345       }
5346       MC_(who_points_at) (address, szB);
5347       return True;
5348    }
5349 
5350    default:
5351       tl_assert(0);
5352       return False;
5353    }
5354 }
5355 
5356 /*------------------------------------------------------------*/
5357 /*--- Client requests                                      ---*/
5358 /*------------------------------------------------------------*/
5359 
mc_handle_client_request(ThreadId tid,UWord * arg,UWord * ret)5360 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
5361 {
5362    Int   i;
5363    Bool  ok;
5364    Addr  bad_addr;
5365 
5366    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
5367        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
5368        && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
5369        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
5370        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
5371        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
5372        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
5373        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
5374        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
5375        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
5376        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
5377        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
5378        && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0])
5379       return False;
5380 
5381    switch (arg[0]) {
5382       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE:
5383          ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
5384          if (!ok)
5385             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
5386          *ret = ok ? (UWord)NULL : bad_addr;
5387          break;
5388 
5389       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
5390          Bool errorV    = False;
5391          Addr bad_addrV = 0;
5392          UInt otagV     = 0;
5393          Bool errorA    = False;
5394          Addr bad_addrA = 0;
5395          is_mem_defined_comprehensive(
5396             arg[1], arg[2],
5397             &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
5398          );
5399          if (errorV) {
5400             MC_(record_user_error) ( tid, bad_addrV,
5401                                      /*isAddrErr*/False, otagV );
5402          }
5403          if (errorA) {
5404             MC_(record_user_error) ( tid, bad_addrA,
5405                                      /*isAddrErr*/True, 0 );
5406          }
5407          /* Return the lower of the two erring addresses, if any. */
5408          *ret = 0;
5409          if (errorV && !errorA) {
5410             *ret = bad_addrV;
5411          }
5412          if (!errorV && errorA) {
5413             *ret = bad_addrA;
5414          }
5415          if (errorV && errorA) {
5416             *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
5417          }
5418          break;
5419       }
5420 
5421       case VG_USERREQ__DO_LEAK_CHECK: {
5422          LeakCheckParams lcp;
5423 
5424          if (arg[1] == 0)
5425             lcp.mode = LC_Full;
5426          else if (arg[1] == 1)
5427             lcp.mode = LC_Summary;
5428          else {
5429             VG_(message)(Vg_UserMsg,
5430                          "Warning: unknown memcheck leak search mode\n");
5431             lcp.mode = LC_Full;
5432          }
5433 
5434          lcp.show_reachable = MC_(clo_show_reachable);
5435          lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
5436 
5437          if (arg[2] == 0)
5438             lcp.deltamode = LCD_Any;
5439          else if (arg[2] == 1)
5440             lcp.deltamode = LCD_Increased;
5441          else if (arg[2] == 2)
5442             lcp.deltamode = LCD_Changed;
5443          else {
5444             VG_(message)
5445                (Vg_UserMsg,
5446                 "Warning: unknown memcheck leak search deltamode\n");
5447             lcp.deltamode = LCD_Any;
5448          }
5449          lcp.max_loss_records_output = 999999999;
5450          lcp.requested_by_monitor_command = False;
5451 
5452          MC_(detect_memory_leaks)(tid, &lcp);
5453          *ret = 0; /* return value is meaningless */
5454          break;
5455       }
5456 
5457       case VG_USERREQ__MAKE_MEM_NOACCESS:
5458          MC_(make_mem_noaccess) ( arg[1], arg[2] );
5459          *ret = -1;
5460          break;
5461 
5462       case VG_USERREQ__MAKE_MEM_UNDEFINED:
5463          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
5464                                               MC_OKIND_USER );
5465          *ret = -1;
5466          break;
5467 
5468       case VG_USERREQ__MAKE_MEM_DEFINED:
5469          MC_(make_mem_defined) ( arg[1], arg[2] );
5470          *ret = -1;
5471          break;
5472 
5473       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
5474          make_mem_defined_if_addressable ( arg[1], arg[2] );
5475          *ret = -1;
5476          break;
5477 
5478       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
5479          if (arg[1] != 0 && arg[2] != 0) {
5480             i = alloc_client_block();
5481             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
5482             cgbs[i].start = arg[1];
5483             cgbs[i].size  = arg[2];
5484             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (Char *)arg[3]);
5485             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
5486             *ret = i;
5487          } else
5488             *ret = -1;
5489          break;
5490 
5491       case VG_USERREQ__DISCARD: /* discard */
5492          if (cgbs == NULL
5493              || arg[2] >= cgb_used ||
5494              (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
5495             *ret = 1;
5496          } else {
5497             tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
5498             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
5499             VG_(free)(cgbs[arg[2]].desc);
5500             cgb_discards++;
5501             *ret = 0;
5502          }
5503          break;
5504 
5505       case VG_USERREQ__GET_VBITS:
5506          *ret = mc_get_or_set_vbits_for_client
5507                    ( arg[1], arg[2], arg[3],
5508                      False /* get them */,
5509                      True /* is client request */ );
5510          break;
5511 
5512       case VG_USERREQ__SET_VBITS:
5513          *ret = mc_get_or_set_vbits_for_client
5514                    ( arg[1], arg[2], arg[3],
5515                      True /* set them */,
5516                      True /* is client request */ );
5517          break;
5518 
5519       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
5520          UWord** argp = (UWord**)arg;
5521          // MC_(bytes_leaked) et al were set by the last leak check (or zero
5522          // if no prior leak checks performed).
5523          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
5524          *argp[2] = MC_(bytes_dubious);
5525          *argp[3] = MC_(bytes_reachable);
5526          *argp[4] = MC_(bytes_suppressed);
5527          // there is no argp[5]
5528          //*argp[5] = MC_(bytes_indirect);
5529          // XXX need to make *argp[1-4] defined;  currently done in the
5530          // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
5531          *ret = 0;
5532          return True;
5533       }
5534       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
5535          UWord** argp = (UWord**)arg;
5536          // MC_(blocks_leaked) et al were set by the last leak check (or zero
5537          // if no prior leak checks performed).
5538          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
5539          *argp[2] = MC_(blocks_dubious);
5540          *argp[3] = MC_(blocks_reachable);
5541          *argp[4] = MC_(blocks_suppressed);
5542          // there is no argp[5]
5543          //*argp[5] = MC_(blocks_indirect);
5544          // XXX need to make *argp[1-4] defined;  currently done in the
5545          // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
5546          *ret = 0;
5547          return True;
5548       }
5549       case VG_USERREQ__MALLOCLIKE_BLOCK: {
5550          Addr p         = (Addr)arg[1];
5551          SizeT sizeB    =       arg[2];
5552          UInt rzB       =       arg[3];
5553          Bool is_zeroed = (Bool)arg[4];
5554 
5555          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
5556                           MC_AllocCustom, MC_(malloc_list) );
5557          if (rzB > 0) {
5558             MC_(make_mem_noaccess) ( p - rzB, rzB);
5559             MC_(make_mem_noaccess) ( p + sizeB, rzB);
5560          }
5561          return True;
5562       }
5563       case VG_USERREQ__RESIZEINPLACE_BLOCK: {
5564          Addr p         = (Addr)arg[1];
5565          SizeT oldSizeB =       arg[2];
5566          SizeT newSizeB =       arg[3];
5567          UInt rzB       =       arg[4];
5568 
5569          MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
5570          return True;
5571       }
5572       case VG_USERREQ__FREELIKE_BLOCK: {
5573          Addr p         = (Addr)arg[1];
5574          UInt rzB       =       arg[2];
5575 
5576          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
5577          return True;
5578       }
5579 
5580       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
5581          Char* s   = (Char*)arg[1];
5582          Addr  dst = (Addr) arg[2];
5583          Addr  src = (Addr) arg[3];
5584          SizeT len = (SizeT)arg[4];
5585          MC_(record_overlap_error)(tid, s, src, dst, len);
5586          return True;
5587       }
5588 
5589       case VG_USERREQ__CREATE_MEMPOOL: {
5590          Addr pool      = (Addr)arg[1];
5591          UInt rzB       =       arg[2];
5592          Bool is_zeroed = (Bool)arg[3];
5593 
5594          MC_(create_mempool) ( pool, rzB, is_zeroed );
5595          return True;
5596       }
5597 
5598       case VG_USERREQ__DESTROY_MEMPOOL: {
5599          Addr pool      = (Addr)arg[1];
5600 
5601          MC_(destroy_mempool) ( pool );
5602          return True;
5603       }
5604 
5605       case VG_USERREQ__MEMPOOL_ALLOC: {
5606          Addr pool      = (Addr)arg[1];
5607          Addr addr      = (Addr)arg[2];
5608          UInt size      =       arg[3];
5609 
5610          MC_(mempool_alloc) ( tid, pool, addr, size );
5611          return True;
5612       }
5613 
5614       case VG_USERREQ__MEMPOOL_FREE: {
5615          Addr pool      = (Addr)arg[1];
5616          Addr addr      = (Addr)arg[2];
5617 
5618          MC_(mempool_free) ( pool, addr );
5619          return True;
5620       }
5621 
5622       case VG_USERREQ__MEMPOOL_TRIM: {
5623          Addr pool      = (Addr)arg[1];
5624          Addr addr      = (Addr)arg[2];
5625          UInt size      =       arg[3];
5626 
5627          MC_(mempool_trim) ( pool, addr, size );
5628          return True;
5629       }
5630 
5631       case VG_USERREQ__MOVE_MEMPOOL: {
5632          Addr poolA     = (Addr)arg[1];
5633          Addr poolB     = (Addr)arg[2];
5634 
5635          MC_(move_mempool) ( poolA, poolB );
5636          return True;
5637       }
5638 
5639       case VG_USERREQ__MEMPOOL_CHANGE: {
5640          Addr pool      = (Addr)arg[1];
5641          Addr addrA     = (Addr)arg[2];
5642          Addr addrB     = (Addr)arg[3];
5643          UInt size      =       arg[4];
5644 
5645          MC_(mempool_change) ( pool, addrA, addrB, size );
5646          return True;
5647       }
5648 
5649       case VG_USERREQ__MEMPOOL_EXISTS: {
5650          Addr pool      = (Addr)arg[1];
5651 
5652          *ret = (UWord) MC_(mempool_exists) ( pool );
5653 	 return True;
5654       }
5655 
5656       case VG_USERREQ__GDB_MONITOR_COMMAND: {
5657          Bool handled = handle_gdb_monitor_command (tid, (Char*)arg[1]);
5658          if (handled)
5659             *ret = 1;
5660          else
5661             *ret = 0;
5662          return handled;
5663       }
5664 
5665       default:
5666          VG_(message)(
5667             Vg_UserMsg,
5668             "Warning: unknown memcheck client request code %llx\n",
5669             (ULong)arg[0]
5670          );
5671          return False;
5672    }
5673    return True;
5674 }
5675 
5676 
5677 /*------------------------------------------------------------*/
5678 /*--- Crude profiling machinery.                           ---*/
5679 /*------------------------------------------------------------*/
5680 
5681 // We track a number of interesting events (using PROF_EVENT)
5682 // if MC_PROFILE_MEMORY is defined.
5683 
5684 #ifdef MC_PROFILE_MEMORY
5685 
5686 UInt   MC_(event_ctr)[N_PROF_EVENTS];
5687 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
5688 
init_prof_mem(void)5689 static void init_prof_mem ( void )
5690 {
5691    Int i;
5692    for (i = 0; i < N_PROF_EVENTS; i++) {
5693       MC_(event_ctr)[i] = 0;
5694       MC_(event_ctr_name)[i] = NULL;
5695    }
5696 }
5697 
done_prof_mem(void)5698 static void done_prof_mem ( void )
5699 {
5700    Int  i;
5701    Bool spaced = False;
5702    for (i = 0; i < N_PROF_EVENTS; i++) {
5703       if (!spaced && (i % 10) == 0) {
5704          VG_(printf)("\n");
5705          spaced = True;
5706       }
5707       if (MC_(event_ctr)[i] > 0) {
5708          spaced = False;
5709          VG_(printf)( "prof mem event %3d: %9d   %s\n",
5710                       i, MC_(event_ctr)[i],
5711                       MC_(event_ctr_name)[i]
5712                          ? MC_(event_ctr_name)[i] : "unnamed");
5713       }
5714    }
5715 }
5716 
5717 #else
5718 
/* Profiling is compiled out: both hooks are deliberately empty. */
static void init_prof_mem ( void )
{
}

static void done_prof_mem ( void )
{
}
5721 
5722 #endif
5723 
5724 
5725 /*------------------------------------------------------------*/
5726 /*--- Origin tracking stuff                                ---*/
5727 /*------------------------------------------------------------*/
5728 
5729 /*--------------------------------------------*/
5730 /*--- Origin tracking: load handlers       ---*/
5731 /*--------------------------------------------*/
5732 
merge_origins(UInt or1,UInt or2)5733 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
5734    return or1 > or2 ? or1 : or2;
5735 }
5736 
MC_(helperc_b_load1)5737 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
5738    OCacheLine* line;
5739    UChar descr;
5740    UWord lineoff = oc_line_offset(a);
5741    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5742 
5743    if (OC_ENABLE_ASSERTIONS) {
5744       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5745    }
5746 
5747    line = find_OCacheLine( a );
5748 
5749    descr = line->descr[lineoff];
5750    if (OC_ENABLE_ASSERTIONS) {
5751       tl_assert(descr < 0x10);
5752    }
5753 
5754    if (LIKELY(0 == (descr & (1 << byteoff))))  {
5755       return 0;
5756    } else {
5757       return line->w32[lineoff];
5758    }
5759 }
5760 
MC_(helperc_b_load2)5761 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
5762    OCacheLine* line;
5763    UChar descr;
5764    UWord lineoff, byteoff;
5765 
5766    if (UNLIKELY(a & 1)) {
5767       /* Handle misaligned case, slowly. */
5768       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
5769       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
5770       return merge_origins(oLo, oHi);
5771    }
5772 
5773    lineoff = oc_line_offset(a);
5774    byteoff = a & 3; /* 0 or 2 */
5775 
5776    if (OC_ENABLE_ASSERTIONS) {
5777       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5778    }
5779    line = find_OCacheLine( a );
5780 
5781    descr = line->descr[lineoff];
5782    if (OC_ENABLE_ASSERTIONS) {
5783       tl_assert(descr < 0x10);
5784    }
5785 
5786    if (LIKELY(0 == (descr & (3 << byteoff)))) {
5787       return 0;
5788    } else {
5789       return line->w32[lineoff];
5790    }
5791 }
5792 
MC_(helperc_b_load4)5793 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
5794    OCacheLine* line;
5795    UChar descr;
5796    UWord lineoff;
5797 
5798    if (UNLIKELY(a & 3)) {
5799       /* Handle misaligned case, slowly. */
5800       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
5801       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
5802       return merge_origins(oLo, oHi);
5803    }
5804 
5805    lineoff = oc_line_offset(a);
5806    if (OC_ENABLE_ASSERTIONS) {
5807       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5808    }
5809 
5810    line = find_OCacheLine( a );
5811 
5812    descr = line->descr[lineoff];
5813    if (OC_ENABLE_ASSERTIONS) {
5814       tl_assert(descr < 0x10);
5815    }
5816 
5817    if (LIKELY(0 == descr)) {
5818       return 0;
5819    } else {
5820       return line->w32[lineoff];
5821    }
5822 }
5823 
MC_(helperc_b_load8)5824 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
5825    OCacheLine* line;
5826    UChar descrLo, descrHi, descr;
5827    UWord lineoff;
5828 
5829    if (UNLIKELY(a & 7)) {
5830       /* Handle misaligned case, slowly. */
5831       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
5832       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
5833       return merge_origins(oLo, oHi);
5834    }
5835 
5836    lineoff = oc_line_offset(a);
5837    if (OC_ENABLE_ASSERTIONS) {
5838       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5839    }
5840 
5841    line = find_OCacheLine( a );
5842 
5843    descrLo = line->descr[lineoff + 0];
5844    descrHi = line->descr[lineoff + 1];
5845    descr   = descrLo | descrHi;
5846    if (OC_ENABLE_ASSERTIONS) {
5847       tl_assert(descr < 0x10);
5848    }
5849 
5850    if (LIKELY(0 == descr)) {
5851       return 0; /* both 32-bit chunks are defined */
5852    } else {
5853       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
5854       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
5855       return merge_origins(oLo, oHi);
5856    }
5857 }
5858 
MC_(helperc_b_load16)5859 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
5860    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
5861    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
5862    UInt oBoth = merge_origins(oLo, oHi);
5863    return (UWord)oBoth;
5864 }
5865 
MC_(helperc_b_load32)5866 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
5867    UInt oQ0   = (UInt)MC_(helperc_b_load8)( a + 0 );
5868    UInt oQ1   = (UInt)MC_(helperc_b_load8)( a + 8 );
5869    UInt oQ2   = (UInt)MC_(helperc_b_load8)( a + 16 );
5870    UInt oQ3   = (UInt)MC_(helperc_b_load8)( a + 24 );
5871    UInt oAll  = merge_origins(merge_origins(oQ0, oQ1),
5872                               merge_origins(oQ2, oQ3));
5873    return (UWord)oAll;
5874 }
5875 
5876 
5877 /*--------------------------------------------*/
5878 /*--- Origin tracking: store handlers      ---*/
5879 /*--------------------------------------------*/
5880 
MC_(helperc_b_store1)5881 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
5882    OCacheLine* line;
5883    UWord lineoff = oc_line_offset(a);
5884    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5885 
5886    if (OC_ENABLE_ASSERTIONS) {
5887       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5888    }
5889 
5890    line = find_OCacheLine( a );
5891 
5892    if (d32 == 0) {
5893       line->descr[lineoff] &= ~(1 << byteoff);
5894    } else {
5895       line->descr[lineoff] |= (1 << byteoff);
5896       line->w32[lineoff] = d32;
5897    }
5898 }
5899 
MC_(helperc_b_store2)5900 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
5901    OCacheLine* line;
5902    UWord lineoff, byteoff;
5903 
5904    if (UNLIKELY(a & 1)) {
5905       /* Handle misaligned case, slowly. */
5906       MC_(helperc_b_store1)( a + 0, d32 );
5907       MC_(helperc_b_store1)( a + 1, d32 );
5908       return;
5909    }
5910 
5911    lineoff = oc_line_offset(a);
5912    byteoff = a & 3; /* 0 or 2 */
5913 
5914    if (OC_ENABLE_ASSERTIONS) {
5915       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5916    }
5917 
5918    line = find_OCacheLine( a );
5919 
5920    if (d32 == 0) {
5921       line->descr[lineoff] &= ~(3 << byteoff);
5922    } else {
5923       line->descr[lineoff] |= (3 << byteoff);
5924       line->w32[lineoff] = d32;
5925    }
5926 }
5927 
MC_(helperc_b_store4)5928 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
5929    OCacheLine* line;
5930    UWord lineoff;
5931 
5932    if (UNLIKELY(a & 3)) {
5933       /* Handle misaligned case, slowly. */
5934       MC_(helperc_b_store2)( a + 0, d32 );
5935       MC_(helperc_b_store2)( a + 2, d32 );
5936       return;
5937    }
5938 
5939    lineoff = oc_line_offset(a);
5940    if (OC_ENABLE_ASSERTIONS) {
5941       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5942    }
5943 
5944    line = find_OCacheLine( a );
5945 
5946    if (d32 == 0) {
5947       line->descr[lineoff] = 0;
5948    } else {
5949       line->descr[lineoff] = 0xF;
5950       line->w32[lineoff] = d32;
5951    }
5952 }
5953 
MC_(helperc_b_store8)5954 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
5955    OCacheLine* line;
5956    UWord lineoff;
5957 
5958    if (UNLIKELY(a & 7)) {
5959       /* Handle misaligned case, slowly. */
5960       MC_(helperc_b_store4)( a + 0, d32 );
5961       MC_(helperc_b_store4)( a + 4, d32 );
5962       return;
5963    }
5964 
5965    lineoff = oc_line_offset(a);
5966    if (OC_ENABLE_ASSERTIONS) {
5967       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5968    }
5969 
5970    line = find_OCacheLine( a );
5971 
5972    if (d32 == 0) {
5973       line->descr[lineoff + 0] = 0;
5974       line->descr[lineoff + 1] = 0;
5975    } else {
5976       line->descr[lineoff + 0] = 0xF;
5977       line->descr[lineoff + 1] = 0xF;
5978       line->w32[lineoff + 0] = d32;
5979       line->w32[lineoff + 1] = d32;
5980    }
5981 }
5982 
MC_(helperc_b_store16)5983 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
5984    MC_(helperc_b_store8)( a + 0, d32 );
5985    MC_(helperc_b_store8)( a + 8, d32 );
5986 }
5987 
MC_(helperc_b_store32)5988 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
5989    MC_(helperc_b_store8)( a +  0, d32 );
5990    MC_(helperc_b_store8)( a +  8, d32 );
5991    MC_(helperc_b_store8)( a + 16, d32 );
5992    MC_(helperc_b_store8)( a + 24, d32 );
5993 }
5994 
5995 
5996 /*--------------------------------------------*/
5997 /*--- Origin tracking: sarp handlers       ---*/
5998 /*--------------------------------------------*/
5999 
6000 __attribute__((noinline))
ocache_sarp_Set_Origins(Addr a,UWord len,UInt otag)6001 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
6002    if ((a & 1) && len >= 1) {
6003       MC_(helperc_b_store1)( a, otag );
6004       a++;
6005       len--;
6006    }
6007    if ((a & 2) && len >= 2) {
6008       MC_(helperc_b_store2)( a, otag );
6009       a += 2;
6010       len -= 2;
6011    }
6012    if (len >= 4)
6013       tl_assert(0 == (a & 3));
6014    while (len >= 4) {
6015       MC_(helperc_b_store4)( a, otag );
6016       a += 4;
6017       len -= 4;
6018    }
6019    if (len >= 2) {
6020       MC_(helperc_b_store2)( a, otag );
6021       a += 2;
6022       len -= 2;
6023    }
6024    if (len >= 1) {
6025       MC_(helperc_b_store1)( a, otag );
6026       //a++;
6027       len--;
6028    }
6029    tl_assert(len == 0);
6030 }
6031 
6032 __attribute__((noinline))
ocache_sarp_Clear_Origins(Addr a,UWord len)6033 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
6034    if ((a & 1) && len >= 1) {
6035       MC_(helperc_b_store1)( a, 0 );
6036       a++;
6037       len--;
6038    }
6039    if ((a & 2) && len >= 2) {
6040       MC_(helperc_b_store2)( a, 0 );
6041       a += 2;
6042       len -= 2;
6043    }
6044    if (len >= 4)
6045       tl_assert(0 == (a & 3));
6046    while (len >= 4) {
6047       MC_(helperc_b_store4)( a, 0 );
6048       a += 4;
6049       len -= 4;
6050    }
6051    if (len >= 2) {
6052       MC_(helperc_b_store2)( a, 0 );
6053       a += 2;
6054       len -= 2;
6055    }
6056    if (len >= 1) {
6057       MC_(helperc_b_store1)( a, 0 );
6058       //a++;
6059       len--;
6060    }
6061    tl_assert(len == 0);
6062 }
6063 
6064 
6065 /*------------------------------------------------------------*/
6066 /*--- Setup and finalisation                               ---*/
6067 /*------------------------------------------------------------*/
6068 
mc_post_clo_init(void)6069 static void mc_post_clo_init ( void )
6070 {
6071    /* If we've been asked to emit XML, mash around various other
6072       options so as to constrain the output somewhat. */
6073    if (VG_(clo_xml)) {
6074       /* Extract as much info as possible from the leak checker. */
6075       /* MC_(clo_show_reachable) = True; */
6076       MC_(clo_leak_check) = LC_Full;
6077    }
6078 
6079    if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
6080       VG_(message)(Vg_UserMsg,
6081                    "Warning: --freelist-big-blocks value %lld has no effect\n"
6082                    "as it is >= to --freelist-vol value %lld\n",
6083                    MC_(clo_freelist_big_blocks),
6084                    MC_(clo_freelist_vol));
6085 
6086    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
6087 
6088    if (MC_(clo_mc_level) == 3) {
6089       /* We're doing origin tracking. */
6090 #     ifdef PERF_FAST_STACK
6091       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
6092       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
6093       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
6094       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
6095       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
6096       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
6097       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
6098       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
6099       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
6100 #     endif
6101       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
6102    } else {
6103       /* Not doing origin tracking */
6104 #     ifdef PERF_FAST_STACK
6105       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
6106       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
6107       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
6108       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
6109       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
6110       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
6111       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
6112       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
6113       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
6114 #     endif
6115       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
6116    }
6117 
6118    /* This origin tracking cache is huge (~100M), so only initialise
6119       if we need it. */
6120    if (MC_(clo_mc_level) >= 3) {
6121       init_OCache();
6122       tl_assert(ocacheL1 != NULL);
6123       tl_assert(ocacheL2 != NULL);
6124    } else {
6125       tl_assert(ocacheL1 == NULL);
6126       tl_assert(ocacheL2 == NULL);
6127    }
6128 
6129    /* Do not check definedness of guest state if --undef-value-errors=no */
6130    if (MC_(clo_mc_level) >= 2)
6131       VG_(track_pre_reg_read) ( mc_pre_reg_read );
6132 }
6133 
print_SM_info(char * type,int n_SMs)6134 static void print_SM_info(char* type, int n_SMs)
6135 {
6136    VG_(message)(Vg_DebugMsg,
6137       " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
6138       type,
6139       n_SMs,
6140       n_SMs * sizeof(SecMap) / 1024UL,
6141       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
6142 }
6143 
/* Tool finalisation.  Prints the malloc statistics, runs the final
   leak check (or a hint on how to enable it), prints a few usage
   hints, and, if VG_(clo_stats) is set, dumps internal statistics
   about shadow memory and the origin tracking caches.
   'exitcode' is not used. */
static void mc_fini ( Int exitcode )
{
   MC_(print_malloc_stats)();

   if (MC_(clo_leak_check) != LC_Off) {
      /* Final leak check, using the settings from the command line. */
      LeakCheckParams lcp;
      lcp.mode = MC_(clo_leak_check);
      lcp.show_reachable = MC_(clo_show_reachable);
      lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
      lcp.deltamode = LCD_Any;
      lcp.max_loss_records_output = 999999999;
      lcp.requested_by_monitor_command = False;
      MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
   } else {
      if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
         VG_(umsg)(
            "For a detailed leak analysis, rerun with: --leak-check=full\n"
            "\n"
         );
      }
   }

   if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
      VG_(message)(Vg_UserMsg,
                   "For counts of detected and suppressed errors, rerun with: -v\n");
   }

   /* Only suggest origin tracking when value errors were reported
      and the tool ran at level 2, i.e. without origin tracking. */
   if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
       && MC_(clo_mc_level) == 2) {
      VG_(message)(Vg_UserMsg,
                   "Use --track-origins=yes to see where "
                   "uninitialised values come from\n");
   }

   done_prof_mem();

   if (VG_(clo_stats)) {
      SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;

      VG_(message)(Vg_DebugMsg,
         " memcheck: sanity checks: %d cheap, %d expensive\n",
         n_sanity_cheap, n_sanity_expensive );
      VG_(message)(Vg_DebugMsg,
         " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
         n_auxmap_L2_nodes,
         n_auxmap_L2_nodes * 64,
         n_auxmap_L2_nodes / 16 );
      /* The ratio is computed in unsigned long long (10ULL * ...), so
         it is printed with %llu.  The divisor is forced to be at
         least 1 to avoid a division by zero. */
      VG_(message)(Vg_DebugMsg,
         " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %llu:10\n",
         n_auxmap_L1_searches, n_auxmap_L1_cmps,
         (10ULL * n_auxmap_L1_cmps)
            / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
      );
      VG_(message)(Vg_DebugMsg,
         " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
         n_auxmap_L2_searches, n_auxmap_L2_nodes
      );

      print_SM_info("n_issued     ", n_issued_SMs);
      print_SM_info("n_deissued   ", n_deissued_SMs);
      print_SM_info("max_noaccess ", max_noaccess_SMs);
      print_SM_info("max_undefined", max_undefined_SMs);
      print_SM_info("max_defined  ", max_defined_SMs);
      print_SM_info("max_non_DSM  ", max_non_DSM_SMs);

      // Three DSMs, plus the non-DSM ones
      max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
      // The 3*sizeof(Word) bytes is the AVL node metadata size.
      // The VG_ROUNDUP is because the OSet pool allocator will/must align
      // the elements on pointer size.
      // Note that the pool allocator has some additional small overhead
      // which is not counted in the below.
      // Hardwiring this logic sucks, but I don't see how else to do it.
      max_secVBit_szB = max_secVBit_nodes *
            (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
      max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;

      /* The *_szB values are SizeT, hence %lu rather than %ld. */
      VG_(message)(Vg_DebugMsg,
         " memcheck: max sec V bit nodes:    %d (%luk, %luM)\n",
         max_secVBit_nodes, max_secVBit_szB / 1024,
                            max_secVBit_szB / (1024 * 1024));
      VG_(message)(Vg_DebugMsg,
         " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
         sec_vbits_new_nodes + sec_vbits_updates,
         sec_vbits_new_nodes, sec_vbits_updates );
      VG_(message)(Vg_DebugMsg,
         " memcheck: max shadow mem size:   %luk, %luM\n",
         max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));

      if (MC_(clo_mc_level) >= 3) {
         /* Origin tracking cache statistics. */
         VG_(message)(Vg_DebugMsg,
                      " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
                      stats_ocacheL1_find,
                      stats_ocacheL1_misses,
                      stats_ocacheL1_lossage );
         /* "at 0" is what remains of the refs once the misses and the
            finds at position 1 and at 2+ are subtracted. */
         VG_(message)(Vg_DebugMsg,
                      " ocacheL1: %'12lu at 0   %'12lu at 1\n",
                      stats_ocacheL1_find - stats_ocacheL1_misses
                         - stats_ocacheL1_found_at_1
                         - stats_ocacheL1_found_at_N,
                      stats_ocacheL1_found_at_1 );
         VG_(message)(Vg_DebugMsg,
                      " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
                      stats_ocacheL1_found_at_N,
                      stats_ocacheL1_movefwds );
         VG_(message)(Vg_DebugMsg,
                      " ocacheL1: %'12lu sizeB  %'12u useful\n",
                      (UWord)sizeof(OCache),
                      4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
         VG_(message)(Vg_DebugMsg,
                      " ocacheL2: %'12lu refs   %'12lu misses\n",
                      stats__ocacheL2_refs,
                      stats__ocacheL2_misses );
         VG_(message)(Vg_DebugMsg,
                      " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
                      stats__ocacheL2_n_nodes_max,
                      stats__ocacheL2_n_nodes );
         VG_(message)(Vg_DebugMsg,
                      " niacache: %'12lu refs   %'12lu misses\n",
                      stats__nia_cache_queries, stats__nia_cache_misses);
      } else {
         /* Below level 3 the caches must never have been allocated. */
         tl_assert(ocacheL1 == NULL);
         tl_assert(ocacheL2 == NULL);
      }
   }

   /* Disabled debugging aid; change the condition to enable it. */
   if (0) {
      VG_(message)(Vg_DebugMsg,
        "------ Valgrind's client block stats follow ---------------\n" );
      show_client_block_stats();
   }
}
6276 
6277 /* mark the given addr/len unaddressable for watchpoint implementation
6278    The PointKind will be handled at access time */
mc_mark_unaddressable_for_watchpoint(PointKind kind,Bool insert,Addr addr,SizeT len)6279 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
6280                                                   Addr addr, SizeT len)
6281 {
6282    /* GDBTD this is somewhat fishy. We might rather have to save the previous
6283       accessibility and definedness in gdbserver so as to allow restoring it
6284       properly. Currently, we assume that the user only watches things
6285       which are properly addressable and defined */
6286    if (insert)
6287       MC_(make_mem_noaccess) (addr, len);
6288    else
6289       MC_(make_mem_defined)  (addr, len);
6290    return True;
6291 }
6292 
mc_pre_clo_init(void)6293 static void mc_pre_clo_init(void)
6294 {
6295    VG_(details_name)            ("Memcheck");
6296    VG_(details_version)         (NULL);
6297    VG_(details_description)     ("a memory error detector");
6298    VG_(details_copyright_author)(
6299       "Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.");
6300    VG_(details_bug_reports_to)  (VG_BUGS_TO);
6301    VG_(details_avg_translation_sizeB) ( 640 );
6302 
6303    VG_(basic_tool_funcs)          (mc_post_clo_init,
6304                                    MC_(instrument),
6305                                    mc_fini);
6306 
6307    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
6308 
6309 
6310    VG_(needs_core_errors)         ();
6311    VG_(needs_tool_errors)         (MC_(eq_Error),
6312                                    MC_(before_pp_Error),
6313                                    MC_(pp_Error),
6314                                    True,/*show TIDs for errors*/
6315                                    MC_(update_Error_extra),
6316                                    MC_(is_recognised_suppression),
6317                                    MC_(read_extra_suppression_info),
6318                                    MC_(error_matches_suppression),
6319                                    MC_(get_error_name),
6320                                    MC_(get_extra_suppression_info));
6321    VG_(needs_libc_freeres)        ();
6322    VG_(needs_command_line_options)(mc_process_cmd_line_options,
6323                                    mc_print_usage,
6324                                    mc_print_debug_usage);
6325    VG_(needs_client_requests)     (mc_handle_client_request);
6326    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
6327                                    mc_expensive_sanity_check);
6328    VG_(needs_malloc_replacement)  (MC_(malloc),
6329                                    MC_(__builtin_new),
6330                                    MC_(__builtin_vec_new),
6331                                    MC_(memalign),
6332                                    MC_(calloc),
6333                                    MC_(free),
6334                                    MC_(__builtin_delete),
6335                                    MC_(__builtin_vec_delete),
6336                                    MC_(realloc),
6337                                    MC_(malloc_usable_size),
6338                                    MC_MALLOC_DEFAULT_REDZONE_SZB );
6339    MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
6340 
6341    VG_(needs_xml_output)          ();
6342 
6343    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
6344    VG_(track_new_mem_stack_signal)( make_mem_undefined_w_tid );
6345    // We assume that brk()/sbrk() does not initialise new memory.  Is this
6346    // accurate?  John Reiser says:
6347    //
6348    //   0) sbrk() can *decrease* process address space.  No zero fill is done
6349    //   for a decrease, not even the fragment on the high end of the last page
6350    //   that is beyond the new highest address.  For maximum safety and
6351    //   portability, then the bytes in the last page that reside above [the
6352    //   new] sbrk(0) should be considered to be uninitialized, but in practice
6353    //   it is exceedingly likely that they will retain their previous
6354    //   contents.
6355    //
6356    //   1) If an increase is large enough to require new whole pages, then
6357    //   those new whole pages (like all new pages) are zero-filled by the
6358    //   operating system.  So if sbrk(0) already is page aligned, then
6359    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
6360    //
6361    //   2) Any increase that lies within an existing allocated page is not
6362    //   changed.  So if (x = sbrk(0)) is not page aligned, then
6363    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
6364    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
6365    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
6366    //   of them come along for the ride because the operating system deals
6367    //   only in whole pages.  Again, for maximum safety and portability, then
6368    //   anything that lives above [the new] sbrk(0) should be considered
6369    //   uninitialized, but in practice will retain previous contents [zero in
6370    //   this case.]"
6371    //
6372    // In short:
6373    //
6374    //   A key property of sbrk/brk is that new whole pages that are supplied
6375    //   by the operating system *do* get initialized to zero.
6376    //
6377    // As for the portability of all this:
6378    //
6379    //   sbrk and brk are not POSIX.  However, any system that is a derivative
6380    //   of *nix has sbrk and brk because there are too many softwares (such as
6381    //   the Bourne shell) which rely on the traditional memory map (.text,
6382    //   .data+.bss, stack) and the existence of sbrk/brk.
6383    //
6384    // So we should arguably observe all this.  However:
6385    // - The current inaccuracy has caused maybe one complaint in seven years(?)
6386    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
6387    //   doubt most programmers know the above information.
6388    // So I'm not terribly unhappy with marking it as undefined. --njn.
6389    //
6390    // [More:  I think most of what John said only applies to sbrk().  It seems
6391    // that brk() always deals in whole pages.  And since this event deals
6392    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
6393    // just mark all memory it allocates as defined.]
6394    //
6395    VG_(track_new_mem_brk)         ( make_mem_undefined_w_tid );
6396 
6397    // Handling of mmap and mprotect isn't simple (well, it is simple,
6398    // but the justification isn't.)  See comments above, just prior to
6399    // mc_new_mem_mmap.
6400    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
6401    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
6402 
6403    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
6404 
6405    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
6406    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
6407    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
6408 
6409    /* Defer the specification of the new_mem_stack functions to the
6410       post_clo_init function, since we need to first parse the command
6411       line before deciding which set to use. */
6412 
6413 #  ifdef PERF_FAST_STACK
6414    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
6415    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
6416    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
6417    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
6418    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
6419    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
6420    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
6421    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
6422    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
6423 #  endif
6424    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
6425 
6426    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
6427 
6428    VG_(track_pre_mem_read)        ( check_mem_is_defined );
6429    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
6430    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
6431    VG_(track_post_mem_write)      ( mc_post_mem_write );
6432 
6433    VG_(track_post_reg_write)                  ( mc_post_reg_write );
6434    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
6435 
6436    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
6437 
6438    init_shadow_memory();
6439    MC_(chunk_poolalloc) = VG_(newPA) (sizeof(MC_Chunk),
6440                                       1000,
6441                                       VG_(malloc),
6442                                       "mc.cMC.1 (MC_Chunk pools)",
6443                                       VG_(free));
6444    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
6445    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
6446    init_prof_mem();
6447 
6448    tl_assert( mc_expensive_sanity_check() );
6449 
6450    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
6451    tl_assert(sizeof(UWord) == sizeof(Addr));
6452    // Call me paranoid.  I don't care.
6453    tl_assert(sizeof(void*) == sizeof(Addr));
6454 
6455    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
6456    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
6457 
6458    /* This is small.  Always initialise it. */
6459    init_nia_to_ecu_cache();
6460 
6461    /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
6462       if we need to, since the command line args haven't been
6463       processed yet.  Hence defer it to mc_post_clo_init. */
6464    tl_assert(ocacheL1 == NULL);
6465    tl_assert(ocacheL2 == NULL);
6466 
6467    /* Check some important stuff.  See extensive comments above
6468       re UNALIGNED_OR_HIGH for background. */
6469 #  if VG_WORDSIZE == 4
6470    tl_assert(sizeof(void*) == 4);
6471    tl_assert(sizeof(Addr)  == 4);
6472    tl_assert(sizeof(UWord) == 4);
6473    tl_assert(sizeof(Word)  == 4);
6474    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
6475    tl_assert(MASK(1) == 0UL);
6476    tl_assert(MASK(2) == 1UL);
6477    tl_assert(MASK(4) == 3UL);
6478    tl_assert(MASK(8) == 7UL);
6479 #  else
6480    tl_assert(VG_WORDSIZE == 8);
6481    tl_assert(sizeof(void*) == 8);
6482    tl_assert(sizeof(Addr)  == 8);
6483    tl_assert(sizeof(UWord) == 8);
6484    tl_assert(sizeof(Word)  == 8);
6485    tl_assert(MAX_PRIMARY_ADDRESS == 0x7FFFFFFFFULL);
6486    tl_assert(MASK(1) == 0xFFFFFFF800000000ULL);
6487    tl_assert(MASK(2) == 0xFFFFFFF800000001ULL);
6488    tl_assert(MASK(4) == 0xFFFFFFF800000003ULL);
6489    tl_assert(MASK(8) == 0xFFFFFFF800000007ULL);
6490 #  endif
6491 }
6492 
6493 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
6494 
/*--------------------------------------------------------------------*/
/*--- end                                                mc_main.c ---*/
/*--------------------------------------------------------------------*/
6498