1 
2 /*--------------------------------------------------------------------*/
3 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
4 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
5 /*---                                                    mc_main.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of MemCheck, a heavyweight Valgrind tool for
10    detecting memory errors.
11 
12    Copyright (C) 2000-2010 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 
33 #include "pub_tool_basics.h"
34 #include "pub_tool_aspacemgr.h"
35 #include "pub_tool_hashtable.h"     // For mc_include.h
36 #include "pub_tool_libcbase.h"
37 #include "pub_tool_libcassert.h"
38 #include "pub_tool_libcprint.h"
39 #include "pub_tool_machine.h"
40 #include "pub_tool_mallocfree.h"
41 #include "pub_tool_options.h"
42 #include "pub_tool_oset.h"
43 #include "pub_tool_replacemalloc.h"
44 #include "pub_tool_tooliface.h"
45 #include "pub_tool_threadstate.h"
46 
47 #include "mc_include.h"
48 #include "memcheck.h"   /* for client requests */
49 
50 
51 /* Set to 1 to do a little more sanity checking */
52 #define VG_DEBUG_MEMORY 0
53 
54 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
55 
56 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
57 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
58 
59 
60 /*------------------------------------------------------------*/
61 /*--- Fast-case knobs                                      ---*/
62 /*------------------------------------------------------------*/
63 
64 // Comment these out to disable the fast cases (don't just set them to zero).
65 
66 #define PERF_FAST_LOADV    1
67 #define PERF_FAST_STOREV   1
68 
69 #define PERF_FAST_SARP     1
70 
71 #define PERF_FAST_STACK    1
72 #define PERF_FAST_STACK2   1
73 
74 /* Change this to 1 to enable assertions on origin tracking cache fast
75    paths */
76 #define OC_ENABLE_ASSERTIONS 0
77 
78 
79 /*------------------------------------------------------------*/
80 /*--- Comments on the origin tracking implementation       ---*/
81 /*------------------------------------------------------------*/
82 
83 /* See detailed comment entitled
84    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
85    which is contained further on in this file. */
86 
87 
88 /*------------------------------------------------------------*/
89 /*--- V bits and A bits                                    ---*/
90 /*------------------------------------------------------------*/
91 
92 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
93    thinks the corresponding value bit is defined.  And every memory byte
94    has an A bit, which tracks whether Memcheck thinks the program can access
95    it safely (ie. it's mapped, and has at least one of the RWX permission bits
96    set).  So every N-bit register is shadowed with N V bits, and every memory
97    byte is shadowed with 8 V bits and one A bit.
98 
99    In the implementation, we use two forms of compression (compressed V bits
100    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
101    for memory.
102 
103    Memcheck also tracks extra information about each heap block that is
104    allocated, for detecting memory leaks and other purposes.
105 */
106 
107 /*------------------------------------------------------------*/
108 /*--- Basic A/V bitmap representation.                     ---*/
109 /*------------------------------------------------------------*/
110 
111 /* All reads and writes are checked against a memory map (a.k.a. shadow
112    memory), which records the state of all memory in the process.
113 
114    On 32-bit machines the memory map is organised as follows.
115    The top 16 bits of an address are used to index into a top-level
116    map table, containing 65536 entries.  Each entry is a pointer to a
117    second-level map, which records the accessibility and validity
118    permissions for the 65536 bytes indexed by the lower 16 bits of the
119    address.  Each byte is represented by two bits (details are below).  So
120    each second-level map contains 16384 bytes.  This two-level arrangement
121    conveniently divides the 4G address space into 64k lumps, each of size
122    64kB.
123 
124    All entries in the primary (top-level) map must point to a valid
125    secondary (second-level) map.  Since many of the 64kB chunks will
126    have the same status for every bit -- ie. noaccess (for unused
127    address space) or entirely addressable and defined (for code segments) --
128    there are three distinguished secondary maps, which indicate 'noaccess',
129    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
130    map entry points to the relevant distinguished map.  In practice,
131    typically more than half of the addressable memory is represented with
132    the 'undefined' or 'defined' distinguished secondary map, so it gives a
133    good saving.  It also lets us set the V+A bits of large address regions
134    quickly in set_address_range_perms().
135 
136    On 64-bit machines it's more complicated.  If we followed the same basic
137    scheme we'd have a four-level table which would require too many memory
138    accesses.  So instead the top-level map table has 2^19 entries (indexed
139    accesses.  So instead the top-level map table has 2^22 entries (indexed
140    using bits 16..37 of the address);  this covers the bottom 256GB.  Any
141    accesses above 256GB are handled with a slow, sparse auxiliary table.
142    Valgrind's address space manager tries very hard to keep things below
143    this 256GB barrier so that performance doesn't suffer too much.
144    Note that this file has a lot of different functions for reading and
145    writing shadow memory.  Only a couple are strictly necessary (eg.
146    get_vabits2 and set_vabits2), most are just specialised for specific
147    common cases to improve performance.
148 
149    Aside: the V+A bits are less precise than they could be -- we have no way
150    of marking memory as read-only.  It would be great if we could add an
151    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
152    which requires 2.3 bits to hold, and there's no way to do that elegantly
153    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
154    seem worth it.
155 */
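/* Worked example of the address split:  take the address 0x40126EF8.  On a
   32-bit target the top 16 bits (0x4012) index the primary map and the low
   16 bits (0x6EF8) locate the byte's V+A state within the chosen secondary
   map.  On a 64-bit target, with the settings below, bits 16..37 (still
   0x4012 here, since the address is below 4GB) index the primary map, and
   anything at or above 2^38 (256GB) is handled via the auxiliary table. */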
156 
157 /* --------------- Basic configuration --------------- */
158 
159 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
160 
161 #if VG_WORDSIZE == 4
162 
163 /* cover the entire address space */
164 #  define N_PRIMARY_BITS  16
165 
166 #else
167 
168 /* Just handle the first 256G fast and the rest via auxiliary
169    primaries.  If you change this, Memcheck will assert at startup.
170    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
171 #  define N_PRIMARY_BITS  22
172 
173 #endif
174 
175 
176 /* Do not change this. */
177 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
178 
179 /* Do not change this. */
180 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
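/* For reference: with N_PRIMARY_BITS == 16 (32-bit) this gives
   N_PRIMARY_MAP == 65536 and MAX_PRIMARY_ADDRESS == 0xFFFFFFFF (4GB-1);
   with N_PRIMARY_BITS == 22 (64-bit) it gives N_PRIMARY_MAP == 4194304 and
   MAX_PRIMARY_ADDRESS == 0x3FFFFFFFFF (256GB-1). */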
181 
182 
183 /* --------------- Secondary maps --------------- */
184 
185 // Each byte of memory conceptually has an A bit, which indicates its
186 // addressability, and 8 V bits, which indicate its definedness.
187 //
188 // But because very few bytes are partially defined, we can use a nice
189 // compression scheme to reduce the size of shadow memory.  Each byte of
190 // memory has 2 bits which indicate its state (ie. V+A bits):
191 //
192 //   00:  noaccess    (unaddressable but treated as fully defined)
193 //   01:  undefined   (addressable and fully undefined)
194 //   10:  defined     (addressable and fully defined)
195 //   11:  partdefined (addressable and partially defined)
196 //
197 // In the "partdefined" case, we use a secondary table to store the V bits.
198 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
199 // bits.
200 //
201 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
202 // four bytes (32 bits) of memory are in each chunk.  Hence the name
203 // "vabits8".  This lets us get the V+A bits for four bytes at a time
204 // easily (without having to do any shifting and/or masking), and that is a
205 // very common operation.  (Note that although each vabits8 chunk
206 // is 8 bits in size, it represents 32 bits of memory.)
207 //
208 // The representation is "inverse" little-endian... each 4 bytes of
209 // memory is represented by a 1 byte value, where:
210 //
211 // - the status of byte (a+0) is held in bits [1..0]
212 // - the status of byte (a+1) is held in bits [3..2]
213 // - the status of byte (a+2) is held in bits [5..4]
214 // - the status of byte (a+3) is held in bits [7..6]
215 //
216 // It's "inverse" because endianness normally describes a mapping from
217 // value bits to memory addresses;  in this case the mapping is inverted.
218 // Ie. instead of particular value bits being held in certain addresses, in
219 // this case certain addresses are represented by particular value bits.
220 // See insert_vabits2_into_vabits8() for an example.
221 //
222 // But note that we don't compress the V bits stored in registers;  they
223 // need to be explicit to make the shadow operations possible.  Therefore
224 // when moving values between registers and memory we need to convert
225 // between the expanded in-register format and the compressed in-memory
226 // format.  This isn't so difficult, it just requires careful attention in a
227 // few places.
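//
// Worked example:  suppose the four bytes a+0 .. a+3 are respectively
// defined, undefined, defined and noaccess.  Then their vabits8 chunk is
// (10b << 0) | (01b << 2) | (10b << 4) | (00b << 6) == 0x26.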
228 
229 // These represent eight bits of memory.
230 #define VA_BITS2_NOACCESS     0x0      // 00b
231 #define VA_BITS2_UNDEFINED    0x1      // 01b
232 #define VA_BITS2_DEFINED      0x2      // 10b
233 #define VA_BITS2_PARTDEFINED  0x3      // 11b
234 
235 // These represent 16 bits of memory.
236 #define VA_BITS4_NOACCESS     0x0      // 00_00b
237 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
238 #define VA_BITS4_DEFINED      0xa      // 10_10b
239 
240 // These represent 32 bits of memory.
241 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
242 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
243 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
244 
245 // These represent 64 bits of memory.
246 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
247 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
248 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
249 
250 
251 #define SM_CHUNKS             16384
252 #define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
253 #define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
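// Example: for a == 0x40126EF8, SM_OFF(a) == 0x6EF8 >> 2 == 0x1BBE, so the
// V+A bits for the four bytes 0x40126EF8..0x40126EFB live in
// vabits8[0x1BBE] of the relevant SecMap.  SM_OFF_16(a) == 0xDDF indexes
// the same array viewed as 16-bit chunks, each covering 8 bytes of memory.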
254 
255 // Paranoia:  it's critical for performance that the requested inlining
256 // occurs.  So try extra hard.
257 #define INLINE    inline __attribute__((always_inline))
258 
259 static INLINE Addr start_of_this_sm ( Addr a ) {
260    return (a & (~SM_MASK));
261 }
262 static INLINE Bool is_start_of_sm ( Addr a ) {
263    return (start_of_this_sm(a) == a);
264 }
265 
266 typedef
267    struct {
268       UChar vabits8[SM_CHUNKS];
269    }
270    SecMap;
271 
272 // 3 distinguished secondary maps, one for no-access, one for
273 // accessible but undefined, and one for accessible and defined.
274 // Distinguished secondaries may never be modified.
275 #define SM_DIST_NOACCESS   0
276 #define SM_DIST_UNDEFINED  1
277 #define SM_DIST_DEFINED    2
278 
279 static SecMap sm_distinguished[3];
280 
281 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
282    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
283 }
284 
285 // Forward declaration
286 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
287 
288 /* dist_sm points to one of our three distinguished secondaries.  Make
289    a copy of it so that we can write to it.
290 */
291 static SecMap* copy_for_writing ( SecMap* dist_sm )
292 {
293    SecMap* new_sm;
294    tl_assert(dist_sm == &sm_distinguished[0]
295           || dist_sm == &sm_distinguished[1]
296           || dist_sm == &sm_distinguished[2]);
297 
298    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
299    if (new_sm == NULL)
300       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
301                                    sizeof(SecMap) );
302    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
303    update_SM_counts(dist_sm, new_sm);
304    return new_sm;
305 }
306 
307 /* --------------- Stats --------------- */
308 
309 static Int   n_issued_SMs      = 0;
310 static Int   n_deissued_SMs    = 0;
311 static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
312 static Int   n_undefined_SMs   = 0;
313 static Int   n_defined_SMs     = 0;
314 static Int   n_non_DSM_SMs     = 0;
315 static Int   max_noaccess_SMs  = 0;
316 static Int   max_undefined_SMs = 0;
317 static Int   max_defined_SMs   = 0;
318 static Int   max_non_DSM_SMs   = 0;
319 
320 /* # searches initiated in auxmap_L1, and # base cmps required */
321 static ULong n_auxmap_L1_searches  = 0;
322 static ULong n_auxmap_L1_cmps      = 0;
323 /* # of searches that missed in auxmap_L1 and therefore had to
324    be handed to auxmap_L2. And the number of nodes inserted. */
325 static ULong n_auxmap_L2_searches  = 0;
326 static ULong n_auxmap_L2_nodes     = 0;
327 
328 static Int   n_sanity_cheap     = 0;
329 static Int   n_sanity_expensive = 0;
330 
331 static Int   n_secVBit_nodes   = 0;
332 static Int   max_secVBit_nodes = 0;
333 
334 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
335 {
336    if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
337    else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
338    else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
339    else                                                  { n_non_DSM_SMs  --;
340                                                            n_deissued_SMs ++; }
341 
342    if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
343    else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
344    else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
345    else                                                  { n_non_DSM_SMs  ++;
346                                                            n_issued_SMs   ++; }
347 
348    if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
349    if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
350    if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
351    if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
352 }
353 
354 /* --------------- Primary maps --------------- */
355 
356 /* The main primary map.  This covers some initial part of the address
357    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
358    handled using the auxiliary primary map.
359 */
360 static SecMap* primary_map[N_PRIMARY_MAP];
361 
362 
363 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
364    value, and sm points at the relevant secondary map.  As with the
365    main primary map, the secondary may be either a real secondary, or
366    one of the three distinguished secondaries.  DO NOT CHANGE THIS
367    LAYOUT: the first word has to be the key for OSet fast lookups.
368 */
369 typedef
370    struct {
371       Addr    base;
372       SecMap* sm;
373    }
374    AuxMapEnt;
375 
376 /* Tunable parameter: How big is the L1 queue? */
377 #define N_AUXMAP_L1 24
378 
379 /* Tunable parameter: How far along the L1 queue to insert
380    entries resulting from L2 lookups? */
381 #define AUXMAP_L1_INSERT_IX 12
382 
383 static struct {
384           Addr       base;
385           AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
386        }
387        auxmap_L1[N_AUXMAP_L1];
388 
389 static OSet* auxmap_L2 = NULL;
390 
391 static void init_auxmap_L1_L2 ( void )
392 {
393    Int i;
394    for (i = 0; i < N_AUXMAP_L1; i++) {
395       auxmap_L1[i].base = 0;
396       auxmap_L1[i].ent  = NULL;
397    }
398 
399    tl_assert(0 == offsetof(AuxMapEnt,base));
400    tl_assert(sizeof(Addr) == sizeof(void*));
401    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
402                                     /*fastCmp*/ NULL,
403                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
404 }
405 
406 /* Check representation invariants; if OK return NULL; else a
407    descriptive bit of text.  Also return the number of
408    non-distinguished secondary maps referred to from the auxiliary
409    primary maps. */
410 
411 static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
412 {
413    Word i, j;
414    /* On a 32-bit platform, the L2 and L1 tables should
415       both remain empty forever.
416 
417       On a 64-bit platform:
418       In the L2 table:
419        all .base & 0xFFFF == 0
420        all .base > MAX_PRIMARY_ADDRESS
421       In the L1 table:
422        all .base & 0xFFFF == 0
423        all (.base > MAX_PRIMARY_ADDRESS
424             .base & 0xFFFF == 0
425             and .ent points to an AuxMapEnt with the same .base)
426            or
427            (.base == 0 and .ent == NULL)
428    */
429    *n_secmaps_found = 0;
430    if (sizeof(void*) == 4) {
431       /* 32-bit platform */
432       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
433          return "32-bit: auxmap_L2 is non-empty";
434       for (i = 0; i < N_AUXMAP_L1; i++)
435          if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
436             return "32-bit: auxmap_L1 is non-empty";
437    } else {
438       /* 64-bit platform */
439       UWord elems_seen = 0;
440       AuxMapEnt *elem, *res;
441       AuxMapEnt key;
442       /* L2 table */
443       VG_(OSetGen_ResetIter)(auxmap_L2);
444       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
445          elems_seen++;
446          if (0 != (elem->base & (Addr)0xFFFF))
447             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
448          if (elem->base <= MAX_PRIMARY_ADDRESS)
449             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
450          if (elem->sm == NULL)
451             return "64-bit: .sm in _L2 is NULL";
452          if (!is_distinguished_sm(elem->sm))
453             (*n_secmaps_found)++;
454       }
455       if (elems_seen != n_auxmap_L2_nodes)
456          return "64-bit: disagreement on number of elems in _L2";
457       /* Check L1-L2 correspondence */
458       for (i = 0; i < N_AUXMAP_L1; i++) {
459          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
460             continue;
461          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
462             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
463          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
464             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
465          if (auxmap_L1[i].ent == NULL)
466             return "64-bit: .ent is NULL in auxmap_L1";
467          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
468             return "64-bit: _L1 and _L2 bases are inconsistent";
469          /* Look it up in auxmap_L2. */
470          key.base = auxmap_L1[i].base;
471          key.sm   = 0;
472          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
473          if (res == NULL)
474             return "64-bit: _L1 .base not found in _L2";
475          if (res != auxmap_L1[i].ent)
476             return "64-bit: _L1 .ent disagrees with _L2 entry";
477       }
478       /* Check L1 contains no duplicates */
479       for (i = 0; i < N_AUXMAP_L1; i++) {
480          if (auxmap_L1[i].base == 0)
481             continue;
482          for (j = i+1; j < N_AUXMAP_L1; j++) {
483             if (auxmap_L1[j].base == 0)
484                continue;
485             if (auxmap_L1[j].base == auxmap_L1[i].base)
486                return "64-bit: duplicate _L1 .base entries";
487          }
488       }
489    }
490    return NULL; /* ok */
491 }
492 
493 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
494 {
495    Word i;
496    tl_assert(ent);
497    tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
498    for (i = N_AUXMAP_L1-1; i > rank; i--)
499       auxmap_L1[i] = auxmap_L1[i-1];
500    auxmap_L1[rank].base = ent->base;
501    auxmap_L1[rank].ent  = ent;
502 }
503 
504 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
505 {
506    AuxMapEnt  key;
507    AuxMapEnt* res;
508    Word       i;
509 
510    tl_assert(a > MAX_PRIMARY_ADDRESS);
511    a &= ~(Addr)0xFFFF;
512 
513    /* First search the front-cache, which is a self-organising
514       list containing the most popular entries. */
515 
516    if (LIKELY(auxmap_L1[0].base == a))
517       return auxmap_L1[0].ent;
518    if (LIKELY(auxmap_L1[1].base == a)) {
519       Addr       t_base = auxmap_L1[0].base;
520       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
521       auxmap_L1[0].base = auxmap_L1[1].base;
522       auxmap_L1[0].ent  = auxmap_L1[1].ent;
523       auxmap_L1[1].base = t_base;
524       auxmap_L1[1].ent  = t_ent;
525       return auxmap_L1[0].ent;
526    }
527 
528    n_auxmap_L1_searches++;
529 
530    for (i = 0; i < N_AUXMAP_L1; i++) {
531       if (auxmap_L1[i].base == a) {
532          break;
533       }
534    }
535    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
536 
537    n_auxmap_L1_cmps += (ULong)(i+1);
538 
539    if (i < N_AUXMAP_L1) {
540       if (i > 0) {
541          Addr       t_base = auxmap_L1[i-1].base;
542          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
543          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
544          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
545          auxmap_L1[i-0].base = t_base;
546          auxmap_L1[i-0].ent  = t_ent;
547          i--;
548       }
549       return auxmap_L1[i].ent;
550    }
551 
552    n_auxmap_L2_searches++;
553 
554    /* First see if we already have it. */
555    key.base = a;
556    key.sm   = 0;
557 
558    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
559    if (res)
560       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
561    return res;
562 }
563 
564 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
565 {
566    AuxMapEnt *nyu, *res;
567 
568    /* First see if we already have it. */
569    res = maybe_find_in_auxmap( a );
570    if (LIKELY(res))
571       return res;
572 
573    /* Ok, there's no entry in the secondary map, so we'll have
574       to allocate one. */
575    a &= ~(Addr)0xFFFF;
576 
577    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
578    tl_assert(nyu);
579    nyu->base = a;
580    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
581    VG_(OSetGen_Insert)( auxmap_L2, nyu );
582    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
583    n_auxmap_L2_nodes++;
584    return nyu;
585 }
586 
587 /* --------------- SecMap fundamentals --------------- */
588 
589 // In all these, 'low' means it's definitely in the main primary map,
590 // 'high' means it's definitely in the auxiliary table.
591 
592 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
593 {
594    UWord pm_off = a >> 16;
595 #  if VG_DEBUG_MEMORY >= 1
596    tl_assert(pm_off < N_PRIMARY_MAP);
597 #  endif
598    return &primary_map[ pm_off ];
599 }
600 
601 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
602 {
603    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
604    return &am->sm;
605 }
606 
607 static SecMap** get_secmap_ptr ( Addr a )
608 {
609    return ( a <= MAX_PRIMARY_ADDRESS
610           ? get_secmap_low_ptr(a)
611           : get_secmap_high_ptr(a));
612 }
613 
614 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
615 {
616    return *get_secmap_low_ptr(a);
617 }
618 
619 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
620 {
621    return *get_secmap_high_ptr(a);
622 }
623 
624 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
625 {
626    SecMap** p = get_secmap_low_ptr(a);
627    if (UNLIKELY(is_distinguished_sm(*p)))
628       *p = copy_for_writing(*p);
629    return *p;
630 }
631 
632 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
633 {
634    SecMap** p = get_secmap_high_ptr(a);
635    if (UNLIKELY(is_distinguished_sm(*p)))
636       *p = copy_for_writing(*p);
637    return *p;
638 }
639 
640 /* Produce the secmap for 'a', either from the primary map or by
641    ensuring there is an entry for it in the aux primary map.  The
642    secmap may be a distinguished one as the caller will only want to
643    be able to read it.
644 */
645 static INLINE SecMap* get_secmap_for_reading ( Addr a )
646 {
647    return ( a <= MAX_PRIMARY_ADDRESS
648           ? get_secmap_for_reading_low (a)
649           : get_secmap_for_reading_high(a) );
650 }
651 
652 /* Produce the secmap for 'a', either from the primary map or by
653    ensuring there is an entry for it in the aux primary map.  The
654    secmap may not be a distinguished one, since the caller will want
655    to be able to write it.  If it is a distinguished secondary, make a
656    writable copy of it, install it, and return the copy instead.  (COW
657    semantics).
658 */
659 static SecMap* get_secmap_for_writing ( Addr a )
660 {
661    return ( a <= MAX_PRIMARY_ADDRESS
662           ? get_secmap_for_writing_low (a)
663           : get_secmap_for_writing_high(a) );
664 }
665 
666 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
667    allocate one if one doesn't already exist.  This is used by the
668    leak checker.
669 */
670 static SecMap* maybe_get_secmap_for ( Addr a )
671 {
672    if (a <= MAX_PRIMARY_ADDRESS) {
673       return get_secmap_for_reading_low(a);
674    } else {
675       AuxMapEnt* am = maybe_find_in_auxmap(a);
676       return am ? am->sm : NULL;
677    }
678 }
679 
680 /* --------------- Fundamental functions --------------- */
681 
682 static INLINE
683 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
684 {
685    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
686    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
687    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
688 }
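/* Example: if (a & 3) == 2 then shift == 4, so the two bits for that byte
   end up in bits [5..4] of the vabits8 chunk, matching the layout described
   in the comments above. */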
689 
690 static INLINE
691 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
692 {
693    UInt shift;
694    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
695    shift     =  (a & 2)   << 1;        // shift by 0 or 4
696    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
697    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
698 }
699 
700 static INLINE
701 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
702 {
703    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
704    vabits8 >>= shift;                  // shift the two bits to the bottom
705    return 0x3 & vabits8;               // mask out the rest
706 }
707 
708 static INLINE
709 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
710 {
711    UInt shift;
712    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
713    shift = (a & 2) << 1;               // shift by 0 or 4
714    vabits8 >>= shift;                  // shift the four bits to the bottom
715    return 0xf & vabits8;               // mask out the rest
716 }
717 
718 // Note that these four are only used in slow cases.  The fast cases do
719 // clever things like combine the auxmap check (in
720 // get_secmap_{read,writ}able) with alignment checks.
721 
722 // *** WARNING! ***
723 // Any time this function is called, if it is possible that vabits2
724 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
725 // sec-V-bits table must also be set!
726 static INLINE
727 void set_vabits2 ( Addr a, UChar vabits2 )
728 {
729    SecMap* sm       = get_secmap_for_writing(a);
730    UWord   sm_off   = SM_OFF(a);
731    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
732 }
733 
734 static INLINE
735 UChar get_vabits2 ( Addr a )
736 {
737    SecMap* sm       = get_secmap_for_reading(a);
738    UWord   sm_off   = SM_OFF(a);
739    UChar   vabits8  = sm->vabits8[sm_off];
740    return extract_vabits2_from_vabits8(a, vabits8);
741 }
742 
743 // *** WARNING! ***
744 // Any time this function is called, if it is possible that any of the
745 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
746 // corresponding entry(s) in the sec-V-bits table must also be set!
747 static INLINE
748 UChar get_vabits8_for_aligned_word32 ( Addr a )
749 {
750    SecMap* sm       = get_secmap_for_reading(a);
751    UWord   sm_off   = SM_OFF(a);
752    UChar   vabits8  = sm->vabits8[sm_off];
753    return vabits8;
754 }
755 
756 static INLINE
757 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
758 {
759    SecMap* sm       = get_secmap_for_writing(a);
760    UWord   sm_off   = SM_OFF(a);
761    sm->vabits8[sm_off] = vabits8;
762 }
763 
764 
765 // Forward declarations
766 static UWord get_sec_vbits8(Addr a);
767 static void  set_sec_vbits8(Addr a, UWord vbits8);
768 
769 // Returns False if there was an addressability error.
770 static INLINE
771 Bool set_vbits8 ( Addr a, UChar vbits8 )
772 {
773    Bool  ok      = True;
774    UChar vabits2 = get_vabits2(a);
775    if ( VA_BITS2_NOACCESS != vabits2 ) {
776       // Addressable.  Convert in-register format to in-memory format.
777       // Also remove any existing sec V bit entry for the byte if no
778       // longer necessary.
779       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
780       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
781       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
782                                                 set_sec_vbits8(a, vbits8);  }
783       set_vabits2(a, vabits2);
784 
785    } else {
786       // Unaddressable!  Do nothing -- when writing to unaddressable
787       // memory it acts as a black hole, and the V bits can never be seen
788       // again.  So we don't have to write them at all.
789       ok = False;
790    }
791    return ok;
792 }
793 
794 // Returns False if there was an addressability error.  In that case, we put
795 // all defined bits into vbits8.
796 static INLINE
797 Bool get_vbits8 ( Addr a, UChar* vbits8 )
798 {
799    Bool  ok      = True;
800    UChar vabits2 = get_vabits2(a);
801 
802    // Convert the in-memory format to in-register format.
803    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
804    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
805    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
806       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
807       ok = False;
808    } else {
809       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
810       *vbits8 = get_sec_vbits8(a);
811    }
812    return ok;
813 }
814 
815 
816 /* --------------- Secondary V bit table ------------ */
817 
818 // This table holds the full V bit pattern for partially-defined bytes
819 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
820 // memory.
821 //
822 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
823 // then overwrite the same address with a fully defined byte, the sec-V-bit
824 // node will not necessarily be removed.  This is because checking for
825 // whether removal is necessary would slow down the fast paths.
826 //
827 // To avoid the stale nodes building up too much, we periodically (once the
828 // table reaches a certain size) garbage collect (GC) the table by
829 // traversing it and evicting any "sufficiently stale" nodes, ie. nodes that
830 // are stale and haven't been touched for a certain number of collections.
831 // If more than a certain proportion of nodes survived, we increase the
832 // table size so that GCs occur less often.
833 //
834 // (So this is a bit different from a traditional GC, where you definitely
835 // want to remove any dead nodes.  It's more like we have a resizable cache
836 // and we're trying to find the right balance between how many elements to
837 // evict and how big to make the cache.)
838 //
839 // This policy is designed to avoid bad table bloat in the worst case where
840 // a program creates huge numbers of stale PDBs -- we would get this bloat
841 // if we had no GC -- while handling well the case where a node becomes
842 // stale but shortly afterwards is rewritten with a PDB and so becomes
843 // non-stale again (which happens quite often, eg. in perf/bz2).  If we
844 // removed all stale nodes as soon as possible, we would just end up
845 // re-adding a lot of them later.  The "sufficiently stale" approach avoids
846 // this.  (If a program has many live PDBs, performance will just suck,
847 // there's no way around that.)
848 
849 static OSet* secVBitTable;
850 
851 // Stats
852 static ULong sec_vbits_new_nodes = 0;
853 static ULong sec_vbits_updates   = 0;
854 
855 // This must be a power of two;  this is checked in mc_pre_clo_init().
856 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
857 // a larger address range) they take more space but we can get multiple
858 // partially-defined bytes in one if they are close to each other, reducing
859 // the number of total nodes.  In practice sometimes they are clustered (eg.
860 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
861 // row), but often not.  So we choose something intermediate.
862 #define BYTES_PER_SEC_VBIT_NODE     16
863 
864 // We make the table bigger if more than this many nodes survive a GC.
865 #define MAX_SURVIVOR_PROPORTION  0.5
866 
867 // Each time we make the table bigger, we increase it by this much.
868 #define TABLE_GROWTH_FACTOR      2
869 
870 // This defines "sufficiently stale" -- any node that hasn't been touched in
871 // this many GCs will be removed.
872 #define MAX_STALE_AGE            2
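// Example of the aging policy: with MAX_STALE_AGE == 2, a node whose
// last_touched was set when GCs_done was 10 survives the GCs that take
// GCs_done to 11 and 12 unconditionally; at the GC taking it to 13 it is
// evicted unless at least one of its bytes is still VA_BITS2_PARTDEFINED
// in the main shadow memory.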
873 
874 // We GC the table when it gets this many nodes in it, ie. it's effectively
875 // the table size.  It can change.
876 static Int  secVBitLimit = 1024;
877 
878 // The number of GCs done, used to age sec-V-bit nodes for eviction.
879 // Because it's unsigned, wrapping doesn't matter -- the right answer will
880 // come out anyway.
881 static UInt GCs_done = 0;
882 
883 typedef
884    struct {
885       Addr  a;
886       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
887       UInt  last_touched;
888    }
889    SecVBitNode;
890 
891 static OSet* createSecVBitTable(void)
892 {
893    return VG_(OSetGen_Create)( offsetof(SecVBitNode, a),
894                                NULL, // use fast comparisons
895                                VG_(malloc), "mc.cSVT.1 (sec VBit table)",
896                                VG_(free) );
897 }
898 
899 static void gcSecVBitTable(void)
900 {
901    OSet*        secVBitTable2;
902    SecVBitNode* n;
903    Int          i, n_nodes = 0, n_survivors = 0;
904 
905    GCs_done++;
906 
907    // Create the new table.
908    secVBitTable2 = createSecVBitTable();
909 
910    // Traverse the table, moving fresh nodes into the new table.
911    VG_(OSetGen_ResetIter)(secVBitTable);
912    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
913       Bool keep = False;
914       if ( (GCs_done - n->last_touched) <= MAX_STALE_AGE ) {
915          // Keep node if it's been touched recently enough (regardless of
916          // freshness/staleness).
917          keep = True;
918       } else {
919          // Keep node if any of its bytes are non-stale.  Using
920          // get_vabits2() for the lookup is not very efficient, but I don't
921          // think it matters.
922          for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
923             if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
924                keep = True;      // Found a non-stale byte, so keep
925                break;
926             }
927          }
928       }
929 
930       if ( keep ) {
931          // Insert a copy of the node into the new table.
932          SecVBitNode* n2 =
933             VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
934          *n2 = *n;
935          VG_(OSetGen_Insert)(secVBitTable2, n2);
936       }
937    }
938 
939    // Get the before and after sizes.
940    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
941    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
942 
943    // Destroy the old table, and put the new one in its place.
944    VG_(OSetGen_Destroy)(secVBitTable);
945    secVBitTable = secVBitTable2;
946 
947    if (VG_(clo_verbosity) > 1) {
948       Char percbuf[6];
949       VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf);
950       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n",
951                    n_nodes, n_survivors, percbuf);
952    }
953 
954    // Increase table size if necessary.
955    if (n_survivors > (secVBitLimit * MAX_SURVIVOR_PROPORTION)) {
956       secVBitLimit *= TABLE_GROWTH_FACTOR;
957       if (VG_(clo_verbosity) > 1)
958          VG_(message)(Vg_DebugMsg, "memcheck GC: increase table size to %d\n",
959                       secVBitLimit);
960    }
961 }
962 
963 static UWord get_sec_vbits8(Addr a)
964 {
965    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
966    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
967    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
968    UChar        vbits8;
969    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
970    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
971    // make it to the secondary V bits table.
972    vbits8 = n->vbits8[amod];
973    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
974    return vbits8;
975 }
976 
977 static void set_sec_vbits8(Addr a, UWord vbits8)
978 {
979    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
980    Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
981    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
982    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
983    // make it to the secondary V bits table.
984    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
985    if (n) {
986       n->vbits8[amod] = vbits8;     // update
987       n->last_touched = GCs_done;
988       sec_vbits_updates++;
989    } else {
990       // New node:  assign the specific byte, make the rest invalid (they
991       // should never be read as-is, but be cautious).
992       n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
993       n->a            = aAligned;
994       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
995          n->vbits8[i] = V_BITS8_UNDEFINED;
996       }
997       n->vbits8[amod] = vbits8;
998       n->last_touched = GCs_done;
999 
1000       // Do a table GC if necessary.  Nb: do this before inserting the new
1001       // node, to avoid erroneously GC'ing the new node.
1002       if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1003          gcSecVBitTable();
1004       }
1005 
1006       // Insert the new node.
1007       VG_(OSetGen_Insert)(secVBitTable, n);
1008       sec_vbits_new_nodes++;
1009 
1010       n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1011       if (n_secVBit_nodes > max_secVBit_nodes)
1012          max_secVBit_nodes = n_secVBit_nodes;
1013    }
1014 }
1015 
1016 /* --------------- Endianness helpers --------------- */
1017 
1018 /* Returns the offset in memory of the byte with significance 'byteno'
1019    (0 = least significant) in a wordszB-sized word, for the given endianness. */
1020 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1021                                     UWord byteno ) {
1022    return bigendian ? (wordszB-1-byteno) : byteno;
1023 }
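/* For instance, with an 8-byte word: byte_offset_w(8, False, 0) == 0 --
   on a little-endian target the least significant byte sits at the lowest
   address -- whereas byte_offset_w(8, True, 0) == 7, since on a big-endian
   target it sits at the highest address. */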
1024 
1025 
1026 /* --------------- Ignored address ranges --------------- */
1027 
1028 #define M_IGNORE_RANGES 4
1029 
1030 typedef
1031    struct {
1032       Int  used;
1033       Addr start[M_IGNORE_RANGES];
1034       Addr end[M_IGNORE_RANGES];
1035    }
1036    IgnoreRanges;
1037 
1038 static IgnoreRanges ignoreRanges;
1039 
1040 INLINE Bool MC_(in_ignored_range) ( Addr a )
1041 {
1042    Int i;
1043    if (LIKELY(ignoreRanges.used == 0))
1044       return False;
1045    for (i = 0; i < ignoreRanges.used; i++) {
1046       if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i])
1047          return True;
1048    }
1049    return False;
1050 }
1051 
1052 
1053 /* Parse a 32- or 64-bit hex number, including leading 0x, from string
1054    starting at *ppc, putting result in *result, and return True.  Or
1055    fail, in which case *ppc and *result are undefined, and return
1056    False. */
1057 
1058 static Bool isHex ( UChar c )
1059 {
1060   return ((c >= '0' && c <= '9') ||
1061 	  (c >= 'a' && c <= 'f') ||
1062 	  (c >= 'A' && c <= 'F'));
1063 }
1064 
1065 static UInt fromHex ( UChar c )
1066 {
1067    if (c >= '0' && c <= '9')
1068       return (UInt)c - (UInt)'0';
1069    if (c >= 'a' && c <= 'f')
1070       return 10 +  (UInt)c - (UInt)'a';
1071    if (c >= 'A' && c <= 'F')
1072       return 10 +  (UInt)c - (UInt)'A';
1073    /*NOTREACHED*/
1074    tl_assert(0);
1075    return 0;
1076 }
1077 
1078 static Bool parse_Addr ( UChar** ppc, Addr* result )
1079 {
1080    Int used, limit = 2 * sizeof(Addr);
1081    if (**ppc != '0')
1082       return False;
1083    (*ppc)++;
1084    if (**ppc != 'x')
1085       return False;
1086    (*ppc)++;
1087    *result = 0;
1088    used = 0;
1089    while (isHex(**ppc)) {
1090       UInt d = fromHex(**ppc);
1091       tl_assert(d < 16);
1092       *result = ((*result) << 4) | d;
1093       (*ppc)++;
1094       used++;
1095       if (used > limit) return False;
1096    }
1097    if (used == 0)
1098       return False;
1099    return True;
1100 }
1101 
1102 /* Parse two such numbers separated by a dash, or fail. */
1103 
1104 static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 )
1105 {
1106    Bool ok = parse_Addr(ppc, result1);
1107    if (!ok)
1108       return False;
1109    if (**ppc != '-')
1110       return False;
1111    (*ppc)++;
1112    ok = parse_Addr(ppc, result2);
1113    if (!ok)
1114       return False;
1115    return True;
1116 }
1117 
1118 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1119    fail. */
1120 
1121 static Bool parse_ignore_ranges ( UChar* str0 )
1122 {
1123    Addr start, end;
1124    Bool ok;
1125    UChar*  str = str0;
1126    UChar** ppc = &str;
1127    ignoreRanges.used = 0;
1128    while (1) {
1129       ok = parse_range(ppc, &start, &end);
1130       if (!ok)
1131          return False;
1132       if (ignoreRanges.used >= M_IGNORE_RANGES)
1133          return False;
1134       ignoreRanges.start[ignoreRanges.used] = start;
1135       ignoreRanges.end[ignoreRanges.used] = end;
1136       ignoreRanges.used++;
1137       if (**ppc == 0)
1138          return True;
1139       if (**ppc != ',')
1140          return False;
1141       (*ppc)++;
1142    }
1143    /*NOTREACHED*/
1144    return False;
1145 }
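/* For example, the string "0x1000-0x1fff,0x40000000-0x40ffffff" is
   accepted and fills two slots of 'ignoreRanges'.  At most M_IGNORE_RANGES
   (4) comma-separated ranges are allowed, each written as two 0x-prefixed
   hex numbers joined by '-'. */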
1146 
1147 
1148 /* --------------- Load/store slow cases. --------------- */
1149 
1150 static
1151 #ifndef PERF_FAST_LOADV
1152 INLINE
1153 #endif
1154 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1155 {
1156    /* Make up a 64-bit result V word, which contains the loaded data for
1157       valid addresses and Defined for invalid addresses.  Iterate over
1158       the bytes in the word, from the most significant down to the
1159       least. */
1160    ULong vbits64     = V_BITS64_UNDEFINED;
1161    SizeT szB         = nBits / 8;
1162    SSizeT i;                        // Must be signed.
1163    SizeT n_addrs_bad = 0;
1164    Addr  ai;
1165    Bool  partial_load_exemption_applies;
1166    UChar vbits8;
1167    Bool  ok;
1168 
1169    PROF_EVENT(30, "mc_LOADVn_slow");
1170 
1171    /* ------------ BEGIN semi-fast cases ------------ */
1172    /* These deal quickly-ish with the common auxiliary primary map
1173       cases on 64-bit platforms.  They are merely a speedup hack; they can be
1174       omitted without loss of correctness/functionality.  Note that in
1175       both cases the "sizeof(void*) == 8" causes these cases to be
1176       folded out by compilers on 32-bit platforms.  These are derived
1177       from LOADV64 and LOADV32.
1178    */
1179    if (LIKELY(sizeof(void*) == 8
1180                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1181       SecMap* sm       = get_secmap_for_reading(a);
1182       UWord   sm_off16 = SM_OFF_16(a);
1183       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1184       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1185          return V_BITS64_DEFINED;
1186       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1187          return V_BITS64_UNDEFINED;
1188       /* else fall into the slow case */
1189    }
1190    if (LIKELY(sizeof(void*) == 8
1191                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1192       SecMap* sm = get_secmap_for_reading(a);
1193       UWord sm_off = SM_OFF(a);
1194       UWord vabits8 = sm->vabits8[sm_off];
1195       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1196          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1197       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1198          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1199       /* else fall into slow case */
1200    }
1201    /* ------------ END semi-fast cases ------------ */
1202 
1203    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1204 
1205    for (i = szB-1; i >= 0; i--) {
1206       PROF_EVENT(31, "mc_LOADVn_slow(loop)");
1207       ai = a + byte_offset_w(szB, bigendian, i);
1208       ok = get_vbits8(ai, &vbits8);
1209       if (!ok) n_addrs_bad++;
1210       vbits64 <<= 8;
1211       vbits64 |= vbits8;
1212    }
1213 
1214    /* This is a hack which avoids producing errors for code which
1215       insists on stepping along byte strings in aligned word-sized
1216       chunks, and there is a partially defined word at the end.  (eg,
1217       optimised strlen).  Such code is basically broken at least WRT
1218       semantics of ANSI C, but sometimes users don't have the option
1219       to fix it, and so this option is provided.  Note it is now
1220       defaulted to not-engaged.
1221 
1222       A load from a partially-addressable place is allowed if:
1223       - the command-line flag is set
1224       - it's a word-sized, word-aligned load
1225       - at least one of the addresses in the word *is* valid
1226    */
1227    partial_load_exemption_applies
1228       = MC_(clo_partial_loads_ok) && szB == VG_WORDSIZE
1229                                    && VG_IS_WORD_ALIGNED(a)
1230                                    && n_addrs_bad < VG_WORDSIZE;
1231 
1232    if (n_addrs_bad > 0 && !partial_load_exemption_applies)
1233       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1234 
1235    return vbits64;
1236 }
1237 
1238 
1239 static
1240 #ifndef PERF_FAST_STOREV
1241 INLINE
1242 #endif
1243 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1244 {
1245    SizeT szB = nBits / 8;
1246    SizeT i, n_addrs_bad = 0;
1247    UChar vbits8;
1248    Addr  ai;
1249    Bool  ok;
1250 
1251    PROF_EVENT(35, "mc_STOREVn_slow");
1252 
1253    /* ------------ BEGIN semi-fast cases ------------ */
1254    /* These deal quickly-ish with the common auxiliary primary map
1255       cases on 64-bit platforms.  They are merely a speedup hack; they can be
1256       omitted without loss of correctness/functionality.  Note that in
1257       both cases the "sizeof(void*) == 8" causes these cases to be
1258       folded out by compilers on 32-bit platforms.  These are derived
1259       from STOREV64 and STOREV32.
1260    */
1261    if (LIKELY(sizeof(void*) == 8
1262                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1263       SecMap* sm       = get_secmap_for_reading(a);
1264       UWord   sm_off16 = SM_OFF_16(a);
1265       UWord   vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1266       if (LIKELY( !is_distinguished_sm(sm) &&
1267                           (VA_BITS16_DEFINED   == vabits16 ||
1268                            VA_BITS16_UNDEFINED == vabits16) )) {
1269          /* Handle common case quickly: a is suitably aligned, */
1270          /* is mapped, and is addressable. */
1271          // Convert full V-bits in register to compact 2-bit form.
1272          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1273             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
1274             return;
1275          } else if (V_BITS64_UNDEFINED == vbytes) {
1276             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
1277             return;
1278          }
1279          /* else fall into the slow case */
1280       }
1281       /* else fall into the slow case */
1282    }
1283    if (LIKELY(sizeof(void*) == 8
1284                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1285       SecMap* sm      = get_secmap_for_reading(a);
1286       UWord   sm_off  = SM_OFF(a);
1287       UWord   vabits8 = sm->vabits8[sm_off];
1288       if (LIKELY( !is_distinguished_sm(sm) &&
1289                           (VA_BITS8_DEFINED   == vabits8 ||
1290                            VA_BITS8_UNDEFINED == vabits8) )) {
1291          /* Handle common case quickly: a is suitably aligned, */
1292          /* is mapped, and is addressable. */
1293          // Convert full V-bits in register to compact 2-bit form.
1294          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1295             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1296             return;
1297          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1298             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1299             return;
1300          }
1301          /* else fall into the slow case */
1302       }
1303       /* else fall into the slow case */
1304    }
1305    /* ------------ END semi-fast cases ------------ */
1306 
1307    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1308 
1309    /* Dump vbytes in memory, iterating from least to most significant
1310       byte.  At the same time establish addressibility of the location. */
1311    for (i = 0; i < szB; i++) {
1312       PROF_EVENT(36, "mc_STOREVn_slow(loop)");
1313       ai     = a + byte_offset_w(szB, bigendian, i);
1314       vbits8 = vbytes & 0xff;
1315       ok     = set_vbits8(ai, vbits8);
1316       if (!ok) n_addrs_bad++;
1317       vbytes >>= 8;
1318    }
1319 
1320    /* If an address error has happened, report it. */
1321    if (n_addrs_bad > 0)
1322       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1323 }
1324 
1325 
1326 /*------------------------------------------------------------*/
1327 /*--- Setting permissions over address ranges.             ---*/
1328 /*------------------------------------------------------------*/
1329 
1330 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1331                                       UWord dsm_num )
1332 {
1333    UWord    sm_off, sm_off16;
1334    UWord    vabits2 = vabits16 & 0x3;
1335    SizeT    lenA, lenB, len_to_next_secmap;
1336    Addr     aNext;
1337    SecMap*  sm;
1338    SecMap** sm_ptr;
1339    SecMap*  example_dsm;
1340 
1341    PROF_EVENT(150, "set_address_range_perms");
1342 
1343    /* Check the V+A bits make sense. */
1344    tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
1345              VA_BITS16_UNDEFINED == vabits16 ||
1346              VA_BITS16_DEFINED   == vabits16);
1347 
1348    // This code should never write PDBs;  ensure this.  (See comment above
1349    // set_vabits2().)
1350    tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1351 
1352    if (lenT == 0)
1353       return;
1354 
1355    if (lenT > 256 * 1024 * 1024) {
1356       if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1357          Char* s = "unknown???";
1358          if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1359          if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1360          if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
1361          VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1362                                   "large range [0x%lx, 0x%lx) (%s)\n",
1363                                   a, a + lenT, s);
1364       }
1365    }
1366 
1367 #ifndef PERF_FAST_SARP
1368    /*------------------ debug-only case ------------------ */
1369    {
1370       // Endianness doesn't matter here because all bytes are being set to
1371       // the same value.
1372       // Nb: We don't have to worry about updating the sec-V-bits table
1373       // after these set_vabits2() calls because this code never writes
1374       // VA_BITS2_PARTDEFINED values.
1375       SizeT i;
1376       for (i = 0; i < lenT; i++) {
1377          set_vabits2(a + i, vabits2);
1378       }
1379       return;
1380    }
1381 #endif
1382 
1383    /*------------------ standard handling ------------------ */
1384 
1385    /* Get the distinguished secondary that we might want
1386       to use (part of the space-compression scheme). */
1387    example_dsm = &sm_distinguished[dsm_num];
1388 
1389    // We have to handle ranges covering various combinations of partial and
1390    // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
1391    // Cases marked with a '*' are common.
1392    //
1393    //   TYPE                                             PARTS USED
1394    //   ----                                             ----------
1395    // * one partial sec-map                  (p)         1
1396    // - one whole sec-map                    (P)         2
1397    //
1398    // * two partial sec-maps                 (pp)        1,3
1399    // - one partial, one whole sec-map       (pP)        1,2
1400    // - one whole, one partial sec-map       (Pp)        2,3
1401    // - two whole sec-maps                   (PP)        2,2
1402    //
1403    // * one partial, one whole, one partial  (pPp)       1,2,3
1404    // - one partial, two whole               (pPP)       1,2,2
1405    // - two whole, one partial               (PPp)       2,2,3
1406    // - three whole                          (PPP)       2,2,2
1407    //
1408    // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
1409    // - one partial, N-1 whole               (pP...PP)   1,2...2,2
1410    // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
1411    // - N whole                              (PP...PP)   2,2...2,2
1412 
1413    // Break up total length (lenT) into two parts:  length in the first
1414    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
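   // (Worked example, assuming the 64KB sec-maps used below: with
   //  a == 0x12340 and lenT == 0x20000 we get aNext == 0x20000, hence
   //  lenA == 0xDCC0 and lenB == 0x12340; note lenA + lenB == lenT.)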
1415    aNext = start_of_this_sm(a) + SM_SIZE;
1416    len_to_next_secmap = aNext - a;
1417    if ( lenT <= len_to_next_secmap ) {
1418       // Range entirely within one sec-map.  Covers almost all cases.
1419       PROF_EVENT(151, "set_address_range_perms-single-secmap");
1420       lenA = lenT;
1421       lenB = 0;
1422    } else if (is_start_of_sm(a)) {
1423       // Range spans at least one whole sec-map, and starts at the beginning
1424       // of a sec-map; skip to Part 2.
1425       PROF_EVENT(152, "set_address_range_perms-startof-secmap");
1426       lenA = 0;
1427       lenB = lenT;
1428       goto part2;
1429    } else {
1430       // Range spans two or more sec-maps, first one is partial.
1431       PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
1432       lenA = len_to_next_secmap;
1433       lenB = lenT - lenA;
1434    }
1435 
1436    //------------------------------------------------------------------------
1437    // Part 1: Deal with the first sec_map.  Most of the time the range will be
1438    // entirely within a sec_map and this part alone will suffice.  Also,
1439    // doing it this way lets us avoid repeatedly testing for the crossing of
1440    // a sec-map boundary within these loops.
1441    //------------------------------------------------------------------------
1442 
1443    // If it's distinguished, make it undistinguished if necessary.
1444    sm_ptr = get_secmap_ptr(a);
1445    if (is_distinguished_sm(*sm_ptr)) {
1446       if (*sm_ptr == example_dsm) {
1447          // Sec-map already has the V+A bits that we want, so skip.
1448          PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
1449          a    = aNext;
1450          lenA = 0;
1451       } else {
1452          PROF_EVENT(155, "set_address_range_perms-dist-sm1");
1453          *sm_ptr = copy_for_writing(*sm_ptr);
1454       }
1455    }
1456    sm = *sm_ptr;
1457 
1458    // 1 byte steps
1459    while (True) {
1460       if (VG_IS_8_ALIGNED(a)) break;
1461       if (lenA < 1)           break;
1462       PROF_EVENT(156, "set_address_range_perms-loop1a");
1463       sm_off = SM_OFF(a);
1464       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1465       a    += 1;
1466       lenA -= 1;
1467    }
1468    // 8-aligned, 8 byte steps
1469    while (True) {
1470       if (lenA < 8) break;
1471       PROF_EVENT(157, "set_address_range_perms-loop8a");
1472       sm_off16 = SM_OFF_16(a);
1473       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1474       a    += 8;
1475       lenA -= 8;
1476    }
1477    // 1 byte steps
1478    while (True) {
1479       if (lenA < 1) break;
1480       PROF_EVENT(158, "set_address_range_perms-loop1b");
1481       sm_off = SM_OFF(a);
1482       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1483       a    += 1;
1484       lenA -= 1;
1485    }
1486 
1487    // We've finished the first sec-map.  Is that it?
1488    if (lenB == 0)
1489       return;
1490 
1491    //------------------------------------------------------------------------
1492    // Part 2: Fast-set entire sec-maps at a time.
1493    //------------------------------------------------------------------------
1494   part2:
1495    // 64KB-aligned, 64KB steps.
1496    // Nb: we can reach here with lenB < SM_SIZE
1497    tl_assert(0 == lenA);
1498    while (True) {
1499       if (lenB < SM_SIZE) break;
1500       tl_assert(is_start_of_sm(a));
1501       PROF_EVENT(159, "set_address_range_perms-loop64K");
1502       sm_ptr = get_secmap_ptr(a);
1503       if (!is_distinguished_sm(*sm_ptr)) {
1504          PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
1505          // Free the non-distinguished sec-map that we're replacing.  This
1506          // case happens moderately often, enough to be worthwhile.
1507          VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1508       }
1509       update_SM_counts(*sm_ptr, example_dsm);
1510       // Make the sec-map entry point to the example DSM
1511       *sm_ptr = example_dsm;
1512       lenB -= SM_SIZE;
1513       a    += SM_SIZE;
1514    }
1515 
1516    // We've finished the whole sec-maps.  Is that it?
1517    if (lenB == 0)
1518       return;
1519 
1520    //------------------------------------------------------------------------
1521    // Part 3: Finish off the final partial sec-map, if necessary.
1522    //------------------------------------------------------------------------
1523 
1524    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1525 
1526    // If it's distinguished, make it undistinguished if necessary.
1527    sm_ptr = get_secmap_ptr(a);
1528    if (is_distinguished_sm(*sm_ptr)) {
1529       if (*sm_ptr == example_dsm) {
1530          // Sec-map already has the V+A bits that we want, so stop.
1531          PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
1532          return;
1533       } else {
1534          PROF_EVENT(162, "set_address_range_perms-dist-sm2");
1535          *sm_ptr = copy_for_writing(*sm_ptr);
1536       }
1537    }
1538    sm = *sm_ptr;
1539 
1540    // 8-aligned, 8 byte steps
1541    while (True) {
1542       if (lenB < 8) break;
1543       PROF_EVENT(163, "set_address_range_perms-loop8b");
1544       sm_off16 = SM_OFF_16(a);
1545       ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1546       a    += 8;
1547       lenB -= 8;
1548    }
1549    // 1 byte steps
1550    while (True) {
1551       if (lenB < 1) return;
1552       PROF_EVENT(164, "set_address_range_perms-loop1c");
1553       sm_off = SM_OFF(a);
1554       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1555       a    += 1;
1556       lenB -= 1;
1557    }
1558 }
1559 
1560 
1561 /* --- Set permissions for arbitrary address ranges --- */
1562 
1563 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1564 {
1565    PROF_EVENT(40, "MC_(make_mem_noaccess)");
1566    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1567    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1568    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1569       ocache_sarp_Clear_Origins ( a, len );
1570 }
1571 
1572 static void make_mem_undefined ( Addr a, SizeT len )
1573 {
1574    PROF_EVENT(41, "make_mem_undefined");
1575    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1576    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1577 }
1578 
1579 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1580 {
1581    PROF_EVENT(41, "MC_(make_mem_undefined)");
1582    DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1583    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1584    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1585       ocache_sarp_Set_Origins ( a, len, otag );
1586 }
1587 
1588 static
1589 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1590                                           ThreadId tid, UInt okind )
1591 {
1592    UInt        ecu;
1593    ExeContext* here;
1594    /* VG_(record_ExeContext) checks for validity of tid, and asserts
1595       if it is invalid.  So no need to do it here. */
1596    tl_assert(okind <= 3);
1597    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1598    tl_assert(here);
1599    ecu = VG_(get_ECU_from_ExeContext)(here);
1600    tl_assert(VG_(is_plausible_ECU)(ecu));
1601    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1602 }
1603 
1604 static
1605 void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) {
1606    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1607 }
1608 
1609 
1610 void MC_(make_mem_defined) ( Addr a, SizeT len )
1611 {
1612    PROF_EVENT(42, "MC_(make_mem_defined)");
1613    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1614    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1615    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1616       ocache_sarp_Clear_Origins ( a, len );
1617 }
1618 
1619 /* For each byte in [a,a+len), if the byte is addressable, make it be
1620    defined, but if it isn't addressable, leave it alone.  In other
1621    words a version of MC_(make_mem_defined) that doesn't mess with
1622    addressability.  Low-performance implementation. */
1623 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1624 {
1625    SizeT i;
1626    UChar vabits2;
1627    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1628    for (i = 0; i < len; i++) {
1629       vabits2 = get_vabits2( a+i );
1630       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1631          set_vabits2(a+i, VA_BITS2_DEFINED);
1632          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1633             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1634          }
1635       }
1636    }
1637 }
1638 
1639 /* Similarly (needed for mprotect handling ..) */
1640 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1641 {
1642    SizeT i;
1643    UChar vabits2;
1644    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1645    for (i = 0; i < len; i++) {
1646       vabits2 = get_vabits2( a+i );
1647       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1648          set_vabits2(a+i, VA_BITS2_DEFINED);
1649          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1650             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1651          }
1652       }
1653    }
1654 }
1655 
1656 /* --- Block-copy permissions (needed for implementing realloc() and
1657        sys_mremap). --- */
1658 
1659 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1660 {
1661    SizeT i, j;
1662    UChar vabits2, vabits8;
1663    Bool  aligned, nooverlap;
1664 
1665    DEBUG("MC_(copy_address_range_state)\n");
1666    PROF_EVENT(50, "MC_(copy_address_range_state)");
1667 
1668    if (len == 0 || src == dst)
1669       return;
1670 
1671    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1672    nooverlap = src+len <= dst || dst+len <= src;
1673 
1674    if (nooverlap && aligned) {
1675 
1676       /* Vectorised fast case, when no overlap and suitably aligned */
1677       /* vector loop */
1678       i = 0;
1679       while (len >= 4) {
1680          vabits8 = get_vabits8_for_aligned_word32( src+i );
1681          set_vabits8_for_aligned_word32( dst+i, vabits8 );
1682          if (LIKELY(VA_BITS8_DEFINED == vabits8
1683                             || VA_BITS8_UNDEFINED == vabits8
1684                             || VA_BITS8_NOACCESS == vabits8)) {
1685             /* do nothing */
1686          } else {
1687             /* have to copy secondary map info */
1688             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1689                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1690             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1691                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1692             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1693                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1694             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1695                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1696          }
1697          i += 4;
1698          len -= 4;
1699       }
1700       /* fixup loop */
1701       while (len >= 1) {
1702          vabits2 = get_vabits2( src+i );
1703          set_vabits2( dst+i, vabits2 );
1704          if (VA_BITS2_PARTDEFINED == vabits2) {
1705             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1706          }
1707          i++;
1708          len--;
1709       }
1710 
1711    } else {
1712 
1713       /* We have to do things the slow way */
1714       if (src < dst) {
1715          for (i = 0, j = len-1; i < len; i++, j--) {
1716             PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
1717             vabits2 = get_vabits2( src+j );
1718             set_vabits2( dst+j, vabits2 );
1719             if (VA_BITS2_PARTDEFINED == vabits2) {
1720                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1721             }
1722          }
1723       }
1724 
1725       if (src > dst) {
1726          for (i = 0; i < len; i++) {
1727             PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
1728             vabits2 = get_vabits2( src+i );
1729             set_vabits2( dst+i, vabits2 );
1730             if (VA_BITS2_PARTDEFINED == vabits2) {
1731                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1732             }
1733          }
1734       }
1735    }
1736 
1737 }
1738 
1739 
1740 /*------------------------------------------------------------*/
1741 /*--- Origin tracking stuff - cache basics                 ---*/
1742 /*------------------------------------------------------------*/
1743 
1744 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1745    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1746 
1747    Note that this implementation draws inspiration from the "origin
1748    tracking by value piggybacking" scheme described in "Tracking Bad
1749    Apples: Reporting the Origin of Null and Undefined Value Errors"
1750    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1751    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1752    implemented completely differently.
1753 
1754    Origin tags and ECUs -- about the shadow values
1755    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1756 
1757    This implementation tracks the defining point of all uninitialised
1758    values using so called "origin tags", which are 32-bit integers,
1759    rather than using the values themselves to encode the origins.  The
1760    latter, so-called "value piggybacking", is what the OOPSLA07 paper
1761    describes.
1762 
1763    Origin tags, as tracked by the machinery below, are 32-bit unsigned
1764    ints (UInts), regardless of the machine's word size.  Each tag
1765    comprises an upper 30-bit ECU field and a lower 2-bit
1766    'kind' field.  The ECU field is a number given out by m_execontext
1767    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
1768    directly as an origin tag (otag), but in fact we want to put
1769    additional information in the 'kind' field to indicate roughly where the
1770    tag came from.  This helps print more understandable error messages
1771    for the user -- it has no other purpose.  In summary:
1772 
1773    * Both ECUs and origin tags are represented as 32-bit words
1774 
1775    * m_execontext and the core-tool interface deal purely in ECUs.
1776      They have no knowledge of origin tags - that is a purely
1777      Memcheck-internal matter.
1778 
1779    * all valid ECUs have the lowest 2 bits zero and at least
1780      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
1781 
1782    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
1783      constants defined in mc_include.h.
1784 
1785    * to convert an otag back to an ECU, AND it with ~3
1786 
1787    One important fact is that no valid otag is zero.  A zero otag is
1788    used by the implementation to indicate "no origin", which could
1789    mean that either the value is defined, or it is undefined but the
1790    implementation somehow managed to lose the origin.
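
   To make the conversions above concrete, here is a minimal sketch
   (purely illustrative -- this snippet is not part of the file, and
   the variable names are invented):

      UInt ecu  = VG_(get_ECU_from_ExeContext)( ec );  // low 2 bits zero
      UInt otag = ecu | MC_OKIND_HEAP;                 // ECU  -> otag
      UInt back = otag & ~3;                           // otag -> ECU
      tl_assert( VG_(is_plausible_ECU)(back) && 0 != otag );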
1791 
1792    The ECU used for memory created by malloc etc is derived from the
1793    stack trace at the time the malloc etc happens.  This means the
1794    mechanism can show the exact allocation point for heap-created
1795    uninitialised values.
1796 
1797    In contrast, it is simply too expensive to create a complete
1798    backtrace for each stack allocation.  Therefore we merely use a
1799    depth-1 backtrace for stack allocations, which can be done once at
1800    translation time, rather than N times at run time.  The result of
1801    this is that, for stack created uninitialised values, Memcheck can
1802    only show the allocating function, and not what called it.
1803    Furthermore, compilers tend to move the stack pointer just once at
1804    the start of the function, to allocate all locals, and so in fact
1805    the stack origin almost always simply points to the opening brace
1806    of the function.  Net result is, for stack origins, the mechanism
1807    can tell you in which function the undefined value was created, but
1808    that's all.  Users will need to carefully check all locals in the
1809    specified function.
1810 
1811    Shadowing registers and memory
1812    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1813 
1814    Memory is shadowed using a two level cache structure (ocacheL1 and
1815    ocacheL2).  Memory references are first directed to ocacheL1.  This
1816    is a traditional 2-way set associative cache with 32-byte lines and
1817    approximate LRU replacement within each set.
1818 
1819    A naive implementation would require storing one 32 bit otag for
1820    each byte of memory covered, a 4:1 space overhead.  Instead, there
1821    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
1822    that shows which of the 4 bytes have that shadow value and which
1823    have a shadow value of zero (indicating no origin).  Hence a lot of
1824    space is saved, but the cost is that only one different origin per
1825    4 bytes of address space can be represented.  This is a source of
1826    imprecision, but how much of a problem it really is remains to be
1827    seen.
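
   For example (an illustrative sketch using the OCacheLine fields
   defined further down; the particular bit numbering shown is an
   assumption):

      OCacheLine* line = find_OCacheLine( a );   // a assumed 4-aligned
      UWord       off  = oc_line_offset( a );
      line->w32[off]   = otag;      // the single otag for these 4 bytes
      line->descr[off] = 1 << 2;    // say, only byte a+2 carries it

   A later store of a different otag to byte a+0 of the same word would
   overwrite w32[off] and set another descr bit, so byte a+2's origin
   would silently be replaced (see the imprecision notes near the end
   of this comment).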
1828 
1829    A cache line that contains all zeroes ("no origins") contains no
1830    useful information, and can be ejected from the L1 cache "for
1831    free", in the sense that a read miss on the L1 causes a line of
1832    zeroes to be installed.  However, ejecting a line containing
1833    nonzeroes risks losing origin information permanently.  In order to
1834    prevent such lossage, ejected nonzero lines are placed in a
1835    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
1836    lines.  This can grow arbitrarily large, and so should ensure that
1837    Memcheck runs out of memory in preference to losing useful origin
1838    info due to cache size limitations.
1839 
1840    Shadowing registers is a bit tricky, because the shadow values are
1841    32 bits, regardless of the size of the register.  That gives a
1842    problem for registers smaller than 32 bits.  The solution is to
1843    find spaces in the guest state that are unused, and use those to
1844    shadow guest state fragments smaller than 32 bits.  For example, on
1845    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
1846    shadow are allocated for the register's otag, then there are still
1847    12 bytes left over which could be used to shadow 3 other values.
1848 
1849    This implies there is some non-obvious mapping from guest state
1850    (start,length) pairs to the relevant shadow offset (for the origin
1851    tags).  And it is unfortunately guest-architecture specific.  The
1852    mapping is contained in mc_machine.c, which is quite lengthy but
1853    straightforward.
1854 
1855    Instrumenting the IR
1856    ~~~~~~~~~~~~~~~~~~~~
1857 
1858    Instrumentation is largely straightforward, and done by the
1859    functions schemeE and schemeS in mc_translate.c.  These generate
1860    code for handling the origin tags of expressions (E) and statements
1861    (S) respectively.  The rather strange names are a reference to the
1862    "compilation schemes" shown in Simon Peyton Jones' book "The
1863    Implementation of Functional Programming Languages" (Prentice Hall,
1864    1987, see
1865    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
1866 
1867    schemeS merely arranges to move shadow values around the guest
1868    state to track the incoming IR.  schemeE is largely trivial too.
1869    The only significant point is how to compute the otag corresponding
1870    to binary (or ternary, quaternary, etc) operator applications.  The
1871    rule is simple: just take whichever value is larger (32-bit
1872    unsigned max).  Constants get the special value zero.  Hence this
1873    rule always propagates a nonzero (known) otag in preference to a
1874    zero (unknown, or more likely, value-is-defined) tag, as we want.
1875    If two different undefined values are inputs to a binary operator
1876    application, then which is propagated is arbitrary, but that
1877    doesn't matter, since the program is erroneous in using either of
1878    the values, and so there's no point in attempting to propagate
1879    both.
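
   For instance (illustrative numbers only): if x is defined, and so
   carries otag 0, while y is undefined with otag (0x1004 | MC_OKIND_HEAP),
   then the otag attached to op(x,y) is Max32U(0, 0x1004 | MC_OKIND_HEAP),
   i.e. y's tag, which is the desired outcome.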
1880 
1881    Since constants are abstracted to (otag) zero, much of the
1882    instrumentation code can be folded out without difficulty by the
1883    generic post-instrumentation IR cleanup pass, using these rules:
1884    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
1885    are constants is evaluated at JIT time, followed by the resulting
1886    dead code removal.  In practice this causes surprisingly few Max32Us to
1887    survive through to backend code generation.
1888 
1889    Integration with the V-bits machinery
1890    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1891 
1892    This is again largely straightforward.  Mostly the otag and V bits
1893    stuff are independent.  The only point of interaction is when the V
1894    bits instrumenter creates a call to a helper function to report an
1895    uninitialised value error -- in that case it must first use schemeE
1896    to get hold of the origin tag expression for the value, and pass
1897    that to the helper too.
1898 
1899    There is the usual stuff to do with setting address range
1900    permissions.  When memory is painted undefined, we must also know
1901    the origin tag to paint with, which involves some tedious plumbing,
1902    particularly to do with the fast case stack handlers.  When memory
1903    is painted defined or noaccess then the origin tags must be forced
1904    to zero.
1905 
1906    One of the goals of the implementation was to ensure that the
1907    non-origin tracking mode isn't slowed down at all.  To do this,
1908    various functions to do with memory permissions setting (again,
1909    mostly pertaining to the stack) are duplicated for the with- and
1910    without-otag case.
1911 
1912    Dealing with stack redzones, and the NIA cache
1913    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1914 
1915    This is one of the few non-obvious parts of the implementation.
1916 
1917    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
1918    reserved area below the stack pointer, that can be used as scratch
1919    space by compiler generated code for functions.  In the Memcheck
1920    sources this is referred to as the "stack redzone".  The important
1921    thing here is that such redzones are considered volatile across
1922    function calls and returns.  So Memcheck takes care to mark them as
1923    undefined for each call and return, on the afflicted platforms.
1924    Past experience shows this is essential in order to get reliable
1925    messages about uninitialised values that come from the stack.
1926 
1927    So the question is, when we paint a redzone undefined, what origin
1928    tag should we use for it?  Consider a function f() calling g().  If
1929    we paint the redzone using an otag derived from the ExeContext of
1930    the CALL/BL instruction in f, then any errors in g causing it to
1931    use uninitialised values that happen to lie in the redzone, will be
1932    reported as having their origin in f.  Which is highly confusing.
1933 
1934    The same applies for returns: if, on a return, we paint the redzone
1935    using an origin tag derived from the ExeContext of the RET/BLR
1936    instruction in g, then any later errors in f causing it to use
1937    uninitialised values in the redzone, will be reported as having
1938    their origin in g.  Which is just as confusing.
1939 
1940    To do it right, in both cases we need to use an origin tag which
1941    pertains to the instruction which dynamically follows the CALL/BL
1942    or RET/BLR.  In short, one derived from the NIA - the "next
1943    instruction address".
1944 
1945    To make this work, Memcheck's redzone-painting helper,
1946    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
1947    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
1948    ExeContext's ECU as the basis for the otag used to paint the
1949    redzone.  The expensive part of this is converting an NIA into an
1950    ECU, since this happens once for every call and every return.  So
1951    we use a simple 511-line, 2-way set associative cache
1952    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
1953    the cost out.
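
   In outline, the uncached slow path of that conversion is just the
   following (an illustrative sketch; the real code in this file wraps
   the nia_to_ecu_cache lookup around it):

      ExeContext* ec   = VG_(make_depth_1_ExeContext_from_Addr)( nia );
      UInt        ecu  = VG_(get_ECU_from_ExeContext)( ec );
      UInt        otag = ecu | MC_OKIND_STACK;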
1954 
1955    Further background comments
1956    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1957 
1958    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
1959    > it really just the address of the relevant ExeContext?
1960 
1961    Well, it's not the address, but a value which has a 1-1 mapping
1962    with ExeContexts, and is guaranteed not to be zero, since zero
1963    denotes (to memcheck) "unknown origin or defined value".  So these
1964    UInts are just numbers starting at 4 and incrementing by 4; each
1965    ExeContext is given a number when it is created.  (*** NOTE this
1966    confuses otags and ECUs; see comments above ***).
1967 
1968    Making these otags 32-bit regardless of the machine's word size
1969    makes the 64-bit implementation easier (next para).  And it doesn't
1970    really limit us in any way, since for the tags to overflow would
1971    require that the program somehow caused 2^30-1 different
1972    ExeContexts to be created, in which case it is probably in deep
1973    trouble.  Not to mention V will have soaked up many tens of
1974    gigabytes of memory merely to store them all.
1975 
1976    So having 64-bit origins doesn't really buy you anything, and has
1977    the following downsides:
1978 
1979    Suppose that instead, an otag is a UWord.  This would mean that, on
1980    a 64-bit target,
1981 
1982    1. It becomes hard to shadow any element of guest state which is
1983       smaller than 8 bytes.  To do so means you'd need to find some
1984       8-byte-sized hole in the guest state which you don't want to
1985       shadow, and use that instead to hold the otag.  On ppc64, the
1986       condition code register(s) are split into 20 UChar sized pieces,
1987       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
1988       and so that would entail finding 160 bytes somewhere else in the
1989       guest state.
1990 
1991       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
1992       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
1993       same) and so I had to look for 4 untracked otag-sized areas in
1994       the guest state to make that possible.
1995 
1996       The same problem exists of course when origin tags are only 32
1997       bits, but it's less extreme.
1998 
1999    2. (More compelling) it doubles the size of the origin shadow
2000       memory.  Given that the shadow memory is organised as a fixed
2001       size cache, and that accuracy of tracking is limited by origins
2002       falling out the cache due to space conflicts, this isn't good.
2003 
2004    > Another question: is the origin tracking perfect, or are there
2005    > cases where it fails to determine an origin?
2006 
2007    It is imperfect for at least for the following reasons, and
2008    probably more:
2009 
2010    * Insufficient capacity in the origin cache.  When a line is
2011      evicted from the cache it is gone forever, and so subsequent
2012      queries for the line produce zero, indicating no origin
2013      information.  Interestingly, a line containing all zeroes can be
2014      evicted "free" from the cache, since it contains no useful
2015      information, so there is scope perhaps for some cleverer cache
2016      management schemes.  (*** NOTE, with the introduction of the
2017      second level origin tag cache, ocacheL2, this is no longer a
2018      problem. ***)
2019 
2020    * The origin cache only stores one otag per 32-bits of address
2021      space, plus 4 bits indicating which of the 4 bytes has that tag
2022      and which are considered defined.  The result is that if two
2023      undefined bytes in the same word are stored in memory, the first
2024      stored byte's origin will be lost and replaced by the origin for
2025      the second byte.
2026 
2027    * Nonzero origin tags for defined values.  Consider a binary
2028      operator application op(x,y).  Suppose y is undefined (and so has
2029      a valid nonzero origin tag), and x is defined, but erroneously
2030      has a nonzero origin tag (defined values should have tag zero).
2031      If the erroneous tag has a numeric value greater than y's tag,
2032      then the rule for propagating origin tags though binary
2033      operations, which is simply to take the unsigned max of the two
2034      tags, will erroneously propagate x's tag rather than y's.
2035 
2036    * Some obscure uses of x86/amd64 byte registers can cause lossage
2037      or confusion of origins.  %AH .. %DH are treated as different
2038      from, and unrelated to, their parent registers, %EAX .. %EDX.
2039      So some wierd sequences like
2040 
2041         movb undefined-value, %AH
2042         movb defined-value, %AL
2043         .. use %AX or %EAX ..
2044 
2045      will cause the origin attributed to %AH to be ignored, since %AL,
2046      %AX, %EAX are treated as the same register, and %AH as a
2047      completely separate one.
2048 
2049    But having said all that, it actually seems to work fairly well in
2050    practice.
2051 */
2052 
2053 static UWord stats_ocacheL1_find           = 0;
2054 static UWord stats_ocacheL1_found_at_1     = 0;
2055 static UWord stats_ocacheL1_found_at_N     = 0;
2056 static UWord stats_ocacheL1_misses         = 0;
2057 static UWord stats_ocacheL1_lossage        = 0;
2058 static UWord stats_ocacheL1_movefwds       = 0;
2059 
2060 static UWord stats__ocacheL2_refs          = 0;
2061 static UWord stats__ocacheL2_misses        = 0;
2062 static UWord stats__ocacheL2_n_nodes_max   = 0;
2063 
2064 /* Cache of 32-bit values, one every 32 bits of address space */
2065 
2066 #define OC_BITS_PER_LINE 5
2067 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2068 
2069 static INLINE UWord oc_line_offset ( Addr a ) {
2070    return (a >> 2) & (OC_W32S_PER_LINE - 1);
2071 }
2072 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2073    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2074 }
2075 
2076 #define OC_LINES_PER_SET 2
2077 
2078 #define OC_N_SET_BITS    20
2079 #define OC_N_SETS        (1 << OC_N_SET_BITS)
2080 
2081 /* These settings give:
2082    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
2083    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
2084 */
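
/* (A quick check of those figures, assuming no struct padding: an
   OCacheLine below is 8+32+8 = 48 bytes on a 64-bit host and
   4+32+8 = 44 bytes on a 32-bit host; with 2^20 sets of 2 lines each,
   that is 2^21 * 48 = 100,663,296 or 2^21 * 44 = 92,274,688 bytes,
   of which the w32[] payload is 2^21 * 32 = 67,108,864 bytes.) */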
2085 
2086 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2087 
2088 
2089 typedef
2090    struct {
2091       Addr  tag;
2092       UInt  w32[OC_W32S_PER_LINE];
2093       UChar descr[OC_W32S_PER_LINE];
2094    }
2095    OCacheLine;
2096 
2097 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
2098    in use, 'n' (nonzero) if it contains at least one valid origin tag,
2099    and 'z' if all the represented tags are zero. */
2100 static UChar classify_OCacheLine ( OCacheLine* line )
2101 {
2102    UWord i;
2103    if (line->tag == 1/*invalid*/)
2104       return 'e'; /* EMPTY */
2105    tl_assert(is_valid_oc_tag(line->tag));
2106    for (i = 0; i < OC_W32S_PER_LINE; i++) {
2107       tl_assert(0 == ((~0xF) & line->descr[i]));
2108       if (line->w32[i] > 0 && line->descr[i] > 0)
2109          return 'n'; /* NONZERO - contains useful info */
2110    }
2111    return 'z'; /* ZERO - no useful info */
2112 }
2113 
2114 typedef
2115    struct {
2116       OCacheLine line[OC_LINES_PER_SET];
2117    }
2118    OCacheSet;
2119 
2120 typedef
2121    struct {
2122       OCacheSet set[OC_N_SETS];
2123    }
2124    OCache;
2125 
2126 static OCache* ocacheL1 = NULL;
2127 static UWord   ocacheL1_event_ctr = 0;
2128 
2129 static void init_ocacheL2 ( void ); /* fwds */
2130 static void init_OCache ( void )
2131 {
2132    UWord line, set;
2133    tl_assert(MC_(clo_mc_level) >= 3);
2134    tl_assert(ocacheL1 == NULL);
2135    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2136    if (ocacheL1 == NULL) {
2137       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2138                                    sizeof(OCache) );
2139    }
2140    tl_assert(ocacheL1 != NULL);
2141    for (set = 0; set < OC_N_SETS; set++) {
2142       for (line = 0; line < OC_LINES_PER_SET; line++) {
2143          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2144       }
2145    }
2146    init_ocacheL2();
2147 }
2148 
2149 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2150 {
2151    OCacheLine tmp;
2152    stats_ocacheL1_movefwds++;
2153    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2154    tmp = set->line[lineno-1];
2155    set->line[lineno-1] = set->line[lineno];
2156    set->line[lineno] = tmp;
2157 }
2158 
2159 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2160    UWord i;
2161    for (i = 0; i < OC_W32S_PER_LINE; i++) {
2162       line->w32[i] = 0; /* NO ORIGIN */
2163       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2164    }
2165    line->tag = tag;
2166 }
2167 
2168 //////////////////////////////////////////////////////////////
2169 //// OCache backing store
2170 
2171 static OSet* ocacheL2 = NULL;
2172 
2173 static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) {
2174    return VG_(malloc)(cc, szB);
2175 }
2176 static void ocacheL2_free ( void* v ) {
2177    VG_(free)( v );
2178 }
2179 
2180 /* Stats: # nodes currently in tree */
2181 static UWord stats__ocacheL2_n_nodes = 0;
2182 
2183 static void init_ocacheL2 ( void )
2184 {
2185    tl_assert(!ocacheL2);
2186    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2187    tl_assert(0 == offsetof(OCacheLine,tag));
2188    ocacheL2
2189       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2190                              NULL, /* fast cmp */
2191                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free );
2192    tl_assert(ocacheL2);
2193    stats__ocacheL2_n_nodes = 0;
2194 }
2195 
2196 /* Find line with the given tag in the tree, or NULL if not found. */
2197 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2198 {
2199    OCacheLine* line;
2200    tl_assert(is_valid_oc_tag(tag));
2201    stats__ocacheL2_refs++;
2202    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2203    return line;
2204 }
2205 
2206 /* Delete the line with the given tag from the tree, if it is present, and
2207    free up the associated memory. */
2208 static void ocacheL2_del_tag ( Addr tag )
2209 {
2210    OCacheLine* line;
2211    tl_assert(is_valid_oc_tag(tag));
2212    stats__ocacheL2_refs++;
2213    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2214    if (line) {
2215       VG_(OSetGen_FreeNode)(ocacheL2, line);
2216       tl_assert(stats__ocacheL2_n_nodes > 0);
2217       stats__ocacheL2_n_nodes--;
2218    }
2219 }
2220 
2221 /* Add a copy of the given line to the tree.  It must not already be
2222    present. */
2223 static void ocacheL2_add_line ( OCacheLine* line )
2224 {
2225    OCacheLine* copy;
2226    tl_assert(is_valid_oc_tag(line->tag));
2227    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2228    tl_assert(copy);
2229    *copy = *line;
2230    stats__ocacheL2_refs++;
2231    VG_(OSetGen_Insert)( ocacheL2, copy );
2232    stats__ocacheL2_n_nodes++;
2233    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2234       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2235 }
2236 
2237 ////
2238 //////////////////////////////////////////////////////////////
2239 
2240 __attribute__((noinline))
2241 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2242 {
2243    OCacheLine *victim, *inL2;
2244    UChar c;
2245    UWord line;
2246    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2247    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2248    UWord tag     = a & tagmask;
2249    tl_assert(setno >= 0 && setno < OC_N_SETS);
2250 
2251    /* we already tried line == 0; skip therefore. */
2252    for (line = 1; line < OC_LINES_PER_SET; line++) {
2253       if (ocacheL1->set[setno].line[line].tag == tag) {
2254          if (line == 1) {
2255             stats_ocacheL1_found_at_1++;
2256          } else {
2257             stats_ocacheL1_found_at_N++;
2258          }
2259          if (UNLIKELY(0 == (ocacheL1_event_ctr++
2260                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2261             moveLineForwards( &ocacheL1->set[setno], line );
2262             line--;
2263          }
2264          return &ocacheL1->set[setno].line[line];
2265       }
2266    }
2267 
2268    /* A miss.  Use the last slot.  Implicitly this means we're
2269       ejecting the line in the last slot. */
2270    stats_ocacheL1_misses++;
2271    tl_assert(line == OC_LINES_PER_SET);
2272    line--;
2273    tl_assert(line > 0);
2274 
2275    /* First, move the to-be-ejected line to the L2 cache. */
2276    victim = &ocacheL1->set[setno].line[line];
2277    c = classify_OCacheLine(victim);
2278    switch (c) {
2279       case 'e':
2280          /* the line is empty (has invalid tag); ignore it. */
2281          break;
2282       case 'z':
2283          /* line contains zeroes.  We must ensure the backing store is
2284             updated accordingly, either by copying the line there
2285             verbatim, or by ensuring it isn't present there.  We
2286            choose the latter on the basis that it reduces the size of
2287             the backing store. */
2288          ocacheL2_del_tag( victim->tag );
2289          break;
2290       case 'n':
2291          /* line contains at least one real, useful origin.  Copy it
2292             to the backing store. */
2293          stats_ocacheL1_lossage++;
2294          inL2 = ocacheL2_find_tag( victim->tag );
2295          if (inL2) {
2296             *inL2 = *victim;
2297          } else {
2298             ocacheL2_add_line( victim );
2299          }
2300          break;
2301       default:
2302          tl_assert(0);
2303    }
2304 
2305    /* Now we must reload the L1 cache from the backing tree, if
2306       possible. */
2307    tl_assert(tag != victim->tag); /* stay sane */
2308    inL2 = ocacheL2_find_tag( tag );
2309    if (inL2) {
2310       /* We're in luck.  It's in the L2. */
2311       ocacheL1->set[setno].line[line] = *inL2;
2312    } else {
2313       /* Missed at both levels of the cache hierarchy.  We have to
2314          declare it as full of zeroes (unknown origins). */
2315       stats__ocacheL2_misses++;
2316       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2317    }
2318 
2319    /* Move it one forwards */
2320    moveLineForwards( &ocacheL1->set[setno], line );
2321    line--;
2322 
2323    return &ocacheL1->set[setno].line[line];
2324 }
2325 
2326 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2327 {
2328    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2329    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2330    UWord tag     = a & tagmask;
2331 
2332    stats_ocacheL1_find++;
2333 
2334    if (OC_ENABLE_ASSERTIONS) {
2335       tl_assert(setno >= 0 && setno < OC_N_SETS);
2336       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2337    }
2338 
2339    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2340       return &ocacheL1->set[setno].line[0];
2341    }
2342 
2343    return find_OCacheLine_SLOW( a );
2344 }
2345 
2346 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2347 {
2348    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2349    //// Set the origins for a+0 .. a+7
2350    { OCacheLine* line;
2351      UWord lineoff = oc_line_offset(a);
2352      if (OC_ENABLE_ASSERTIONS) {
2353         tl_assert(lineoff >= 0
2354                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2355      }
2356      line = find_OCacheLine( a );
2357      line->descr[lineoff+0] = 0xF;
2358      line->descr[lineoff+1] = 0xF;
2359      line->w32[lineoff+0]   = otag;
2360      line->w32[lineoff+1]   = otag;
2361    }
2362    //// END inlined, specialised version of MC_(helperc_b_store8)
2363 }
2364 
2365 
2366 /*------------------------------------------------------------*/
2367 /*--- Aligned fast case permission setters,                ---*/
2368 /*--- for dealing with stacks                              ---*/
2369 /*------------------------------------------------------------*/
2370 
2371 /*--------------------- 32-bit ---------------------*/
2372 
2373 /* Nb: by "aligned" here we mean 4-byte aligned */
2374 
2375 static INLINE void make_aligned_word32_undefined ( Addr a )
2376 {
2377    PROF_EVENT(300, "make_aligned_word32_undefined");
2378 
2379 #ifndef PERF_FAST_STACK2
2380    make_mem_undefined(a, 4);
2381 #else
2382    {
2383       UWord   sm_off;
2384       SecMap* sm;
2385 
2386       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2387          PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
2388          make_mem_undefined(a, 4);
2389          return;
2390       }
2391 
2392       sm                  = get_secmap_for_writing_low(a);
2393       sm_off              = SM_OFF(a);
2394       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2395    }
2396 #endif
2397 }
2398 
2399 static INLINE
2400 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2401 {
2402    make_aligned_word32_undefined(a);
2403    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2404    //// Set the origins for a+0 .. a+3
2405    { OCacheLine* line;
2406      UWord lineoff = oc_line_offset(a);
2407      if (OC_ENABLE_ASSERTIONS) {
2408         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2409      }
2410      line = find_OCacheLine( a );
2411      line->descr[lineoff] = 0xF;
2412      line->w32[lineoff]   = otag;
2413    }
2414    //// END inlined, specialised version of MC_(helperc_b_store4)
2415 }
2416 
2417 static INLINE
2418 void make_aligned_word32_noaccess ( Addr a )
2419 {
2420    PROF_EVENT(310, "make_aligned_word32_noaccess");
2421 
2422 #ifndef PERF_FAST_STACK2
2423    MC_(make_mem_noaccess)(a, 4);
2424 #else
2425    {
2426       UWord   sm_off;
2427       SecMap* sm;
2428 
2429       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2430          PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
2431          MC_(make_mem_noaccess)(a, 4);
2432          return;
2433       }
2434 
2435       sm                  = get_secmap_for_writing_low(a);
2436       sm_off              = SM_OFF(a);
2437       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2438 
2439       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2440       //// Set the origins for a+0 .. a+3.
2441       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2442          OCacheLine* line;
2443          UWord lineoff = oc_line_offset(a);
2444          if (OC_ENABLE_ASSERTIONS) {
2445             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2446          }
2447          line = find_OCacheLine( a );
2448          line->descr[lineoff] = 0;
2449       }
2450       //// END inlined, specialised version of MC_(helperc_b_store4)
2451    }
2452 #endif
2453 }
2454 
2455 /*--------------------- 64-bit ---------------------*/
2456 
2457 /* Nb: by "aligned" here we mean 8-byte aligned */
2458 
2459 static INLINE void make_aligned_word64_undefined ( Addr a )
2460 {
2461    PROF_EVENT(320, "make_aligned_word64_undefined");
2462 
2463 #ifndef PERF_FAST_STACK2
2464    make_mem_undefined(a, 8);
2465 #else
2466    {
2467       UWord   sm_off16;
2468       SecMap* sm;
2469 
2470       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2471          PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
2472          make_mem_undefined(a, 8);
2473          return;
2474       }
2475 
2476       sm       = get_secmap_for_writing_low(a);
2477       sm_off16 = SM_OFF_16(a);
2478       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2479    }
2480 #endif
2481 }
2482 
2483 static INLINE
2484 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2485 {
2486    make_aligned_word64_undefined(a);
2487    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2488    //// Set the origins for a+0 .. a+7
2489    { OCacheLine* line;
2490      UWord lineoff = oc_line_offset(a);
2491      tl_assert(lineoff >= 0
2492                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2493      line = find_OCacheLine( a );
2494      line->descr[lineoff+0] = 0xF;
2495      line->descr[lineoff+1] = 0xF;
2496      line->w32[lineoff+0]   = otag;
2497      line->w32[lineoff+1]   = otag;
2498    }
2499    //// END inlined, specialised version of MC_(helperc_b_store8)
2500 }
2501 
2502 static INLINE
2503 void make_aligned_word64_noaccess ( Addr a )
2504 {
2505    PROF_EVENT(330, "make_aligned_word64_noaccess");
2506 
2507 #ifndef PERF_FAST_STACK2
2508    MC_(make_mem_noaccess)(a, 8);
2509 #else
2510    {
2511       UWord   sm_off16;
2512       SecMap* sm;
2513 
2514       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2515          PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
2516          MC_(make_mem_noaccess)(a, 8);
2517          return;
2518       }
2519 
2520       sm       = get_secmap_for_writing_low(a);
2521       sm_off16 = SM_OFF_16(a);
2522       ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2523 
2524       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2525       //// Clear the origins for a+0 .. a+7.
2526       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2527          OCacheLine* line;
2528          UWord lineoff = oc_line_offset(a);
2529          tl_assert(lineoff >= 0
2530                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2531          line = find_OCacheLine( a );
2532          line->descr[lineoff+0] = 0;
2533          line->descr[lineoff+1] = 0;
2534       }
2535       //// END inlined, specialised version of MC_(helperc_b_store8)
2536    }
2537 #endif
2538 }
2539 
2540 
2541 /*------------------------------------------------------------*/
2542 /*--- Stack pointer adjustment                             ---*/
2543 /*------------------------------------------------------------*/
2544 
2545 #ifdef PERF_FAST_STACK
2546 #  define MAYBE_USED
2547 #else
2548 #  define MAYBE_USED __attribute__((unused))
2549 #endif
2550 
2551 /*--------------- adjustment by 4 bytes ---------------*/
2552 
2553 MAYBE_USED
2554 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2555 {
2556    UInt otag = ecu | MC_OKIND_STACK;
2557    PROF_EVENT(110, "new_mem_stack_4");
2558    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2559       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2560    } else {
2561       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2562    }
2563 }
2564 
2565 MAYBE_USED
2566 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2567 {
2568    PROF_EVENT(110, "new_mem_stack_4");
2569    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2570       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2571    } else {
2572       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2573    }
2574 }
2575 
2576 MAYBE_USED
2577 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2578 {
2579    PROF_EVENT(120, "die_mem_stack_4");
2580    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2581       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2582    } else {
2583       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2584    }
2585 }
2586 
2587 /*--------------- adjustment by 8 bytes ---------------*/
2588 
2589 MAYBE_USED
2590 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2591 {
2592    UInt otag = ecu | MC_OKIND_STACK;
2593    PROF_EVENT(111, "new_mem_stack_8");
2594    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2595       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2596    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2597       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2598       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2599    } else {
2600       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2601    }
2602 }
2603 
2604 MAYBE_USED
2605 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2606 {
2607    PROF_EVENT(111, "new_mem_stack_8");
2608    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2609       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2610    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2611       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2612       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2613    } else {
2614       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2615    }
2616 }
2617 
2618 MAYBE_USED
2619 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2620 {
2621    PROF_EVENT(121, "die_mem_stack_8");
2622    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2623       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2624    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2625       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2626       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2627    } else {
2628       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2629    }
2630 }
2631 
2632 /*--------------- adjustment by 12 bytes ---------------*/
2633 
2634 MAYBE_USED
2635 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2636 {
2637    UInt otag = ecu | MC_OKIND_STACK;
2638    PROF_EVENT(112, "new_mem_stack_12");
2639    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2640       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2641       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2642    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2643       /* from previous test we don't have 8-alignment at offset +0,
2644          hence must have 8 alignment at offsets +4/-4.  Hence safe to
2645          do 4 at +0 and then 8 at +4. */
2646       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2647       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2648    } else {
2649       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2650    }
2651 }
2652 
2653 MAYBE_USED
2654 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2655 {
2656    PROF_EVENT(112, "new_mem_stack_12");
2657    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2658       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2659       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2660    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2661       /* from previous test we don't have 8-alignment at offset +0,
2662          hence must have 8 alignment at offsets +4/-4.  Hence safe to
2663          do 4 at +0 and then 8 at +4. */
2664       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2665       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2666    } else {
2667       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2668    }
2669 }
2670 
2671 MAYBE_USED
2672 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2673 {
2674    PROF_EVENT(122, "die_mem_stack_12");
2675    /* Note the -12 in the test */
2676    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2677       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2678          -4. */
2679       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2680       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2681    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2682       /* We have 4-alignment at +0, but we don't have 8-alignment at
2683          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
2684          and then 8 at -8. */
2685       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2686       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2687    } else {
2688       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2689    }
2690 }
2691 
2692 /*--------------- adjustment by 16 bytes ---------------*/
2693 
2694 MAYBE_USED
2695 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2696 {
2697    UInt otag = ecu | MC_OKIND_STACK;
2698    PROF_EVENT(113, "new_mem_stack_16");
2699    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2700       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2701       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2702       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2703    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2704       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2705          Hence do 4 at +0, 8 at +4, 4 at +12. */
2706       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2707       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2708       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2709    } else {
2710       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2711    }
2712 }
2713 
2714 MAYBE_USED
2715 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2716 {
2717    PROF_EVENT(113, "new_mem_stack_16");
2718    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2719       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2720       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2721       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2722    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2723       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2724          Hence do 4 at +0, 8 at +4, 4 at +12. */
2725       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2726       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
2727       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2728    } else {
2729       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2730    }
2731 }
2732 
2733 MAYBE_USED
2734 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2735 {
2736    PROF_EVENT(123, "die_mem_stack_16");
2737    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2738       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2739       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2740       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2741    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2742       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
2743       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2744       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2745       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2746    } else {
2747       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2748    }
2749 }
2750 
2751 /*--------------- adjustment by 32 bytes ---------------*/
2752 
2753 MAYBE_USED
2754 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
2755 {
2756    UInt otag = ecu | MC_OKIND_STACK;
2757    PROF_EVENT(114, "new_mem_stack_32");
2758    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2759       /* Straightforward */
2760       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2761       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2762       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2763       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2764    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2765       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
2766          +0,+28. */
2767       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2768       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2769       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2770       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
2771       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
2772    } else {
2773       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
2774    }
2775 }
2776 
2777 MAYBE_USED
2778 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
2779 {
2780    PROF_EVENT(114, "new_mem_stack_32");
2781    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2782       /* Straightforward */
2783       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2784       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2785       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2786       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2787    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2788       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
2789          +0,+28. */
2790       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2791       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2792       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2793       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
2794       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
2795    } else {
2796       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
2797    }
2798 }
2799 
2800 MAYBE_USED
2801 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
2802 {
2803    PROF_EVENT(124, "die_mem_stack_32");
2804    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2805       /* Straightforward */
2806       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2807       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2808       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2809       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2810    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2811       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
2812          4 at -32,-4. */
2813       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2814       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
2815       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
2816       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2817       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2818    } else {
2819       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
2820    }
2821 }
2822 
2823 /*--------------- adjustment by 112 bytes ---------------*/
2824 
2825 MAYBE_USED
2826 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
2827 {
2828    UInt otag = ecu | MC_OKIND_STACK;
2829    PROF_EVENT(115, "new_mem_stack_112");
2830    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2831       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2832       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2833       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2834       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2835       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2836       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2837       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2838       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2839       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2840       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2841       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2842       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2843       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
2844       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
2845    } else {
2846       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
2847    }
2848 }
2849 
2850 MAYBE_USED
2851 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
2852 {
2853    PROF_EVENT(115, "new_mem_stack_112");
2854    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2855       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2856       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2857       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2858       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2859       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
2860       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
2861       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
2862       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
2863       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
2864       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
2865       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
2866       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
2867       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
2868       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
2869    } else {
2870       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
2871    }
2872 }
2873 
2874 MAYBE_USED
2875 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
2876 {
2877    PROF_EVENT(125, "die_mem_stack_112");
2878    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2879       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
2880       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
2881       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
2882       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
2883       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
2884       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
2885       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
2886       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
2887       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
2888       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
2889       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2890       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2891       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2892       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2893    } else {
2894       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
2895    }
2896 }
2897 
2898 /*--------------- adjustment by 128 bytes ---------------*/
2899 
2900 MAYBE_USED
2901 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
2902 {
2903    UInt otag = ecu | MC_OKIND_STACK;
2904    PROF_EVENT(116, "new_mem_stack_128");
2905    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2906       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2907       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2908       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2909       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2910       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2911       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2912       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2913       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2914       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2915       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2916       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2917       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2918       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
2919       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
2920       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
2921       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
2922    } else {
2923       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
2924    }
2925 }
2926 
2927 MAYBE_USED
2928 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
2929 {
2930    PROF_EVENT(116, "new_mem_stack_128");
2931    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2932       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2933       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2934       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2935       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2936       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
2937       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
2938       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
2939       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
2940       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
2941       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
2942       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
2943       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
2944       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
2945       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
2946       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
2947       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
2948    } else {
2949       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
2950    }
2951 }
2952 
2953 MAYBE_USED
2954 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
2955 {
2956    PROF_EVENT(126, "die_mem_stack_128");
2957    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2958       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
2959       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
2960       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
2961       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
2962       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
2963       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
2964       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
2965       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
2966       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
2967       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
2968       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
2969       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
2970       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2971       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2972       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2973       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2974    } else {
2975       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
2976    }
2977 }
2978 
2979 /*--------------- adjustment by 144 bytes ---------------*/
2980 
2981 MAYBE_USED
2982 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
2983 {
2984    UInt otag = ecu | MC_OKIND_STACK;
2985    PROF_EVENT(117, "new_mem_stack_144");
2986    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2987       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
2988       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
2989       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
2990       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
2991       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
2992       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
2993       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
2994       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
2995       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
2996       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
2997       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
2998       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
2999       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3000       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3001       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3002       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3003       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3004       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3005    } else {
3006       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3007    }
3008 }
3009 
3010 MAYBE_USED
3011 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3012 {
3013    PROF_EVENT(117, "new_mem_stack_144");
3014    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3015       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3016       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3017       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3018       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3019       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3020       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3021       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3022       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3023       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3024       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3025       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3026       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3027       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3028       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3029       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3030       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3031       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3032       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3033    } else {
3034       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3035    }
3036 }
3037 
3038 MAYBE_USED
3039 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3040 {
3041    PROF_EVENT(127, "die_mem_stack_144");
3042    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3043       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3044       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3045       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3046       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3047       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3048       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3049       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3050       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3051       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3052       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3053       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3054       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3055       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3056       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3057       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3058       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3059       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3060       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3061    } else {
3062       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3063    }
3064 }
3065 
3066 /*--------------- adjustment by 160 bytes ---------------*/
3067 
3068 MAYBE_USED
3069 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3070 {
3071    UInt otag = ecu | MC_OKIND_STACK;
3072    PROF_EVENT(118, "new_mem_stack_160");
3073    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3074       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3075       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3076       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3077       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3078       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3079       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3080       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3081       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3082       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3083       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3084       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3085       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3086       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3087       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3088       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3089       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3090       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3091       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3092       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3093       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3094    } else {
3095       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3096    }
3097 }
3098 
3099 MAYBE_USED
3100 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3101 {
3102    PROF_EVENT(118, "new_mem_stack_160");
3103    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3104       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3105       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3106       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3107       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3108       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3109       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3110       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3111       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3112       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3113       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3114       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3115       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3116       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3117       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3118       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3119       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3120       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3121       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3122       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3123       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3124    } else {
3125       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3126    }
3127 }
3128 
3129 MAYBE_USED
3130 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3131 {
3132    PROF_EVENT(128, "die_mem_stack_160");
3133    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3134       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3135       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3136       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3137       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3138       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3139       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3140       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3141       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3142       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3143       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3144       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3145       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3146       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3147       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3148       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3149       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3150       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3151       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3152       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3153       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3154    } else {
3155       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3156    }
3157 }
3158 
3159 /*--------------- adjustment by N bytes ---------------*/
3160 
3161 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3162 {
3163    UInt otag = ecu | MC_OKIND_STACK;
3164    PROF_EVENT(115, "new_mem_stack_w_otag");
3165    MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3166 }
3167 
3168 static void mc_new_mem_stack ( Addr a, SizeT len )
3169 {
3170    PROF_EVENT(115, "new_mem_stack");
3171    make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3172 }
3173 
3174 static void mc_die_mem_stack ( Addr a, SizeT len )
3175 {
3176    PROF_EVENT(125, "die_mem_stack");
3177    MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3178 }
3179 
3180 
3181 /* The AMD64 ABI says:
3182 
3183    "The 128-byte area beyond the location pointed to by %rsp is considered
3184     to be reserved and shall not be modified by signal or interrupt
3185     handlers.  Therefore, functions may use this area for temporary data
3186     that is not needed across function calls.  In particular, leaf functions
3187     may use this area for their entire stack frame, rather than adjusting
3188     the stack pointer in the prologue and epilogue.  This area is known as
3189     red zone [sic]."
3190 
3191    So after any call or return we need to mark this redzone as containing
3192    undefined values.
3193 
3194    Consider this:  we're in function f.  f calls g.  g moves rsp down
3195    modestly (say 16 bytes) and writes stuff all over the red zone, making it
3196    defined.  g returns.  f is buggy and reads from parts of the red zone
3197    that it didn't write on.  But because g filled that area in, f is going
3198    to be picking up defined V bits and so any errors from reading bits of
3199    the red zone it didn't write will be missed.  The only solution I could
3200    think of was to make the red zone undefined when g returns to f.
3201 
3202    This is in accordance with the ABI, which makes it clear the redzone
3203    is volatile across function calls.
3204 
3205    The problem occurs the other way round too: f could fill the RZ up
3206    with defined values and g could mistakenly read them.  So the RZ
3207    also needs to be nuked on function calls.
3208 */
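
/* As a concrete (and purely illustrative) sketch of the scenario above:
   'leaf' is a hypothetical client-program function, not anything defined
   in Memcheck.  On amd64 a leaf function may keep its locals in the red
   zone without adjusting %rsp.  If the bytes below %rsp still carry
   defined V bits left behind by an earlier, deeper call, then without
   the redzone invalidation described above the uninitialised read of
   t[3] would pick up those stale V bits and go unreported:

      static long leaf ( void )
      {
         long t[4];            // may be placed in the red zone
         t[0] = 1;             // t[1..3] are never written
         return t[0] + t[3];   // genuine use of uninitialised memory
      }
*/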
3209 
3210 
3211 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
3212    improved so as to have a lower miss rate. */
3213 
3214 static UWord stats__nia_cache_queries = 0;
3215 static UWord stats__nia_cache_misses  = 0;
3216 
3217 typedef
3218    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
3219             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3220    WCacheEnt;
3221 
3222 #define N_NIA_TO_ECU_CACHE 511
3223 
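/* Each slot holds two (nia, ecu) pairs and is selected by
   (nia % N_NIA_TO_ECU_CACHE), so the cache behaves as a small 2-way,
   move-to-front set per slot; see convert_nia_to_ecu below. */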
3224 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3225 
3226 static void init_nia_to_ecu_cache ( void )
3227 {
3228    UWord       i;
3229    Addr        zero_addr = 0;
3230    ExeContext* zero_ec;
3231    UInt        zero_ecu;
3232    /* Fill all the slots with an entry for address zero, and the
3233       relevant otags accordingly.  Hence the cache is initially filled
3234       with valid data. */
3235    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3236    tl_assert(zero_ec);
3237    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3238    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3239    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3240       nia_to_ecu_cache[i].nia0 = zero_addr;
3241       nia_to_ecu_cache[i].ecu0 = zero_ecu;
3242       nia_to_ecu_cache[i].nia1 = zero_addr;
3243       nia_to_ecu_cache[i].ecu1 = zero_ecu;
3244    }
3245 }
3246 
3247 static inline UInt convert_nia_to_ecu ( Addr nia )
3248 {
3249    UWord i;
3250    UInt        ecu;
3251    ExeContext* ec;
3252 
3253    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3254 
3255    stats__nia_cache_queries++;
3256    i = nia % N_NIA_TO_ECU_CACHE;
3257    tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3258 
3259    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3260       return nia_to_ecu_cache[i].ecu0;
3261 
3262    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
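      /* Hit in way 1: swap it into way 0 so the most recently used
         mapping is found on the first probe next time. */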
3263 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3264       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3265       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3266 #     undef SWAP
3267       return nia_to_ecu_cache[i].ecu0;
3268    }
3269 
3270    stats__nia_cache_misses++;
3271    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3272    tl_assert(ec);
3273    ecu = VG_(get_ECU_from_ExeContext)(ec);
3274    tl_assert(VG_(is_plausible_ECU)(ecu));
3275 
3276    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3277    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3278 
3279    nia_to_ecu_cache[i].nia0 = nia;
3280    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3281    return ecu;
3282 }
3283 
3284 
3285 /* Note that this serves both the origin-tracking and
3286    no-origin-tracking modes.  We assume that calls to it are
3287    sufficiently infrequent that it isn't worth specialising for the
3288    with/without origin-tracking cases. */
3289 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
3290 {
3291    UInt otag;
3292    tl_assert(sizeof(UWord) == sizeof(SizeT));
3293    if (0)
3294       VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
3295                   base, len, nia );
3296 
3297    if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3298       UInt ecu = convert_nia_to_ecu ( nia );
3299       tl_assert(VG_(is_plausible_ECU)(ecu));
3300       otag = ecu | MC_OKIND_STACK;
3301    } else {
3302       tl_assert(nia == 0);
3303       otag = 0;
3304    }
3305 
3306 #  if 0
3307    /* Really slow version */
3308    MC_(make_mem_undefined)(base, len, otag);
3309 #  endif
3310 
3311 #  if 0
3312    /* Slow(ish) version, which is fairly easily seen to be correct.
3313    */
3314    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3315       make_aligned_word64_undefined(base +   0, otag);
3316       make_aligned_word64_undefined(base +   8, otag);
3317       make_aligned_word64_undefined(base +  16, otag);
3318       make_aligned_word64_undefined(base +  24, otag);
3319 
3320       make_aligned_word64_undefined(base +  32, otag);
3321       make_aligned_word64_undefined(base +  40, otag);
3322       make_aligned_word64_undefined(base +  48, otag);
3323       make_aligned_word64_undefined(base +  56, otag);
3324 
3325       make_aligned_word64_undefined(base +  64, otag);
3326       make_aligned_word64_undefined(base +  72, otag);
3327       make_aligned_word64_undefined(base +  80, otag);
3328       make_aligned_word64_undefined(base +  88, otag);
3329 
3330       make_aligned_word64_undefined(base +  96, otag);
3331       make_aligned_word64_undefined(base + 104, otag);
3332       make_aligned_word64_undefined(base + 112, otag);
3333       make_aligned_word64_undefined(base + 120, otag);
3334    } else {
3335       MC_(make_mem_undefined)(base, len, otag);
3336    }
3337 #  endif
3338 
3339    /* Idea is: go fast when
3340          * 8-aligned and length is 128
3341          * the sm is available in the main primary map
3342          * the address range falls entirely within a single secondary map
3343       If all those conditions hold, just update the V+A bits by writing
3344       directly into the vabits array.  (If the sm was distinguished, this
3345       will make a copy and then write to it.)
3346    */
3347 
3348    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3349       /* Now we know the address range is suitably sized and aligned. */
3350       UWord a_lo = (UWord)(base);
3351       UWord a_hi = (UWord)(base + 128 - 1);
3352       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3353       if (a_hi <= MAX_PRIMARY_ADDRESS) {
3354          // Now we know the entire range is within the main primary map.
3355          SecMap* sm    = get_secmap_for_writing_low(a_lo);
3356          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3357          /* Now we know that the entire address range falls within a
3358             single secondary map, and that that secondary 'lives' in
3359             the main primary map. */
3360          if (LIKELY(sm == sm_hi)) {
3361             // Finally, we know that the range is entirely within one secmap.
3362             UWord   v_off = SM_OFF(a_lo);
3363             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
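            /* Each UShort holds the 2-bit V+A codes for 8 bytes, so the
               16 stores below mark the whole 128-byte range undefined. */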
3364             p[ 0] = VA_BITS16_UNDEFINED;
3365             p[ 1] = VA_BITS16_UNDEFINED;
3366             p[ 2] = VA_BITS16_UNDEFINED;
3367             p[ 3] = VA_BITS16_UNDEFINED;
3368             p[ 4] = VA_BITS16_UNDEFINED;
3369             p[ 5] = VA_BITS16_UNDEFINED;
3370             p[ 6] = VA_BITS16_UNDEFINED;
3371             p[ 7] = VA_BITS16_UNDEFINED;
3372             p[ 8] = VA_BITS16_UNDEFINED;
3373             p[ 9] = VA_BITS16_UNDEFINED;
3374             p[10] = VA_BITS16_UNDEFINED;
3375             p[11] = VA_BITS16_UNDEFINED;
3376             p[12] = VA_BITS16_UNDEFINED;
3377             p[13] = VA_BITS16_UNDEFINED;
3378             p[14] = VA_BITS16_UNDEFINED;
3379             p[15] = VA_BITS16_UNDEFINED;
3380             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3381                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3382                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3383                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3384                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3385                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3386                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3387                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3388                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3389                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3390                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3391                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3392                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3393                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3394                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3395                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3396                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3397             }
3398             return;
3399          }
3400       }
3401    }
3402 
3403    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3404    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3405       /* Now we know the address range is suitably sized and aligned. */
3406       UWord a_lo = (UWord)(base);
3407       UWord a_hi = (UWord)(base + 288 - 1);
3408       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3409       if (a_hi <= MAX_PRIMARY_ADDRESS) {
3410          // Now we know the entire range is within the main primary map.
3411          SecMap* sm    = get_secmap_for_writing_low(a_lo);
3412          SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3413          /* Now we know that the entire address range falls within a
3414             single secondary map, and that that secondary 'lives' in
3415             the main primary map. */
3416          if (LIKELY(sm == sm_hi)) {
3417             // Finally, we know that the range is entirely within one secmap.
3418             UWord   v_off = SM_OFF(a_lo);
3419             UShort* p     = (UShort*)(&sm->vabits8[v_off]);
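            /* As above: 36 UShort stores x 8 bytes each = 288 bytes. */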
3420             p[ 0] = VA_BITS16_UNDEFINED;
3421             p[ 1] = VA_BITS16_UNDEFINED;
3422             p[ 2] = VA_BITS16_UNDEFINED;
3423             p[ 3] = VA_BITS16_UNDEFINED;
3424             p[ 4] = VA_BITS16_UNDEFINED;
3425             p[ 5] = VA_BITS16_UNDEFINED;
3426             p[ 6] = VA_BITS16_UNDEFINED;
3427             p[ 7] = VA_BITS16_UNDEFINED;
3428             p[ 8] = VA_BITS16_UNDEFINED;
3429             p[ 9] = VA_BITS16_UNDEFINED;
3430             p[10] = VA_BITS16_UNDEFINED;
3431             p[11] = VA_BITS16_UNDEFINED;
3432             p[12] = VA_BITS16_UNDEFINED;
3433             p[13] = VA_BITS16_UNDEFINED;
3434             p[14] = VA_BITS16_UNDEFINED;
3435             p[15] = VA_BITS16_UNDEFINED;
3436             p[16] = VA_BITS16_UNDEFINED;
3437             p[17] = VA_BITS16_UNDEFINED;
3438             p[18] = VA_BITS16_UNDEFINED;
3439             p[19] = VA_BITS16_UNDEFINED;
3440             p[20] = VA_BITS16_UNDEFINED;
3441             p[21] = VA_BITS16_UNDEFINED;
3442             p[22] = VA_BITS16_UNDEFINED;
3443             p[23] = VA_BITS16_UNDEFINED;
3444             p[24] = VA_BITS16_UNDEFINED;
3445             p[25] = VA_BITS16_UNDEFINED;
3446             p[26] = VA_BITS16_UNDEFINED;
3447             p[27] = VA_BITS16_UNDEFINED;
3448             p[28] = VA_BITS16_UNDEFINED;
3449             p[29] = VA_BITS16_UNDEFINED;
3450             p[30] = VA_BITS16_UNDEFINED;
3451             p[31] = VA_BITS16_UNDEFINED;
3452             p[32] = VA_BITS16_UNDEFINED;
3453             p[33] = VA_BITS16_UNDEFINED;
3454             p[34] = VA_BITS16_UNDEFINED;
3455             p[35] = VA_BITS16_UNDEFINED;
3456             if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3457                set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3458                set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3459                set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3460                set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3461                set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3462                set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3463                set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3464                set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3465                set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3466                set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3467                set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3468                set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3469                set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3470                set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3471                set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3472                set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3473                set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3474                set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3475                set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3476                set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3477                set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3478                set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3479                set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3480                set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3481                set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3482                set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3483                set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3484                set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3485                set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3486                set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3487                set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3488                set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3489                set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3490                set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3491                set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3492                set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3493             }
3494             return;
3495          }
3496       }
3497    }
3498 
3499    /* else fall into slow case */
3500    MC_(make_mem_undefined_w_otag)(base, len, otag);
3501 }
3502 
3503 
3504 /*------------------------------------------------------------*/
3505 /*--- Checking memory                                      ---*/
3506 /*------------------------------------------------------------*/
3507 
3508 typedef
3509    enum {
3510       MC_Ok = 5,
3511       MC_AddrErr = 6,
3512       MC_ValueErr = 7
3513    }
3514    MC_ReadResult;
3515 
3516 
3517 /* Check permissions for address range.  If inadequate permissions
3518    exist, *bad_addr is set to the offending address, so the caller can
3519    know what it is. */
3520 
3521 /* Returns True if [a .. a+len) is not addressable.  Otherwise,
3522    returns False, and if bad_addr is non-NULL, sets *bad_addr to
3523    indicate the lowest failing address.  Functions below are
3524    similar. */
3525 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
3526 {
3527    SizeT i;
3528    UWord vabits2;
3529 
3530    PROF_EVENT(60, "check_mem_is_noaccess");
3531    for (i = 0; i < len; i++) {
3532       PROF_EVENT(61, "check_mem_is_noaccess(loop)");
3533       vabits2 = get_vabits2(a);
3534       if (VA_BITS2_NOACCESS != vabits2) {
3535          if (bad_addr != NULL) *bad_addr = a;
3536          return False;
3537       }
3538       a++;
3539    }
3540    return True;
3541 }
3542 
3543 static Bool is_mem_addressable ( Addr a, SizeT len,
3544                                  /*OUT*/Addr* bad_addr )
3545 {
3546    SizeT i;
3547    UWord vabits2;
3548 
3549    PROF_EVENT(62, "is_mem_addressable");
3550    for (i = 0; i < len; i++) {
3551       PROF_EVENT(63, "is_mem_addressable(loop)");
3552       vabits2 = get_vabits2(a);
3553       if (VA_BITS2_NOACCESS == vabits2) {
3554          if (bad_addr != NULL) *bad_addr = a;
3555          return False;
3556       }
3557       a++;
3558    }
3559    return True;
3560 }
3561 
3562 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
3563                                       /*OUT*/Addr* bad_addr,
3564                                       /*OUT*/UInt* otag )
3565 {
3566    SizeT i;
3567    UWord vabits2;
3568 
3569    PROF_EVENT(64, "is_mem_defined");
3570    DEBUG("is_mem_defined\n");
3571 
3572    if (otag)     *otag = 0;
3573    if (bad_addr) *bad_addr = 0;
3574    for (i = 0; i < len; i++) {
3575       PROF_EVENT(65, "is_mem_defined(loop)");
3576       vabits2 = get_vabits2(a);
3577       if (VA_BITS2_DEFINED != vabits2) {
3578          // Error!  Nb: Report addressability errors in preference to
3579          // definedness errors.  And don't report definedness errors unless
3580          // --undef-value-errors=yes.
3581          if (bad_addr) {
3582             *bad_addr = a;
3583          }
3584          if (VA_BITS2_NOACCESS == vabits2) {
3585             return MC_AddrErr;
3586          }
3587          if (MC_(clo_mc_level) >= 2) {
3588             if (otag && MC_(clo_mc_level) == 3) {
3589                *otag = MC_(helperc_b_load1)( a );
3590             }
3591             return MC_ValueErr;
3592          }
3593       }
3594       a++;
3595    }
3596    return MC_Ok;
3597 }
3598 
3599 
3600 /* Check a zero-terminated ASCII string.  Tricky -- we don't want to
3601    examine the actual bytes to find the end until we're sure it is
3602    safe to do so. */
3603 
3604 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
3605 {
3606    UWord vabits2;
3607 
3608    PROF_EVENT(66, "mc_is_defined_asciiz");
3609    DEBUG("mc_is_defined_asciiz\n");
3610 
3611    if (otag)     *otag = 0;
3612    if (bad_addr) *bad_addr = 0;
3613    while (True) {
3614       PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
3615       vabits2 = get_vabits2(a);
3616       if (VA_BITS2_DEFINED != vabits2) {
3617          // Error!  Nb: Report addressability errors in preference to
3618          // definedness errors.  And don't report definedness errors unless
3619          // --undef-value-errors=yes.
3620          if (bad_addr) {
3621             *bad_addr = a;
3622          }
3623          if (VA_BITS2_NOACCESS == vabits2) {
3624             return MC_AddrErr;
3625          }
3626          if (MC_(clo_mc_level) >= 2) {
3627             if (otag && MC_(clo_mc_level) == 3) {
3628                *otag = MC_(helperc_b_load1)( a );
3629             }
3630             return MC_ValueErr;
3631          }
3632       }
3633       /* Ok, a is safe to read. */
3634       if (* ((UChar*)a) == 0) {
3635          return MC_Ok;
3636       }
3637       a++;
3638    }
3639 }
3640 
3641 
3642 /*------------------------------------------------------------*/
3643 /*--- Memory event handlers                                ---*/
3644 /*------------------------------------------------------------*/
3645 
3646 static
3647 void check_mem_is_addressable ( CorePart part, ThreadId tid, Char* s,
3648                                 Addr base, SizeT size )
3649 {
3650    Addr bad_addr;
3651    Bool ok = is_mem_addressable ( base, size, &bad_addr );
3652 
3653    if (!ok) {
3654       switch (part) {
3655       case Vg_CoreSysCall:
3656          MC_(record_memparam_error) ( tid, bad_addr,
3657                                       /*isAddrErr*/True, s, 0/*otag*/ );
3658          break;
3659 
3660       case Vg_CoreSignal:
3661          MC_(record_core_mem_error)( tid, s );
3662          break;
3663 
3664       default:
3665          VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
3666       }
3667    }
3668 }
3669 
3670 static
3671 void check_mem_is_defined ( CorePart part, ThreadId tid, Char* s,
3672                             Addr base, SizeT size )
3673 {
3674    UInt otag = 0;
3675    Addr bad_addr;
3676    MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
3677 
3678    if (MC_Ok != res) {
3679       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3680 
3681       switch (part) {
3682       case Vg_CoreSysCall:
3683          MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3684                                       isAddrErr ? 0 : otag );
3685          break;
3686 
3687       case Vg_CoreSysCallArgInMem:
3688          MC_(record_regparam_error) ( tid, s, otag );
3689          break;
3690 
3691       /* If we're being asked to jump to a silly address, record an error
3692          message before potentially crashing the entire system. */
3693       case Vg_CoreTranslate:
3694          MC_(record_jump_error)( tid, bad_addr );
3695          break;
3696 
3697       default:
3698          VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
3699       }
3700    }
3701 }
3702 
3703 static
3704 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
3705                                    Char* s, Addr str )
3706 {
3707    MC_ReadResult res;
3708    Addr bad_addr = 0;   // shut GCC up
3709    UInt otag = 0;
3710 
3711    tl_assert(part == Vg_CoreSysCall);
3712    res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
3713    if (MC_Ok != res) {
3714       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3715       MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3716                                    isAddrErr ? 0 : otag );
3717    }
3718 }
3719 
3720 /* Handling of mmap and mprotect is not as simple as it seems.
3721 
3722    The underlying semantics are that memory obtained from mmap is
3723    always initialised, but may be inaccessible.  And changes to the
3724    protection of memory do not change its contents and hence not its
3725    definedness state.  The problem is that we can't model
3726    inaccessible-but-with-some-definedness state; once we mark memory
3727    as inaccessible we lose all info about definedness, and so can't
3728    restore that if it is later made accessible again.
3729 
3730    One obvious thing to do is this:
3731 
3732       mmap/mprotect NONE  -> noaccess
3733       mmap/mprotect other -> defined
3734 
3735    The problem case here is: taking accessible memory, writing
3736    uninitialised data to it, mprotecting it NONE and later mprotecting
3737    it back to some accessible state causes the undefinedness to be
3738    lost.
3739 
3740    A better proposal is:
3741 
3742      (1) mmap NONE       ->  make noaccess
3743      (2) mmap other      ->  make defined
3744 
3745      (3) mprotect NONE   ->  # no change
3746      (4) mprotect other  ->  change any "noaccess" to "defined"
3747 
3748    (2) is OK because memory newly obtained from mmap really is defined
3749        (zeroed out by the kernel -- doing anything else would
3750        constitute a massive security hole.)
3751 
3752    (1) is OK because the only way to make the memory usable is via
3753        (4), in which case we also wind up correctly marking it all as
3754        defined.
3755 
3756    (3) is the weak case.  We choose not to change the memory state
3757        (presumably the range is in some mixture of "defined" and
3758        "undefined", viz, accessible but with arbitrary V bits).  Doing
3759        nothing means we retain the V bits, so that if the memory is
3760        later mprotected "other", the V bits remain unchanged, so there
3761        can be no false negatives.  The bad effect is that if there's
3762        an access in the area, then MC cannot warn; but at least we'll
3763        get a SEGV to show, so it's better than nothing.
3764 
3765    Consider the sequence (3) followed by (4).  Any memory that was
3766    "defined" or "undefined" previously retains its state (as
3767    required).  Any memory that was "noaccess" before can only have
3768    been made that way by (1), and so it's OK to change it to
3769    "defined".
3770 
3771    See https://bugs.kde.org/show_bug.cgi?id=205541
3772    and https://bugs.kde.org/show_bug.cgi?id=210268
3773 */
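/* A worked illustration of rules (1)-(4) above (purely illustrative;
   'p' and 'len' are hypothetical):

      p = mmap(NULL, len, PROT_NONE, ...)      (1) p..p+len-1 -> noaccess
      mprotect(p, len, PROT_READ|PROT_WRITE)   (4) noaccess   -> defined
      copy uninitialised data into [p, p+len)      those bytes -> undefined
      mprotect(p, len, PROT_NONE)              (3) V+A bits left unchanged
      mprotect(p, len, PROT_READ)              (4) only "noaccess" bytes
                                                   (none here) change, so
                                                   the undefinedness is kept

   which is exactly the property the simpler noaccess/defined scheme
   described earlier fails to provide. */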
3774 static
3775 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
3776                        ULong di_handle )
3777 {
3778    if (rr || ww || xx) {
3779       /* (2) mmap/mprotect other -> defined */
3780       MC_(make_mem_defined)(a, len);
3781    } else {
3782       /* (1) mmap/mprotect NONE  -> noaccess */
3783       MC_(make_mem_noaccess)(a, len);
3784    }
3785 }
3786 
3787 static
3788 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
3789 {
3790    if (rr || ww || xx) {
3791       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
3792       make_mem_defined_if_noaccess(a, len);
3793    } else {
3794       /* (3) mprotect NONE   ->  # no change */
3795       /* do nothing */
3796    }
3797 }
3798 
3799 
3800 static
3801 void mc_new_mem_startup( Addr a, SizeT len,
3802                          Bool rr, Bool ww, Bool xx, ULong di_handle )
3803 {
3804    // Because code is defined, initialised variables get put in the data
3805    // segment and are defined, and uninitialised variables get put in the
3806    // bss segment and are auto-zeroed (and so defined).
3807    //
3808    // It's possible that there will be padding between global variables.
3809    // This will also be auto-zeroed, and marked as defined by Memcheck.  If
3810    // a program uses it, Memcheck will not complain.  This is arguably a
3811    // false negative, but it's a grey area -- the behaviour is defined (the
3812    // padding is zeroed) but it's probably not what the user intended.  And
3813    // we can't avoid it.
3814    //
3815    // Note: we generally ignore RWX permissions, because we can't track them
3816    // without requiring more than one A bit which would slow things down a
3817    // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
3818    // So we mark any such pages as "unaddressable".
3819    DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
3820          a, (ULong)len, rr, ww, xx);
3821    mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
3822 }
3823 
3824 static
3825 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
3826 {
3827    MC_(make_mem_defined)(a, len);
3828 }
3829 
3830 
3831 /*------------------------------------------------------------*/
3832 /*--- Register event handlers                              ---*/
3833 /*------------------------------------------------------------*/
3834 
3835 /* Try and get a nonzero origin for the guest state section of thread
3836    tid characterised by (offset,size).  Return 0 if nothing to show
3837    for it. */
3838 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
3839                                              Int offset, SizeT size )
3840 {
3841    Int   sh2off;
3842    UChar area[6];
3843    UInt  otag;
3844    sh2off = MC_(get_otrack_shadow_offset)( offset, size );
3845    if (sh2off == -1)
3846       return 0;  /* This piece of guest state is not tracked */
3847    tl_assert(sh2off >= 0);
3848    tl_assert(0 == (sh2off % 4));
3849    area[0] = 0x31;
3850    area[5] = 0x27;
3851    VG_(get_shadow_regs_area)( tid, &area[1], 2/*shadowno*/,sh2off,4 );
3852    tl_assert(area[0] == 0x31);
3853    tl_assert(area[5] == 0x27);
3854    otag = *(UInt*)&area[1];
3855    return otag;
3856 }
3857 
3858 
3859 /* When some chunk of guest state is written, mark the corresponding
3860    shadow area as valid.  This is used to initialise arbitrarily large
3861    chunks of guest state, hence the _SIZE value, which has to be as
3862    big as the biggest guest state.
3863 */
3864 static void mc_post_reg_write ( CorePart part, ThreadId tid,
3865                                 PtrdiffT offset, SizeT size)
3866 {
3867 #  define MAX_REG_WRITE_SIZE 1408
3868    UChar area[MAX_REG_WRITE_SIZE];
3869    tl_assert(size <= MAX_REG_WRITE_SIZE);
3870    VG_(memset)(area, V_BITS8_DEFINED, size);
3871    VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
3872 #  undef MAX_REG_WRITE_SIZE
3873 }
3874 
3875 static
3876 void mc_post_reg_write_clientcall ( ThreadId tid,
3877                                     PtrdiffT offset, SizeT size, Addr f)
3878 {
3879    mc_post_reg_write(/*dummy*/0, tid, offset, size);
3880 }
3881 
3882 /* Look at the definedness of the guest's shadow state for
3883    [offset, offset+len).  If any part of that is undefined, record
3884    a parameter error.
3885 */
3886 static void mc_pre_reg_read ( CorePart part, ThreadId tid, Char* s,
3887                               PtrdiffT offset, SizeT size)
3888 {
3889    Int   i;
3890    Bool  bad;
3891    UInt  otag;
3892 
3893    UChar area[16];
3894    tl_assert(size <= 16);
3895 
3896    VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
3897 
3898    bad = False;
3899    for (i = 0; i < size; i++) {
3900       if (area[i] != V_BITS8_DEFINED) {
3901          bad = True;
3902          break;
3903       }
3904    }
3905 
3906    if (!bad)
3907       return;
3908 
3909    /* We've found some undefinedness.  See if we can also find an
3910       origin for it. */
3911    otag = mb_get_origin_for_guest_offset( tid, offset, size );
3912    MC_(record_regparam_error) ( tid, s, otag );
3913 }
3914 
3915 
3916 /*------------------------------------------------------------*/
3917 /*--- Functions called directly from generated code:       ---*/
3918 /*--- Load/store handlers.                                 ---*/
3919 /*------------------------------------------------------------*/
3920 
3921 /* Types:  LOADV32, LOADV16, LOADV8 are:
3922                UWord fn ( Addr a )
3923    so they return 32-bits on 32-bit machines and 64-bits on
3924    64-bit machines.  Addr has the same size as a host word.
3925 
3926    LOADV64 is always  ULong fn ( Addr a )
3927 
3928    Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
3929    are a UWord, and for STOREV64 they are a ULong.
3930 */
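/* For example (a conceptual sketch only -- the real calls are emitted by
   the instrumenter in mc_translate.c): a 32-bit little-endian load from
   address 'a' is shadowed by something like

      UWord vbits = MC_(helperc_LOADV32le)( a );

   and the corresponding store of a value whose shadow is 'vbits' by

      MC_(helperc_STOREV32le)( a, vbits );
*/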
3931 
3932 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
3933    naturally '_sz/8'-aligned, or it exceeds the range covered by the
3934    primary map.  This is all very tricky (and important!), so let's
3935    work through the maths by hand (below), *and* assert for these
3936    values at startup. */
3937 #define MASK(_szInBytes) \
3938    ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
3939 
3940 /* MASK only exists so as to define this macro. */
3941 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
3942    ((_a) & MASK((_szInBits>>3)))
3943 
3944 /* On a 32-bit machine:
3945 
3946    N_PRIMARY_BITS          == 16, so
3947    N_PRIMARY_MAP           == 0x10000, so
3948    N_PRIMARY_MAP-1         == 0xFFFF, so
3949    (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
3950 
3951    MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
3952            = ~ ( 0xFFFF | 0xFFFF0000 )
3953            = ~ 0xFFFF'FFFF
3954            = 0
3955 
3956    MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
3957            = ~ ( 0xFFFE | 0xFFFF0000 )
3958            = ~ 0xFFFF'FFFE
3959            = 1
3960 
3961    MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
3962            = ~ ( 0xFFFC | 0xFFFF0000 )
3963            = ~ 0xFFFF'FFFC
3964            = 3
3965 
3966    MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
3967            = ~ ( 0xFFF8 | 0xFFFF0000 )
3968            = ~ 0xFFFF'FFF8
3969            = 7
3970 
3971    Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
3972    precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
3973    the 1-byte alignment case, it is always a zero value, since MASK(1)
3974    is zero.  All as expected.
3975 
3976    On a 64-bit machine, it's more complex, since we're testing
3977    simultaneously for misalignment and for the address being at or
3978    above 32G:
3979 
3980    N_PRIMARY_BITS          == 19, so
3981    N_PRIMARY_MAP           == 0x80000, so
3982    N_PRIMARY_MAP-1         == 0x7FFFF, so
3983    (N_PRIMARY_MAP-1) << 16 == 0x7FFFF'0000, and so
3984 
3985    MASK(1) = ~ ( (0x10000 - 1) | 0x7FFFF'0000 )
3986            = ~ ( 0xFFFF | 0x7FFFF'0000 )
3987            = ~ 0x7FFFF'FFFF
3988            = 0xFFFF'FFF8'0000'0000
3989 
3990    MASK(2) = ~ ( (0x10000 - 2) | 0x7FFFF'0000 )
3991            = ~ ( 0xFFFE | 0x7FFFF'0000 )
3992            = ~ 0x7FFFF'FFFE
3993            = 0xFFFF'FFF8'0000'0001
3994 
3995    MASK(4) = ~ ( (0x10000 - 4) | 0x7FFFF'0000 )
3996            = ~ ( 0xFFFC | 0x7FFFF'0000 )
3997            = ~ 0x7FFFF'FFFC
3998            = 0xFFFF'FFF8'0000'0003
3999 
4000    MASK(8) = ~ ( (0x10000 - 8) | 0x7FFFF'0000 )
4001            = ~ ( 0xFFF8 | 0x7FFFF'0000 )
4002            = ~ 0x7FFFF'FFF8
4003            = 0xFFFF'FFF8'0000'0007
4004 */
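/* A sketch of the kind of start-up check referred to above (illustrative
   only, not necessarily how the real check is written); on a 64-bit
   build:

      tl_assert( MASK(1) == 0xFFFFFFF800000000ULL );
      tl_assert( MASK(2) == 0xFFFFFFF800000001ULL );
      tl_assert( MASK(4) == 0xFFFFFFF800000003ULL );
      tl_assert( MASK(8) == 0xFFFFFFF800000007ULL );

   and on a 32-bit build MASK(1/2/4/8) should come out as 0, 1, 3 and 7
   respectively, per the derivation above. */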
4005 
4006 
4007 /* ------------------------ Size = 8 ------------------------ */
4008 
4009 static INLINE
4010 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4011 {
4012    PROF_EVENT(200, "mc_LOADV64");
4013 
4014 #ifndef PERF_FAST_LOADV
4015    return mc_LOADVn_slow( a, 64, isBigEndian );
4016 #else
4017    {
4018       UWord   sm_off16, vabits16;
4019       SecMap* sm;
4020 
4021       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4022          PROF_EVENT(201, "mc_LOADV64-slow1");
4023          return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4024       }
4025 
4026       sm       = get_secmap_for_reading_low(a);
4027       sm_off16 = SM_OFF_16(a);
4028       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4029 
4030       // Handle common case quickly: a is suitably aligned, is mapped, and
4031       // addressable.
4032       // Convert V bits from compact memory form to expanded register form.
4033       if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4034          return V_BITS64_DEFINED;
4035       } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4036          return V_BITS64_UNDEFINED;
4037       } else {
4038          /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4039          PROF_EVENT(202, "mc_LOADV64-slow2");
4040          return mc_LOADVn_slow( a, 64, isBigEndian );
4041       }
4042    }
4043 #endif
4044 }
4045 
4046 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4047 {
4048    return mc_LOADV64(a, True);
4049 }
4050 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4051 {
4052    return mc_LOADV64(a, False);
4053 }
4054 
4055 
4056 static INLINE
4057 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4058 {
4059    PROF_EVENT(210, "mc_STOREV64");
4060 
4061 #ifndef PERF_FAST_STOREV
4062    // XXX: this slow case seems to be marginally faster than the fast case!
4063    // Investigate further.
4064    mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4065 #else
4066    {
4067       UWord   sm_off16, vabits16;
4068       SecMap* sm;
4069 
4070       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4071          PROF_EVENT(211, "mc_STOREV64-slow1");
4072          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4073          return;
4074       }
4075 
4076       sm       = get_secmap_for_reading_low(a);
4077       sm_off16 = SM_OFF_16(a);
4078       vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4079 
4080       if (LIKELY( !is_distinguished_sm(sm) &&
4081                           (VA_BITS16_DEFINED   == vabits16 ||
4082                            VA_BITS16_UNDEFINED == vabits16) ))
4083       {
4084          /* Handle common case quickly: a is suitably aligned, */
4085          /* is mapped, and is addressable. */
4086          // Convert full V-bits in register to compact 2-bit form.
4087          if (V_BITS64_DEFINED == vbits64) {
4088             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
4089          } else if (V_BITS64_UNDEFINED == vbits64) {
4090             ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
4091          } else {
4092             /* Slow but general case -- writing partially defined bytes. */
4093             PROF_EVENT(212, "mc_STOREV64-slow2");
4094             mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4095          }
4096       } else {
4097          /* Slow but general case. */
4098          PROF_EVENT(213, "mc_STOREV64-slow3");
4099          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4100       }
4101    }
4102 #endif
4103 }
4104 
4105 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4106 {
4107    mc_STOREV64(a, vbits64, True);
4108 }
4109 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
4110 {
4111    mc_STOREV64(a, vbits64, False);
4112 }
4113 
4114 
4115 /* ------------------------ Size = 4 ------------------------ */
4116 
4117 static INLINE
4118 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
4119 {
4120    PROF_EVENT(220, "mc_LOADV32");
4121 
4122 #ifndef PERF_FAST_LOADV
4123    return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4124 #else
4125    {
4126       UWord   sm_off, vabits8;
4127       SecMap* sm;
4128 
4129       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4130          PROF_EVENT(221, "mc_LOADV32-slow1");
4131          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4132       }
4133 
4134       sm      = get_secmap_for_reading_low(a);
4135       sm_off  = SM_OFF(a);
4136       vabits8 = sm->vabits8[sm_off];
4137 
4138       // Handle common case quickly: a is suitably aligned, is mapped, and the
4139       // entire word32 it lives in is addressable.
4140       // Convert V bits from compact memory form to expanded register form.
4141       // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
4142       // Almost certainly not necessary, but be paranoid.
4143       if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4144          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
4145       } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
4146          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
4147       } else {
4148          /* Slow case: the 4 bytes are not all-defined or all-undefined. */
4149          PROF_EVENT(222, "mc_LOADV32-slow2");
4150          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4151       }
4152    }
4153 #endif
4154 }
4155 
4156 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
4157 {
4158    return mc_LOADV32(a, True);
4159 }
4160 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
4161 {
4162    return mc_LOADV32(a, False);
4163 }
4164 
4165 
4166 static INLINE
4167 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
4168 {
4169    PROF_EVENT(230, "mc_STOREV32");
4170 
4171 #ifndef PERF_FAST_STOREV
4172    mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4173 #else
4174    {
4175       UWord   sm_off, vabits8;
4176       SecMap* sm;
4177 
4178       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4179          PROF_EVENT(231, "mc_STOREV32-slow1");
4180          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4181          return;
4182       }
4183 
4184       sm      = get_secmap_for_reading_low(a);
4185       sm_off  = SM_OFF(a);
4186       vabits8 = sm->vabits8[sm_off];
4187 
4188       // Cleverness:  sometimes we don't have to write the shadow memory at
4189       // all, if we can tell that what we want to write is the same as what is
4190       // already there.  The 64/16/8 bit cases also have cleverness at this
4191       // point, but it works a little differently to the code below.
4192       if (V_BITS32_DEFINED == vbits32) {
4193          if (vabits8 == (UInt)VA_BITS8_DEFINED) {
4194             return;
4195          } else if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
4196             sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
4197          } else {
4198             // not defined/undefined, or distinguished and changing state
4199             PROF_EVENT(232, "mc_STOREV32-slow2");
4200             mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4201          }
4202       } else if (V_BITS32_UNDEFINED == vbits32) {
4203          if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
4204             return;
4205          } else if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
4206             sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
4207          } else {
4208             // not defined/undefined, or distinguished and changing state
4209             PROF_EVENT(233, "mc_STOREV32-slow3");
4210             mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4211          }
4212       } else {
4213          // Partially defined word
4214          PROF_EVENT(234, "mc_STOREV32-slow4");
4215          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4216       }
4217    }
4218 #endif
4219 }
4220 
4221 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
4222 {
4223    mc_STOREV32(a, vbits32, True);
4224 }
4225 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
4226 {
4227    mc_STOREV32(a, vbits32, False);
4228 }
4229 
4230 
4231 /* ------------------------ Size = 2 ------------------------ */
4232 
4233 static INLINE
4234 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
4235 {
4236    PROF_EVENT(240, "mc_LOADV16");
4237 
4238 #ifndef PERF_FAST_LOADV
4239    return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4240 #else
4241    {
4242       UWord   sm_off, vabits8;
4243       SecMap* sm;
4244 
4245       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4246          PROF_EVENT(241, "mc_LOADV16-slow1");
4247          return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4248       }
4249 
4250       sm      = get_secmap_for_reading_low(a);
4251       sm_off  = SM_OFF(a);
4252       vabits8 = sm->vabits8[sm_off];
4253       // Handle common case quickly: a is suitably aligned, is mapped, and is
4254       // addressable.
4255       // Convert V bits from compact memory form to expanded register form
4256       if      (vabits8 == VA_BITS8_DEFINED  ) { return V_BITS16_DEFINED;   }
4257       else if (vabits8 == VA_BITS8_UNDEFINED) { return V_BITS16_UNDEFINED; }
4258       else {
4259          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4260          // the two sub-bytes.
4261          UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
4262          if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
4263          else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
4264          else {
4265             /* Slow case: the two bytes are not all-defined or all-undefined. */
4266             PROF_EVENT(242, "mc_LOADV16-slow2");
4267             return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4268          }
4269       }
4270    }
4271 #endif
4272 }
4273 
4274 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
4275 {
4276    return mc_LOADV16(a, True);
4277 }
4278 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
4279 {
4280    return mc_LOADV16(a, False);
4281 }
4282 
4283 
4284 static INLINE
4285 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
4286 {
4287    PROF_EVENT(250, "mc_STOREV16");
4288 
4289 #ifndef PERF_FAST_STOREV
4290    mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4291 #else
4292    {
4293       UWord   sm_off, vabits8;
4294       SecMap* sm;
4295 
4296       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4297          PROF_EVENT(251, "mc_STOREV16-slow1");
4298          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4299          return;
4300       }
4301 
4302       sm      = get_secmap_for_reading_low(a);
4303       sm_off  = SM_OFF(a);
4304       vabits8 = sm->vabits8[sm_off];
4305       if (LIKELY( !is_distinguished_sm(sm) &&
4306                           (VA_BITS8_DEFINED   == vabits8 ||
4307                            VA_BITS8_UNDEFINED == vabits8) ))
4308       {
4309          /* Handle common case quickly: a is suitably aligned, */
4310          /* is mapped, and is addressable. */
4311          // Convert full V-bits in register to compact 2-bit form.
4312          if (V_BITS16_DEFINED == vbits16) {
4313             insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED ,
4314                                          &(sm->vabits8[sm_off]) );
4315          } else if (V_BITS16_UNDEFINED == vbits16) {
4316             insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
4317                                          &(sm->vabits8[sm_off]) );
4318          } else {
4319             /* Slow but general case -- writing partially defined bytes. */
4320             PROF_EVENT(252, "mc_STOREV16-slow2");
4321             mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4322          }
4323       } else {
4324          /* Slow but general case. */
4325          PROF_EVENT(253, "mc_STOREV16-slow3");
4326          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4327       }
4328    }
4329 #endif
4330 }
4331 
4332 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
4333 {
4334    mc_STOREV16(a, vbits16, True);
4335 }
4336 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
4337 {
4338    mc_STOREV16(a, vbits16, False);
4339 }
4340 
4341 
4342 /* ------------------------ Size = 1 ------------------------ */
4343 /* Note: endianness is irrelevant for size == 1 */
4344 
4345 VG_REGPARM(1)
4346 UWord MC_(helperc_LOADV8) ( Addr a )
4347 {
4348    PROF_EVENT(260, "mc_LOADV8");
4349 
4350 #ifndef PERF_FAST_LOADV
4351    return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4352 #else
4353    {
4354       UWord   sm_off, vabits8;
4355       SecMap* sm;
4356 
4357       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4358          PROF_EVENT(261, "mc_LOADV8-slow1");
4359          return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4360       }
4361 
4362       sm      = get_secmap_for_reading_low(a);
4363       sm_off  = SM_OFF(a);
4364       vabits8 = sm->vabits8[sm_off];
4365       // Convert V bits from compact memory form to expanded register form
4366       // Handle common case quickly: a is mapped, and the entire
4367       // word32 it lives in is addressable.
4368       if      (vabits8 == VA_BITS8_DEFINED  ) { return V_BITS8_DEFINED;   }
4369       else if (vabits8 == VA_BITS8_UNDEFINED) { return V_BITS8_UNDEFINED; }
4370       else {
4371          // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4372          // the single byte.
4373          UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
4374          if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
4375          else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
4376          else {
4377             /* Slow case: the byte is not all-defined or all-undefined. */
4378             PROF_EVENT(262, "mc_LOADV8-slow2");
4379             return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4380          }
4381       }
4382    }
4383 #endif
4384 }
4385 
4386 
4387 VG_REGPARM(2)
4388 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
4389 {
4390    PROF_EVENT(270, "mc_STOREV8");
4391 
4392 #ifndef PERF_FAST_STOREV
4393    mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4394 #else
4395    {
4396       UWord   sm_off, vabits8;
4397       SecMap* sm;
4398 
4399       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4400          PROF_EVENT(271, "mc_STOREV8-slow1");
4401          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4402          return;
4403       }
4404 
4405       sm      = get_secmap_for_reading_low(a);
4406       sm_off  = SM_OFF(a);
4407       vabits8 = sm->vabits8[sm_off];
4408       if (LIKELY
4409             ( !is_distinguished_sm(sm) &&
4410               ( (VA_BITS8_DEFINED == vabits8 || VA_BITS8_UNDEFINED == vabits8)
4411              || (VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8))
4412               )
4413             )
4414          )
4415       {
4416          /* Handle common case quickly: a is mapped, the entire word32 it
4417             lives in is addressable. */
4418          // Convert full V-bits in register to compact 2-bit form.
4419          if (V_BITS8_DEFINED == vbits8) {
4420             insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
4421                                           &(sm->vabits8[sm_off]) );
4422          } else if (V_BITS8_UNDEFINED == vbits8) {
4423             insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
4424                                           &(sm->vabits8[sm_off]) );
4425          } else {
4426             /* Slow but general case -- writing partially defined bytes. */
4427             PROF_EVENT(272, "mc_STOREV8-slow2");
4428             mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4429          }
4430       } else {
4431          /* Slow but general case. */
4432          PROF_EVENT(273, "mc_STOREV8-slow3");
4433          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4434       }
4435    }
4436 #endif
4437 }
4438 
4439 
4440 /*------------------------------------------------------------*/
4441 /*--- Functions called directly from generated code:       ---*/
4442 /*--- Value-check failure handlers.                        ---*/
4443 /*------------------------------------------------------------*/
4444 
4445 /* Call these ones when an origin is available ... */
4446 VG_REGPARM(1)
4447 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
4448    MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
4449 }
4450 
4451 VG_REGPARM(1)
4452 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
4453    MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
4454 }
4455 
4456 VG_REGPARM(1)
4457 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
4458    MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
4459 }
4460 
4461 VG_REGPARM(1)
4462 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
4463    MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
4464 }
4465 
4466 VG_REGPARM(2)
4467 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
4468    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
4469 }
4470 
4471 /* ... and these when an origin isn't available. */
4472 
4473 VG_REGPARM(0)
4474 void MC_(helperc_value_check0_fail_no_o) ( void ) {
4475    MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
4476 }
4477 
4478 VG_REGPARM(0)
4479 void MC_(helperc_value_check1_fail_no_o) ( void ) {
4480    MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
4481 }
4482 
4483 VG_REGPARM(0)
4484 void MC_(helperc_value_check4_fail_no_o) ( void ) {
4485    MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
4486 }
4487 
4488 VG_REGPARM(0)
4489 void MC_(helperc_value_check8_fail_no_o) ( void ) {
4490    MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
4491 }
4492 
4493 VG_REGPARM(1)
4494 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
4495    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
4496 }
4497 
4498 
4499 /*------------------------------------------------------------*/
4500 /*--- Metadata get/set functions, for client requests.     ---*/
4501 /*------------------------------------------------------------*/
4502 
4503 // Nb: this expands the V+A bits out into register-form V bits, even though
4504 // they're in memory.  This is for backward compatibility, and because it's
4505 // probably what the user wants.
4506 
4507 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
4508    error [no longer used], 3 == addressing error. */
4509 /* Nb: We used to issue various definedness/addressability errors from here,
4510    but we took them out because they ranged from not-very-helpful to
4511    downright annoying, and they complicated the error data structures. */
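/* Typical client-side usage, via the macros in memcheck.h (a sketch;
   see memcheck.h for the exact interface):

      unsigned char buf[8], vbits[8];
      int rc;
      rc = VALGRIND_GET_VBITS(buf, vbits, sizeof buf);  // rc == 3 on an
                                                        //   addressing error
      ...
      rc = VALGRIND_SET_VBITS(buf, vbits, sizeof buf);

   Each byte of vbits[] carries the register-form V bits for the
   corresponding byte of buf[], as noted above. */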
4512 static Int mc_get_or_set_vbits_for_client (
4513    Addr a,
4514    Addr vbits,
4515    SizeT szB,
4516    Bool setting /* True <=> set vbits,  False <=> get vbits */
4517 )
4518 {
4519    SizeT i;
4520    Bool  ok;
4521    UChar vbits8;
4522 
4523    /* Check that arrays are addressable before doing any getting/setting. */
4524    for (i = 0; i < szB; i++) {
4525       if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
4526           VA_BITS2_NOACCESS == get_vabits2(vbits + i)) {
4527          return 3;
4528       }
4529    }
4530 
4531    /* Do the copy */
4532    if (setting) {
4533       /* setting */
4534       for (i = 0; i < szB; i++) {
4535          ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
4536          tl_assert(ok);
4537       }
4538    } else {
4539       /* getting */
4540       for (i = 0; i < szB; i++) {
4541          ok = get_vbits8(a + i, &vbits8);
4542          tl_assert(ok);
4543          ((UChar*)vbits)[i] = vbits8;
4544       }
4545       // The bytes in vbits[] have now been set, so mark them as such.
4546       MC_(make_mem_defined)(vbits, szB);
4547    }
4548 
4549    return 1;
4550 }
4551 
4552 
4553 /*------------------------------------------------------------*/
4554 /*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
4555 /*------------------------------------------------------------*/
4556 
4557 /* For the memory leak detector, say whether an entire 64k chunk of
4558    address space is possibly in use, or not.  If in doubt return
4559    True.
4560 */
4561 Bool MC_(is_within_valid_secondary) ( Addr a )
4562 {
4563    SecMap* sm = maybe_get_secmap_for ( a );
4564    if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]
4565        || MC_(in_ignored_range)(a)) {
4566       /* Definitely not in use. */
4567       return False;
4568    } else {
4569       return True;
4570    }
4571 }
4572 
4573 
4574 /* For the memory leak detector, say whether or not a given word
4575    address is to be regarded as valid. */
4576 Bool MC_(is_valid_aligned_word) ( Addr a )
4577 {
4578    tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
4579    tl_assert(VG_IS_WORD_ALIGNED(a));
4580    if (is_mem_defined( a, sizeof(UWord), NULL, NULL) == MC_Ok
4581        && !MC_(in_ignored_range)(a)) {
4582       return True;
4583    } else {
4584       return False;
4585    }
4586 }
4587 
4588 
4589 /*------------------------------------------------------------*/
4590 /*--- Initialisation                                       ---*/
4591 /*------------------------------------------------------------*/
4592 
4593 static void init_shadow_memory ( void )
4594 {
4595    Int     i;
4596    SecMap* sm;
4597 
4598    tl_assert(V_BIT_UNDEFINED   == 1);
4599    tl_assert(V_BIT_DEFINED     == 0);
4600    tl_assert(V_BITS8_UNDEFINED == 0xFF);
4601    tl_assert(V_BITS8_DEFINED   == 0);
4602 
4603    /* Build the 3 distinguished secondaries */
4604    sm = &sm_distinguished[SM_DIST_NOACCESS];
4605    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
4606 
4607    sm = &sm_distinguished[SM_DIST_UNDEFINED];
4608    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
4609 
4610    sm = &sm_distinguished[SM_DIST_DEFINED];
4611    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
4612 
4613    /* Set up the primary map. */
4614    /* These entries gradually get overwritten as the used address
4615       space expands. */
4616    for (i = 0; i < N_PRIMARY_MAP; i++)
4617       primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
4618 
4619    /* Auxiliary primary maps */
4620    init_auxmap_L1_L2();
4621 
4622    /* auxmap_size = auxmap_used = 0;
4623       no ... these are statically initialised */
4624 
4625    /* Secondary V bit table */
4626    secVBitTable = createSecVBitTable();
4627 }
4628 
4629 
4630 /*------------------------------------------------------------*/
4631 /*--- Sanity check machinery (permanently engaged)         ---*/
4632 /*------------------------------------------------------------*/
4633 
4634 static Bool mc_cheap_sanity_check ( void )
4635 {
4636    n_sanity_cheap++;
4637    PROF_EVENT(490, "cheap_sanity_check");
4638    /* Check for sane operating level */
4639    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
4640       return False;
4641    /* nothing else useful we can rapidly check */
4642    return True;
4643 }
4644 
4645 static Bool mc_expensive_sanity_check ( void )
4646 {
4647    Int     i;
4648    Word    n_secmaps_found;
4649    SecMap* sm;
4650    HChar*  errmsg;
4651    Bool    bad = False;
4652 
4653    if (0) VG_(printf)("expensive sanity check\n");
4654    if (0) return True;
4655 
4656    n_sanity_expensive++;
4657    PROF_EVENT(491, "expensive_sanity_check");
4658 
4659    /* Check for sane operating level */
4660    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
4661       return False;
4662 
4663    /* Check that the 3 distinguished SMs are still as they should be. */
4664 
4665    /* Check noaccess DSM. */
4666    sm = &sm_distinguished[SM_DIST_NOACCESS];
4667    for (i = 0; i < SM_CHUNKS; i++)
4668       if (sm->vabits8[i] != VA_BITS8_NOACCESS)
4669          bad = True;
4670 
4671    /* Check undefined DSM. */
4672    sm = &sm_distinguished[SM_DIST_UNDEFINED];
4673    for (i = 0; i < SM_CHUNKS; i++)
4674       if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
4675          bad = True;
4676 
4677    /* Check defined DSM. */
4678    sm = &sm_distinguished[SM_DIST_DEFINED];
4679    for (i = 0; i < SM_CHUNKS; i++)
4680       if (sm->vabits8[i] != VA_BITS8_DEFINED)
4681          bad = True;
4682 
4683    if (bad) {
4684       VG_(printf)("memcheck expensive sanity: "
4685                   "distinguished_secondaries have changed\n");
4686       return False;
4687    }
4688 
4689    /* If we're not checking for undefined value errors, the secondary V bit
4690     * table should be empty. */
4691    if (MC_(clo_mc_level) == 1) {
4692       if (0 != VG_(OSetGen_Size)(secVBitTable))
4693          return False;
4694    }
4695 
4696    /* check the auxiliary maps, very thoroughly */
4697    n_secmaps_found = 0;
4698    errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
4699    if (errmsg) {
4700       VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
4701       return False;
4702    }
4703 
4704    /* n_secmaps_found is now the number referred to by the auxiliary
4705       primary map.  Now add on the ones referred to by the main
4706       primary map. */
4707    for (i = 0; i < N_PRIMARY_MAP; i++) {
4708       if (primary_map[i] == NULL) {
4709          bad = True;
4710       } else {
4711          if (!is_distinguished_sm(primary_map[i]))
4712             n_secmaps_found++;
4713       }
4714    }
4715 
4716    /* check that the number of secmaps issued matches the number that
4717       are reachable (iow, no secmap leaks) */
4718    if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
4719       bad = True;
4720 
4721    if (bad) {
4722       VG_(printf)("memcheck expensive sanity: "
4723                   "apparent secmap leakage\n");
4724       return False;
4725    }
4726 
4727    if (bad) {
4728       VG_(printf)("memcheck expensive sanity: "
4729                   "auxmap covers wrong address space\n");
4730       return False;
4731    }
4732 
4733    /* there is only one pointer to each secmap (expensive) */
4734 
4735    return True;
4736 }
4737 
4738 /*------------------------------------------------------------*/
4739 /*--- Command line args                                    ---*/
4740 /*------------------------------------------------------------*/
4741 
4742 Bool          MC_(clo_partial_loads_ok)       = False;
4743 Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
4744 LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
4745 VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
4746 Bool          MC_(clo_show_reachable)         = False;
4747 Bool          MC_(clo_show_possibly_lost)     = True;
4748 Bool          MC_(clo_workaround_gcc296_bugs) = False;
4749 Int           MC_(clo_malloc_fill)            = -1;
4750 Int           MC_(clo_free_fill)              = -1;
4751 Int           MC_(clo_mc_level)               = 2;
4752 const char*   MC_(clo_summary_file)           = NULL;
4753 
4754 
4755 static Bool mc_process_cmd_line_options(Char* arg)
4756 {
4757    Char* tmp_str;
4758 
4759    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
4760 
4761    /* Set MC_(clo_mc_level):
4762          1 = A bit tracking only
4763          2 = A and V bit tracking, but no V bit origins
4764          3 = A and V bit tracking, and V bit origins
4765 
4766       Do this by inspecting --undef-value-errors= and
4767       --track-origins=.  Reject the case --undef-value-errors=no
4768       --track-origins=yes as meaningless.
4769    */
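   /* For instance (illustrative summary of the rules above):

         --undef-value-errors=yes --track-origins=no   -> level 2 (default)
         --undef-value-errors=no                       -> level 1
         --track-origins=yes                           -> level 3
         --undef-value-errors=no --track-origins=yes   -> rejected below
   */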
4770    if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
4771       if (MC_(clo_mc_level) == 3) {
4772          goto bad_level;
4773       } else {
4774          MC_(clo_mc_level) = 1;
4775          return True;
4776       }
4777    }
4778    if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
4779       if (MC_(clo_mc_level) == 1)
4780          MC_(clo_mc_level) = 2;
4781       return True;
4782    }
4783    if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
4784       if (MC_(clo_mc_level) == 3)
4785          MC_(clo_mc_level) = 2;
4786       return True;
4787    }
4788    if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
4789       if (MC_(clo_mc_level) == 1) {
4790          goto bad_level;
4791       } else {
4792          MC_(clo_mc_level) = 3;
4793          return True;
4794       }
4795    }
4796 
4797 	if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
4798    else if VG_BOOL_CLO(arg, "--show-reachable",   MC_(clo_show_reachable))   {}
4799    else if VG_BOOL_CLO(arg, "--show-possibly-lost",
4800                                             MC_(clo_show_possibly_lost))     {}
4801    else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
4802                                             MC_(clo_workaround_gcc296_bugs)) {}
4803 
4804    else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
4805                                                0, 10*1000*1000*1000LL) {}
4806 
4807    else if VG_XACT_CLO(arg, "--leak-check=no",
4808                             MC_(clo_leak_check), LC_Off) {}
4809    else if VG_XACT_CLO(arg, "--leak-check=summary",
4810                             MC_(clo_leak_check), LC_Summary) {}
4811    else if VG_XACT_CLO(arg, "--leak-check=yes",
4812                             MC_(clo_leak_check), LC_Full) {}
4813    else if VG_XACT_CLO(arg, "--leak-check=full",
4814                             MC_(clo_leak_check), LC_Full) {}
4815 
4816    else if VG_XACT_CLO(arg, "--leak-resolution=low",
4817                             MC_(clo_leak_resolution), Vg_LowRes) {}
4818    else if VG_XACT_CLO(arg, "--leak-resolution=med",
4819                             MC_(clo_leak_resolution), Vg_MedRes) {}
4820    else if VG_XACT_CLO(arg, "--leak-resolution=high",
4821                             MC_(clo_leak_resolution), Vg_HighRes) {}
4822 
4823    else if VG_STR_CLO(arg, "--summary-file", tmp_str) {
4824       MC_(clo_summary_file) = VG_(strdup)("clo_summary_file", tmp_str);
4825    }
4826    else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
4827       Int  i;
4828       Bool ok  = parse_ignore_ranges(tmp_str);
4829       if (!ok)
4830         return False;
4831       tl_assert(ignoreRanges.used >= 0);
4832       tl_assert(ignoreRanges.used < M_IGNORE_RANGES);
4833       for (i = 0; i < ignoreRanges.used; i++) {
4834          Addr s = ignoreRanges.start[i];
4835          Addr e = ignoreRanges.end[i];
4836          Addr limit = 0x4000000; /* 64M - entirely arbitrary limit */
4837          if (e <= s) {
4838             VG_(message)(Vg_DebugMsg,
4839                "ERROR: --ignore-ranges: end <= start in range:\n");
4840             VG_(message)(Vg_DebugMsg,
4841                "       0x%lx-0x%lx\n", s, e);
4842             return False;
4843          }
4844          if (e - s > limit) {
4845             VG_(message)(Vg_DebugMsg,
4846                "ERROR: --ignore-ranges: suspiciously large range:\n");
4847             VG_(message)(Vg_DebugMsg,
4848                "       0x%lx-0x%lx (size %ld)\n", s, e, (UWord)(e-s));
4849             return False;
4850 	 }
4851       }
4852    }
4853 
4854    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
4855    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
4856 
4857    else
4858       return VG_(replacement_malloc_process_cmd_line_option)(arg);
4859 
4860    return True;
4861 
4862 
4863   bad_level:
4864    VG_(fmsg_bad_option)(arg,
4865       "--track-origins=yes has no effect when --undef-value-errors=no.\n");
4866 }
4867 
4868 static void mc_print_usage(void)
4869 {
4870    VG_(printf)(
4871 "    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
4872 "    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
4873 "    --show-reachable=no|yes          show reachable blocks in leak check? [no]\n"
4874 "    --show-possibly-lost=no|yes      show possibly lost blocks in leak check?\n"
4875 "                                     [yes]\n"
4876 "    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
4877 "    --track-origins=no|yes           show origins of undefined values? [no]\n"
4878 "    --partial-loads-ok=no|yes        too hard to explain here; see manual [no]\n"
4879 "    --freelist-vol=<number>          volume of freed blocks queue [20000000]\n"
4880 "    --workaround-gcc296-bugs=no|yes  self explanatory [no]\n"
4881 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
4882 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
4883 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
4884    );
4885 }
4886 
4887 static void mc_print_debug_usage(void)
4888 {
4889    VG_(printf)(
4890 "    (none)\n"
4891    );
4892 }
4893 
4894 
4895 /*------------------------------------------------------------*/
4896 /*--- Client blocks                                        ---*/
4897 /*------------------------------------------------------------*/
4898 
4899 /* Client block management:
4900 
4901    This is managed as an expanding array of client block descriptors.
4902    Indices of live descriptors are issued to the client, so it can ask
4903    to free them later.  Therefore we cannot slide live entries down
4904    over dead ones.  Instead we must use free/inuse flags and scan for
4905    an empty slot at allocation time.  This in turn means allocation is
4906    relatively expensive, so we hope this does not happen too often.
4907 
4908    An unused block has start == size == 0
4909 */
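/* Client-side view (a sketch using the request macros from memcheck.h;
   see that header for the exact interface):

      int id = VALGRIND_CREATE_BLOCK(addr, len, "my buffer");
      ...
      VALGRIND_DISCARD(id);   // returns 1 if 'id' was not a live block

   The index handed back by CREATE_BLOCK is the slot number issued by
   alloc_client_block() below, which is why live slots can never be
   compacted. */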
4910 
4911 /* type CGenBlock is defined in mc_include.h */
4912 
4913 /* This subsystem is self-initialising. */
4914 static UWord      cgb_size = 0;
4915 static UWord      cgb_used = 0;
4916 static CGenBlock* cgbs     = NULL;
4917 
4918 /* Stats for this subsystem. */
4919 static ULong cgb_used_MAX = 0;   /* Max in use. */
4920 static ULong cgb_allocs   = 0;   /* Number of allocs. */
4921 static ULong cgb_discards = 0;   /* Number of discards. */
4922 static ULong cgb_search   = 0;   /* Number of searches. */
4923 
4924 
4925 /* Get access to the client block array. */
4926 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
4927                                  /*OUT*/UWord* nBlocks )
4928 {
4929    *blocks  = cgbs;
4930    *nBlocks = cgb_used;
4931 }
4932 
4933 
4934 static
4935 Int alloc_client_block ( void )
4936 {
4937    UWord      i, sz_new;
4938    CGenBlock* cgbs_new;
4939 
4940    cgb_allocs++;
4941 
4942    for (i = 0; i < cgb_used; i++) {
4943       cgb_search++;
4944       if (cgbs[i].start == 0 && cgbs[i].size == 0)
4945          return i;
4946    }
4947 
4948    /* Not found.  Try to allocate one at the end. */
4949    if (cgb_used < cgb_size) {
4950       cgb_used++;
4951       return cgb_used-1;
4952    }
4953 
4954    /* Ok, we have to allocate a new one. */
4955    tl_assert(cgb_used == cgb_size);
4956    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
4957 
4958    cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
4959    for (i = 0; i < cgb_used; i++)
4960       cgbs_new[i] = cgbs[i];
4961 
4962    if (cgbs != NULL)
4963       VG_(free)( cgbs );
4964    cgbs = cgbs_new;
4965 
4966    cgb_size = sz_new;
4967    cgb_used++;
4968    if (cgb_used > cgb_used_MAX)
4969       cgb_used_MAX = cgb_used;
4970    return cgb_used-1;
4971 }
4972 
4973 
4974 static void show_client_block_stats ( void )
4975 {
4976    VG_(message)(Vg_DebugMsg,
4977       "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
4978       cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
4979    );
4980 }
4981 
4982 
4983 /*------------------------------------------------------------*/
4984 /*--- Client requests                                      ---*/
4985 /*------------------------------------------------------------*/
4986 
4987 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
4988 {
4989    Int   i;
4990    Bool  ok;
4991    Addr  bad_addr;
4992 
4993    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
4994        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
4995        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
4996        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
4997        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
4998        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
4999        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
5000        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
5001        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
5002        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
5003        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0])
5004       return False;
5005 
5006    switch (arg[0]) {
5007       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE:
5008          ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
5009          if (!ok)
5010             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
5011          *ret = ok ? (UWord)NULL : bad_addr;
5012          break;
5013 
5014       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
5015          MC_ReadResult res;
5016          UInt otag = 0;
5017          res = is_mem_defined ( arg[1], arg[2], &bad_addr, &otag );
5018          if (MC_AddrErr == res)
5019             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
5020          else if (MC_ValueErr == res)
5021             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/False, otag );
5022          *ret = ( res==MC_Ok ? (UWord)NULL : bad_addr );
5023          break;
5024       }
5025 
5026       case VG_USERREQ__DO_LEAK_CHECK:
5027          MC_(detect_memory_leaks)(tid, arg[1] ? LC_Summary : LC_Full);
5028          *ret = 0; /* return value is meaningless */
5029          break;
5030 
5031       case VG_USERREQ__MAKE_MEM_NOACCESS:
5032          MC_(make_mem_noaccess) ( arg[1], arg[2] );
5033          *ret = -1;
5034          break;
5035 
5036       case VG_USERREQ__MAKE_MEM_UNDEFINED:
5037          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
5038                                               MC_OKIND_USER );
5039          *ret = -1;
5040          break;
5041 
5042       case VG_USERREQ__MAKE_MEM_DEFINED:
5043          MC_(make_mem_defined) ( arg[1], arg[2] );
5044          *ret = -1;
5045          break;
5046 
5047       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
5048          make_mem_defined_if_addressable ( arg[1], arg[2] );
5049          *ret = -1;
5050          break;
5051 
5052       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
5053          if (arg[1] != 0 && arg[2] != 0) {
5054             i = alloc_client_block();
5055             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
5056             cgbs[i].start = arg[1];
5057             cgbs[i].size  = arg[2];
5058             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (Char *)arg[3]);
5059             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
5060             *ret = i;
5061          } else
5062             *ret = -1;
5063          break;
5064 
5065       case VG_USERREQ__DISCARD: /* discard */
5066          if (cgbs == NULL
5067              || arg[2] >= cgb_used ||
5068              (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
5069             *ret = 1;
5070          } else {
5071             tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
5072             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
5073             VG_(free)(cgbs[arg[2]].desc);
5074             cgb_discards++;
5075             *ret = 0;
5076          }
5077          break;
5078 
5079       case VG_USERREQ__GET_VBITS:
5080          *ret = mc_get_or_set_vbits_for_client
5081                    ( arg[1], arg[2], arg[3], False /* get them */ );
5082          break;
5083 
5084       case VG_USERREQ__SET_VBITS:
5085          *ret = mc_get_or_set_vbits_for_client
5086                    ( arg[1], arg[2], arg[3], True /* set them */ );
5087          break;
5088 
5089       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
5090          UWord** argp = (UWord**)arg;
5091          // MC_(bytes_leaked) et al were set by the last leak check (or zero
5092          // if no prior leak checks performed).
5093          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
5094          *argp[2] = MC_(bytes_dubious);
5095          *argp[3] = MC_(bytes_reachable);
5096          *argp[4] = MC_(bytes_suppressed);
5097          // there is no argp[5]
5098          //*argp[5] = MC_(bytes_indirect);
5099          // XXX need to make *argp[1-4] defined;  currently done in the
5100          // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
5101          *ret = 0;
5102          return True;
5103       }
5104       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
5105          UWord** argp = (UWord**)arg;
5106          // MC_(blocks_leaked) et al were set by the last leak check (or zero
5107          // if no prior leak checks performed).
5108          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
5109          *argp[2] = MC_(blocks_dubious);
5110          *argp[3] = MC_(blocks_reachable);
5111          *argp[4] = MC_(blocks_suppressed);
5112          // there is no argp[5]
5113          //*argp[5] = MC_(blocks_indirect);
5114          // XXX need to make *argp[1-4] defined;  currently done in the
5115          // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
5116          *ret = 0;
5117          return True;
5118       }
5119       case VG_USERREQ__MALLOCLIKE_BLOCK: {
5120          Addr p         = (Addr)arg[1];
5121          SizeT sizeB    =       arg[2];
5122          //UInt rzB       =       arg[3];    XXX: unused!
5123          Bool is_zeroed = (Bool)arg[4];
5124 
5125          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
5126                           MC_AllocCustom, MC_(malloc_list) );
5127          return True;
5128       }
5129       case VG_USERREQ__FREELIKE_BLOCK: {
5130          Addr p         = (Addr)arg[1];
5131          UInt rzB       =       arg[2];
5132 
5133          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
5134          return True;
5135       }
5136 
5137       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
5138          Char* s   = (Char*)arg[1];
5139          Addr  dst = (Addr) arg[2];
5140          Addr  src = (Addr) arg[3];
5141          SizeT len = (SizeT)arg[4];
5142          MC_(record_overlap_error)(tid, s, src, dst, len);
5143          return True;
5144       }
5145 
5146       case VG_USERREQ__CREATE_MEMPOOL: {
5147          Addr pool      = (Addr)arg[1];
5148          UInt rzB       =       arg[2];
5149          Bool is_zeroed = (Bool)arg[3];
5150 
5151          MC_(create_mempool) ( pool, rzB, is_zeroed );
5152          return True;
5153       }
5154 
5155       case VG_USERREQ__DESTROY_MEMPOOL: {
5156          Addr pool      = (Addr)arg[1];
5157 
5158          MC_(destroy_mempool) ( pool );
5159          return True;
5160       }
5161 
5162       case VG_USERREQ__MEMPOOL_ALLOC: {
5163          Addr pool      = (Addr)arg[1];
5164          Addr addr      = (Addr)arg[2];
5165          UInt size      =       arg[3];
5166 
5167          MC_(mempool_alloc) ( tid, pool, addr, size );
5168          return True;
5169       }
5170 
5171       case VG_USERREQ__MEMPOOL_FREE: {
5172          Addr pool      = (Addr)arg[1];
5173          Addr addr      = (Addr)arg[2];
5174 
5175          MC_(mempool_free) ( pool, addr );
5176          return True;
5177       }
5178 
5179       case VG_USERREQ__MEMPOOL_TRIM: {
5180          Addr pool      = (Addr)arg[1];
5181          Addr addr      = (Addr)arg[2];
5182          UInt size      =       arg[3];
5183 
5184          MC_(mempool_trim) ( pool, addr, size );
5185          return True;
5186       }
5187 
5188       case VG_USERREQ__MOVE_MEMPOOL: {
5189          Addr poolA     = (Addr)arg[1];
5190          Addr poolB     = (Addr)arg[2];
5191 
5192          MC_(move_mempool) ( poolA, poolB );
5193          return True;
5194       }
5195 
5196       case VG_USERREQ__MEMPOOL_CHANGE: {
5197          Addr pool      = (Addr)arg[1];
5198          Addr addrA     = (Addr)arg[2];
5199          Addr addrB     = (Addr)arg[3];
5200          UInt size      =       arg[4];
5201 
5202          MC_(mempool_change) ( pool, addrA, addrB, size );
5203          return True;
5204       }
5205 
5206       case VG_USERREQ__MEMPOOL_EXISTS: {
5207          Addr pool      = (Addr)arg[1];
5208 
5209          *ret = (UWord) MC_(mempool_exists) ( pool );
5210 	 return True;
5211       }
5212 
5213 
5214       default:
5215          VG_(message)(
5216             Vg_UserMsg,
5217             "Warning: unknown memcheck client request code %llx\n",
5218             (ULong)arg[0]
5219          );
5220          return False;
5221    }
5222    return True;
5223 }
5224 
5225 
5226 /*------------------------------------------------------------*/
5227 /*--- Crude profiling machinery.                           ---*/
5228 /*------------------------------------------------------------*/
5229 
5230 // We track a number of interesting events (using PROF_EVENT)
5231 // if MC_PROFILE_MEMORY is defined.
5232 
5233 #ifdef MC_PROFILE_MEMORY
5234 
5235 UInt   MC_(event_ctr)[N_PROF_EVENTS];
5236 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
5237 
5238 static void init_prof_mem ( void )
5239 {
5240    Int i;
5241    for (i = 0; i < N_PROF_EVENTS; i++) {
5242       MC_(event_ctr)[i] = 0;
5243       MC_(event_ctr_name)[i] = NULL;
5244    }
5245 }
5246 
5247 static void done_prof_mem ( void )
5248 {
5249    Int  i;
5250    Bool spaced = False;
5251    for (i = 0; i < N_PROF_EVENTS; i++) {
5252       if (!spaced && (i % 10) == 0) {
5253          VG_(printf)("\n");
5254          spaced = True;
5255       }
5256       if (MC_(event_ctr)[i] > 0) {
5257          spaced = False;
5258          VG_(printf)( "prof mem event %3d: %9d   %s\n",
5259                       i, MC_(event_ctr)[i],
5260                       MC_(event_ctr_name)[i]
5261                          ? MC_(event_ctr_name)[i] : "unnamed");
5262       }
5263    }
5264 }
5265 
5266 #else
5267 
5268 static void init_prof_mem ( void ) { }
5269 static void done_prof_mem ( void ) { }
5270 
5271 #endif
5272 
5273 
5274 /*------------------------------------------------------------*/
5275 /*--- Origin tracking stuff                                ---*/
5276 /*------------------------------------------------------------*/
5277 
5278 /*--------------------------------------------*/
5279 /*--- Origin tracking: load handlers       ---*/
5280 /*--------------------------------------------*/
5281 
5282 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
5283    return or1 > or2 ? or1 : or2;
5284 }
5285 
5286 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
5287    OCacheLine* line;
5288    UChar descr;
5289    UWord lineoff = oc_line_offset(a);
5290    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5291 
5292    if (OC_ENABLE_ASSERTIONS) {
5293       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5294    }
5295 
5296    line = find_OCacheLine( a );
5297 
5298    descr = line->descr[lineoff];
5299    if (OC_ENABLE_ASSERTIONS) {
5300       tl_assert(descr < 0x10);
5301    }
5302 
5303    if (LIKELY(0 == (descr & (1 << byteoff))))  {
5304       return 0;
5305    } else {
5306       return line->w32[lineoff];
5307    }
5308 }
5309 
5310 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
5311    OCacheLine* line;
5312    UChar descr;
5313    UWord lineoff, byteoff;
5314 
5315    if (UNLIKELY(a & 1)) {
5316       /* Handle misaligned case, slowly. */
5317       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
5318       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
5319       return merge_origins(oLo, oHi);
5320    }
5321 
5322    lineoff = oc_line_offset(a);
5323    byteoff = a & 3; /* 0 or 2 */
5324 
5325    if (OC_ENABLE_ASSERTIONS) {
5326       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5327    }
5328    line = find_OCacheLine( a );
5329 
5330    descr = line->descr[lineoff];
5331    if (OC_ENABLE_ASSERTIONS) {
5332       tl_assert(descr < 0x10);
5333    }
5334 
5335    if (LIKELY(0 == (descr & (3 << byteoff)))) {
5336       return 0;
5337    } else {
5338       return line->w32[lineoff];
5339    }
5340 }
5341 
5342 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
5343    OCacheLine* line;
5344    UChar descr;
5345    UWord lineoff;
5346 
5347    if (UNLIKELY(a & 3)) {
5348       /* Handle misaligned case, slowly. */
5349       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
5350       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
5351       return merge_origins(oLo, oHi);
5352    }
5353 
5354    lineoff = oc_line_offset(a);
5355    if (OC_ENABLE_ASSERTIONS) {
5356       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5357    }
5358 
5359    line = find_OCacheLine( a );
5360 
5361    descr = line->descr[lineoff];
5362    if (OC_ENABLE_ASSERTIONS) {
5363       tl_assert(descr < 0x10);
5364    }
5365 
5366    if (LIKELY(0 == descr)) {
5367       return 0;
5368    } else {
5369       return line->w32[lineoff];
5370    }
5371 }
5372 
5373 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
5374    OCacheLine* line;
5375    UChar descrLo, descrHi, descr;
5376    UWord lineoff;
5377 
5378    if (UNLIKELY(a & 7)) {
5379       /* Handle misaligned case, slowly. */
5380       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
5381       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
5382       return merge_origins(oLo, oHi);
5383    }
5384 
5385    lineoff = oc_line_offset(a);
5386    if (OC_ENABLE_ASSERTIONS) {
5387       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5388    }
5389 
5390    line = find_OCacheLine( a );
5391 
5392    descrLo = line->descr[lineoff + 0];
5393    descrHi = line->descr[lineoff + 1];
5394    descr   = descrLo | descrHi;
5395    if (OC_ENABLE_ASSERTIONS) {
5396       tl_assert(descr < 0x10);
5397    }
5398 
5399    if (LIKELY(0 == descr)) {
5400       return 0; /* both 32-bit chunks are defined */
5401    } else {
5402       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
5403       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
5404       return merge_origins(oLo, oHi);
5405    }
5406 }
5407 
5408 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
5409    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
5410    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
5411    UInt oBoth = merge_origins(oLo, oHi);
5412    return (UWord)oBoth;
5413 }
5414 
5415 
5416 /*--------------------------------------------*/
5417 /*--- Origin tracking: store handlers      ---*/
5418 /*--------------------------------------------*/
5419 
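/* The b_store helpers record origin tag 'd32' for an N-byte store at
   'a'.  A zero tag clears the relevant descr bits (those bytes no longer
   carry an origin); a nonzero tag sets them and overwrites the line's
   w32 slot, so within one 32-bit group the most recently stored nonzero
   tag wins for all tagged bytes. */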
5420 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
5421    OCacheLine* line;
5422    UWord lineoff = oc_line_offset(a);
5423    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5424 
5425    if (OC_ENABLE_ASSERTIONS) {
5426       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5427    }
5428 
5429    line = find_OCacheLine( a );
5430 
5431    if (d32 == 0) {
5432       line->descr[lineoff] &= ~(1 << byteoff);
5433    } else {
5434       line->descr[lineoff] |= (1 << byteoff);
5435       line->w32[lineoff] = d32;
5436    }
5437 }
5438 
5439 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
5440    OCacheLine* line;
5441    UWord lineoff, byteoff;
5442 
5443    if (UNLIKELY(a & 1)) {
5444       /* Handle misaligned case, slowly. */
5445       MC_(helperc_b_store1)( a + 0, d32 );
5446       MC_(helperc_b_store1)( a + 1, d32 );
5447       return;
5448    }
5449 
5450    lineoff = oc_line_offset(a);
5451    byteoff = a & 3; /* 0 or 2 */
5452 
5453    if (OC_ENABLE_ASSERTIONS) {
5454       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5455    }
5456 
5457    line = find_OCacheLine( a );
5458 
5459    if (d32 == 0) {
5460       line->descr[lineoff] &= ~(3 << byteoff);
5461    } else {
5462       line->descr[lineoff] |= (3 << byteoff);
5463       line->w32[lineoff] = d32;
5464    }
5465 }
5466 
5467 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
5468    OCacheLine* line;
5469    UWord lineoff;
5470 
5471    if (UNLIKELY(a & 3)) {
5472       /* Handle misaligned case, slowly. */
5473       MC_(helperc_b_store2)( a + 0, d32 );
5474       MC_(helperc_b_store2)( a + 2, d32 );
5475       return;
5476    }
5477 
5478    lineoff = oc_line_offset(a);
5479    if (OC_ENABLE_ASSERTIONS) {
5480       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5481    }
5482 
5483    line = find_OCacheLine( a );
5484 
5485    if (d32 == 0) {
5486       line->descr[lineoff] = 0;
5487    } else {
5488       line->descr[lineoff] = 0xF;
5489       line->w32[lineoff] = d32;
5490    }
5491 }
5492 
5493 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
5494    OCacheLine* line;
5495    UWord lineoff;
5496 
5497    if (UNLIKELY(a & 7)) {
5498       /* Handle misaligned case, slowly. */
5499       MC_(helperc_b_store4)( a + 0, d32 );
5500       MC_(helperc_b_store4)( a + 4, d32 );
5501       return;
5502    }
5503 
5504    lineoff = oc_line_offset(a);
5505    if (OC_ENABLE_ASSERTIONS) {
5506       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5507    }
5508 
5509    line = find_OCacheLine( a );
5510 
5511    if (d32 == 0) {
5512       line->descr[lineoff + 0] = 0;
5513       line->descr[lineoff + 1] = 0;
5514    } else {
5515       line->descr[lineoff + 0] = 0xF;
5516       line->descr[lineoff + 1] = 0xF;
5517       line->w32[lineoff + 0] = d32;
5518       line->w32[lineoff + 1] = d32;
5519    }
5520 }
5521 
5522 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
5523    MC_(helperc_b_store8)( a + 0, d32 );
5524    MC_(helperc_b_store8)( a + 8, d32 );
5525 }
5526 
5527 
5528 /*--------------------------------------------*/
5529 /*--- Origin tracking: sarp handlers       ---*/
5530 /*--------------------------------------------*/
5531 
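/* Set or clear origins over an arbitrary [a, a+len) range by peeling off
   leading bytes until 'a' is 4-aligned, doing aligned 4-byte stores, and
   then mopping up the tail.  An illustrative trace (hypothetical
   addresses): Set_Origins(0x5007, 13, otag) issues
      store1(0x5007), store4(0x5008), store4(0x500C), store4(0x5010)
   covering exactly 1 + 4 + 4 + 4 = 13 bytes. */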
5532 __attribute__((noinline))
5533 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
5534    if ((a & 1) && len >= 1) {
5535       MC_(helperc_b_store1)( a, otag );
5536       a++;
5537       len--;
5538    }
5539    if ((a & 2) && len >= 2) {
5540       MC_(helperc_b_store2)( a, otag );
5541       a += 2;
5542       len -= 2;
5543    }
5544    if (len >= 4)
5545       tl_assert(0 == (a & 3));
5546    while (len >= 4) {
5547       MC_(helperc_b_store4)( a, otag );
5548       a += 4;
5549       len -= 4;
5550    }
5551    if (len >= 2) {
5552       MC_(helperc_b_store2)( a, otag );
5553       a += 2;
5554       len -= 2;
5555    }
5556    if (len >= 1) {
5557       MC_(helperc_b_store1)( a, otag );
5558       //a++;
5559       len--;
5560    }
5561    tl_assert(len == 0);
5562 }
5563 
5564 __attribute__((noinline))
5565 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
5566    if ((a & 1) && len >= 1) {
5567       MC_(helperc_b_store1)( a, 0 );
5568       a++;
5569       len--;
5570    }
5571    if ((a & 2) && len >= 2) {
5572       MC_(helperc_b_store2)( a, 0 );
5573       a += 2;
5574       len -= 2;
5575    }
5576    if (len >= 4)
5577       tl_assert(0 == (a & 3));
5578    while (len >= 4) {
5579       MC_(helperc_b_store4)( a, 0 );
5580       a += 4;
5581       len -= 4;
5582    }
5583    if (len >= 2) {
5584       MC_(helperc_b_store2)( a, 0 );
5585       a += 2;
5586       len -= 2;
5587    }
5588    if (len >= 1) {
5589       MC_(helperc_b_store1)( a, 0 );
5590       //a++;
5591       len--;
5592    }
5593    tl_assert(len == 0);
5594 }
5595 
5596 
5597 /*------------------------------------------------------------*/
5598 /*--- Setup and finalisation                               ---*/
5599 /*------------------------------------------------------------*/
5600 
5601 static void mc_post_clo_init ( void )
5602 {
5603    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
5604 
5605    if (MC_(clo_mc_level) == 3) {
5606       /* We're doing origin tracking. */
5607 #     ifdef PERF_FAST_STACK
5608       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
5609       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
5610       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
5611       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
5612       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
5613       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
5614       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
5615       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
5616       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
5617 #     endif
5618       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
5619    } else {
5620       /* Not doing origin tracking */
5621 #     ifdef PERF_FAST_STACK
5622       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
5623       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
5624       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
5625       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
5626       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
5627       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
5628       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
5629       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
5630       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
5631 #     endif
5632       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
5633    }
5634 
5635    /* This origin tracking cache is huge (~100M), so only initialise
5636       if we need it. */
5637    if (MC_(clo_mc_level) >= 3) {
5638       init_OCache();
5639       tl_assert(ocacheL1 != NULL);
5640       tl_assert(ocacheL2 != NULL);
5641    } else {
5642       tl_assert(ocacheL1 == NULL);
5643       tl_assert(ocacheL2 == NULL);
5644    }
5645 }
5646 
5647 static void print_SM_info(char* type, int n_SMs)
5648 {
5649    VG_(message)(Vg_DebugMsg,
5650       " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
5651       type,
5652       n_SMs,
5653       n_SMs * sizeof(SecMap) / 1024UL,
5654       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
5655 }
5656 
5657 static void mc_fini ( Int exitcode )
5658 {
5659    MC_(print_malloc_stats)();
5660 
5661    if (MC_(clo_leak_check) != LC_Off) {
5662       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, MC_(clo_leak_check));
5663    } else {
5664       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
5665          VG_(umsg)(
5666             "For a detailed leak analysis, rerun with: --leak-check=full\n"
5667             "\n"
5668          );
5669       }
5670    }
5671 
5672    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
5673       VG_(message)(Vg_UserMsg,
5674                    "For counts of detected and suppressed errors, rerun with: -v\n");
5675    }
5676 
5677    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
5678        && MC_(clo_mc_level) == 2) {
5679       VG_(message)(Vg_UserMsg,
5680                    "Use --track-origins=yes to see where "
5681                    "uninitialised values come from\n");
5682    }
5683 
5684    done_prof_mem();
5685 
5686    if (VG_(clo_stats)) {
5687       SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
5688 
5689       VG_(message)(Vg_DebugMsg,
5690          " memcheck: sanity checks: %d cheap, %d expensive\n",
5691          n_sanity_cheap, n_sanity_expensive );
5692       VG_(message)(Vg_DebugMsg,
5693          " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
5694          n_auxmap_L2_nodes,
5695          n_auxmap_L2_nodes * 64,
5696          n_auxmap_L2_nodes / 16 );
5697       VG_(message)(Vg_DebugMsg,
5698          " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
5699          n_auxmap_L1_searches, n_auxmap_L1_cmps,
5700          (10ULL * n_auxmap_L1_cmps)
5701             / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
5702       );
5703       VG_(message)(Vg_DebugMsg,
5704          " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
5705          n_auxmap_L2_searches, n_auxmap_L2_nodes
5706       );
5707 
5708       print_SM_info("n_issued     ", n_issued_SMs);
5709       print_SM_info("n_deissued   ", n_deissued_SMs);
5710       print_SM_info("max_noaccess ", max_noaccess_SMs);
5711       print_SM_info("max_undefined", max_undefined_SMs);
5712       print_SM_info("max_defined  ", max_defined_SMs);
5713       print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
5714 
5715       // Three DSMs, plus the non-DSM ones
5716       max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
5717       // The 3*sizeof(Word) bytes is the AVL node metadata size.
5718       // The 4*sizeof(Word) bytes is the malloc metadata size.
5719       // Hardwiring these sizes in sucks, but I don't see how else to do it.
5720       max_secVBit_szB = max_secVBit_nodes *
5721             (sizeof(SecVBitNode) + 3*sizeof(Word) + 4*sizeof(Word));
5722       max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
5723 
5724       VG_(message)(Vg_DebugMsg,
5725          " memcheck: max sec V bit nodes:    %d (%ldk, %ldM)\n",
5726          max_secVBit_nodes, max_secVBit_szB / 1024,
5727                             max_secVBit_szB / (1024 * 1024));
5728       VG_(message)(Vg_DebugMsg,
5729          " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
5730          sec_vbits_new_nodes + sec_vbits_updates,
5731          sec_vbits_new_nodes, sec_vbits_updates );
5732       VG_(message)(Vg_DebugMsg,
5733          " memcheck: max shadow mem size:   %ldk, %ldM\n",
5734          max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
5735 
5736       if (MC_(clo_mc_level) >= 3) {
5737          VG_(message)(Vg_DebugMsg,
5738                       " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
5739                       stats_ocacheL1_find,
5740                       stats_ocacheL1_misses,
5741                       stats_ocacheL1_lossage );
5742          VG_(message)(Vg_DebugMsg,
5743                       " ocacheL1: %'12lu at 0   %'12lu at 1\n",
5744                       stats_ocacheL1_find - stats_ocacheL1_misses
5745                          - stats_ocacheL1_found_at_1
5746                          - stats_ocacheL1_found_at_N,
5747                       stats_ocacheL1_found_at_1 );
5748          VG_(message)(Vg_DebugMsg,
5749                       " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
5750                       stats_ocacheL1_found_at_N,
5751                       stats_ocacheL1_movefwds );
5752          VG_(message)(Vg_DebugMsg,
5753                       " ocacheL1: %'12lu sizeB  %'12u useful\n",
5754                       (UWord)sizeof(OCache),
5755                       4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
5756          VG_(message)(Vg_DebugMsg,
5757                       " ocacheL2: %'12lu refs   %'12lu misses\n",
5758                       stats__ocacheL2_refs,
5759                       stats__ocacheL2_misses );
5760          VG_(message)(Vg_DebugMsg,
5761                       " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
5762                       stats__ocacheL2_n_nodes_max,
5763                       stats__ocacheL2_n_nodes );
5764          VG_(message)(Vg_DebugMsg,
5765                       " niacache: %'12lu refs   %'12lu misses\n",
5766                       stats__nia_cache_queries, stats__nia_cache_misses);
5767       } else {
5768          tl_assert(ocacheL1 == NULL);
5769          tl_assert(ocacheL2 == NULL);
5770       }
5771    }
5772 
5773    if (0) {
5774       VG_(message)(Vg_DebugMsg,
5775         "------ Valgrind's client block stats follow ---------------\n" );
5776       show_client_block_stats();
5777    }
5778 }
5779 
5780 static void mc_pre_clo_init(void)
5781 {
5782    VG_(details_name)            ("Memcheck");
5783    VG_(details_version)         (NULL);
5784    VG_(details_description)     ("a memory error detector");
5785    VG_(details_copyright_author)(
5786       "Copyright (C) 2002-2010, and GNU GPL'd, by Julian Seward et al.");
5787    VG_(details_bug_reports_to)  (VG_BUGS_TO);
5788    VG_(details_avg_translation_sizeB) ( 556 );
5789 
5790    VG_(basic_tool_funcs)          (mc_post_clo_init,
5791                                    MC_(instrument),
5792                                    mc_fini);
5793 
5794    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
5795 
5796 
5797    VG_(needs_core_errors)         ();
5798    VG_(needs_tool_errors)         (MC_(eq_Error),
5799                                    MC_(before_pp_Error),
5800                                    MC_(pp_Error),
5801                                    True,/*show TIDs for errors*/
5802                                    MC_(update_Error_extra),
5803                                    MC_(is_recognised_suppression),
5804                                    MC_(read_extra_suppression_info),
5805                                    MC_(error_matches_suppression),
5806                                    MC_(get_error_name),
5807                                    MC_(get_extra_suppression_info));
5808    VG_(needs_libc_freeres)        ();
5809    VG_(needs_command_line_options)(mc_process_cmd_line_options,
5810                                    mc_print_usage,
5811                                    mc_print_debug_usage);
5812    VG_(needs_client_requests)     (mc_handle_client_request);
5813    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
5814                                    mc_expensive_sanity_check);
5815    VG_(needs_malloc_replacement)  (MC_(malloc),
5816                                    MC_(__builtin_new),
5817                                    MC_(__builtin_vec_new),
5818                                    MC_(memalign),
5819                                    MC_(calloc),
5820                                    MC_(free),
5821                                    MC_(__builtin_delete),
5822                                    MC_(__builtin_vec_delete),
5823                                    MC_(realloc),
5824                                    MC_(malloc_usable_size),
5825                                    MC_MALLOC_REDZONE_SZB );
5826 
5827    VG_(needs_xml_output)          ();
5828 
5829    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
5830    VG_(track_new_mem_stack_signal)( make_mem_undefined_w_tid );
5831    // We assume that brk()/sbrk() does not initialise new memory.  Is this
5832    // accurate?  John Reiser says:
5833    //
5834    //   0) sbrk() can *decrease* process address space.  No zero fill is done
5835    //   for a decrease, not even the fragment on the high end of the last page
5836    //   that is beyond the new highest address.  For maximum safety and
5837    //   portability, then the bytes in the last page that reside above [the
5838    //   new] sbrk(0) should be considered to be uninitialized, but in practice
5839    //   it is exceedingly likely that they will retain their previous
5840    //   contents.
5841    //
5842    //   1) If an increase is large enough to require new whole pages, then
5843    //   those new whole pages (like all new pages) are zero-filled by the
5844    //   operating system.  So if sbrk(0) already is page aligned, then
5845    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
5846    //
5847    //   2) Any increase that lies within an existing allocated page is not
5848    //   changed.  So if (x = sbrk(0)) is not page aligned, then
5849    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
5850    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
5851    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
5852    //   of them come along for the ride because the operating system deals
5853    //   only in whole pages.  Again, for maximum safety and portability, then
5854    //   anything that lives above [the new] sbrk(0) should be considered
5855    //   uninitialized, but in practice will retain previous contents [zero in
5856    //   this case.]"
5857    //
5858    // In short:
5859    //
5860    //   A key property of sbrk/brk is that new whole pages that are supplied
5861    //   by the operating system *do* get initialized to zero.
5862    //
5863    // As for the portability of all this:
5864    //
5865    //   sbrk and brk are not POSIX.  However, any system that is a derivative
5866    //   of *nix has sbrk and brk because there are too many softwares (such as
5867    //   the Bourne shell) which rely on the traditional memory map (.text,
5868    //   .data+.bss, stack) and the existence of sbrk/brk.
5869    //
5870    // So we should arguably observe all this.  However:
5871    // - The current inaccuracy has caused maybe one complaint in seven years(?)
5872    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
5873    //   doubt most programmers know the above information.
5874    // So I'm not terribly unhappy with marking it as undefined. --njn.
5875    //
5876    // [More:  I think most of what John said only applies to sbrk().  It seems
5877    // that brk() always deals in whole pages.  And since this event deals
5878    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
5879    // just mark all memory it allocates as defined.]
5880    //
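   // Illustrative arithmetic (hypothetical values): with PAGE_SIZE ==
   // 0x1000 and a current break of sbrk(0) == 0x601123, a subsequent
   // sbrk(0x1000) moves the break to 0x602123.  Bytes 0x601123..0x601FFF
   // lie inside the already-mapped last page and keep whatever they held
   // before, while 0x602000..0x602122 come from a freshly zero-filled
   // page supplied by the kernel -- point (2) above.
   //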
5881    VG_(track_new_mem_brk)         ( make_mem_undefined_w_tid );
5882 
5883    // Handling of mmap and mprotect isn't simple (well, it is simple,
5884    // but the justification isn't.)  See comments above, just prior to
5885    // mc_new_mem_mmap.
5886    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
5887    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
5888 
5889    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
5890 
5891    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
5892    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
5893    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
5894 
5895    /* Defer the specification of the new_mem_stack functions to the
5896       post_clo_init function, since we need to first parse the command
5897       line before deciding which set to use. */
5898 
5899 #  ifdef PERF_FAST_STACK
5900    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
5901    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
5902    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
5903    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
5904    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
5905    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
5906    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
5907    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
5908    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
5909 #  endif
5910    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
5911 
5912    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
5913 
5914    VG_(track_pre_mem_read)        ( check_mem_is_defined );
5915    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
5916    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
5917    VG_(track_post_mem_write)      ( mc_post_mem_write );
5918 
5919    if (MC_(clo_mc_level) >= 2)
5920       VG_(track_pre_reg_read)     ( mc_pre_reg_read );
5921 
5922    VG_(track_post_reg_write)                  ( mc_post_reg_write );
5923    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
5924 
5925    init_shadow_memory();
5926    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
5927    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
5928    init_prof_mem();
5929 
5930    tl_assert( mc_expensive_sanity_check() );
5931 
5932    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
5933    tl_assert(sizeof(UWord) == sizeof(Addr));
5934    // Call me paranoid.  I don't care.
5935    tl_assert(sizeof(void*) == sizeof(Addr));
5936 
5937    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
5938    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
5939 
5940    /* This is small.  Always initialise it. */
5941    init_nia_to_ecu_cache();
5942 
5943    /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
5944       if we need to, since the command line args haven't been
5945       processed yet.  Hence defer it to mc_post_clo_init. */
5946    tl_assert(ocacheL1 == NULL);
5947    tl_assert(ocacheL2 == NULL);
5948 
5949    /* Check some important stuff.  See extensive comments above
5950       re UNALIGNED_OR_HIGH for background. */
5951 #  if VG_WORDSIZE == 4
5952    tl_assert(sizeof(void*) == 4);
5953    tl_assert(sizeof(Addr)  == 4);
5954    tl_assert(sizeof(UWord) == 4);
5955    tl_assert(sizeof(Word)  == 4);
5956    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
5957    tl_assert(MASK(1) == 0UL);
5958    tl_assert(MASK(2) == 1UL);
5959    tl_assert(MASK(4) == 3UL);
5960    tl_assert(MASK(8) == 7UL);
5961 #  else
5962    tl_assert(VG_WORDSIZE == 8);
5963    tl_assert(sizeof(void*) == 8);
5964    tl_assert(sizeof(Addr)  == 8);
5965    tl_assert(sizeof(UWord) == 8);
5966    tl_assert(sizeof(Word)  == 8);
5967    tl_assert(MAX_PRIMARY_ADDRESS == 0x3FFFFFFFFFULL);
5968    tl_assert(MASK(1) == 0xFFFFFFC000000000ULL);
5969    tl_assert(MASK(2) == 0xFFFFFFC000000001ULL);
5970    tl_assert(MASK(4) == 0xFFFFFFC000000003ULL);
5971    tl_assert(MASK(8) == 0xFFFFFFC000000007ULL);
5972 #  endif
5973 }
5974 
5975 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
5976 
5977 /*--------------------------------------------------------------------*/
5978 /*--- end                                                mc_main.c ---*/
5979 /*--------------------------------------------------------------------*/
5980