1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*--------------------------------------------------------------------*/
4 /*--- MemCheck: Maintain bitmaps of memory, tracking the           ---*/
5 /*--- accessibility (A) and validity (V) status of each byte.      ---*/
6 /*---                                                    mc_main.c ---*/
7 /*--------------------------------------------------------------------*/
8 
9 /*
10    This file is part of MemCheck, a heavyweight Valgrind tool for
11    detecting memory errors.
12 
13    Copyright (C) 2000-2017 Julian Seward
14       jseward@acm.org
15 
16    This program is free software; you can redistribute it and/or
17    modify it under the terms of the GNU General Public License as
18    published by the Free Software Foundation; either version 2 of the
19    License, or (at your option) any later version.
20 
21    This program is distributed in the hope that it will be useful, but
22    WITHOUT ANY WARRANTY; without even the implied warranty of
23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24    General Public License for more details.
25 
26    You should have received a copy of the GNU General Public License
27    along with this program; if not, write to the Free Software
28    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29    02111-1307, USA.
30 
31    The GNU General Public License is contained in the file COPYING.
32 */
33 
34 #include "pub_tool_basics.h"
35 #include "pub_tool_aspacemgr.h"
36 #include "pub_tool_gdbserver.h"
37 #include "pub_tool_poolalloc.h"
38 #include "pub_tool_hashtable.h"     // For mc_include.h
39 #include "pub_tool_libcbase.h"
40 #include "pub_tool_libcassert.h"
41 #include "pub_tool_libcprint.h"
42 #include "pub_tool_machine.h"
43 #include "pub_tool_mallocfree.h"
44 #include "pub_tool_options.h"
45 #include "pub_tool_oset.h"
46 #include "pub_tool_rangemap.h"
47 #include "pub_tool_replacemalloc.h"
48 #include "pub_tool_tooliface.h"
49 #include "pub_tool_threadstate.h"
50 #include "pub_tool_xarray.h"
51 #include "pub_tool_xtree.h"
52 #include "pub_tool_xtmemory.h"
53 
54 #include "mc_include.h"
55 #include "memcheck.h"   /* for client requests */
56 
57 
58 /* Set to 1 to enable handwritten assembly helpers on targets for
59    which it is supported. */
60 #define ENABLE_ASSEMBLY_HELPERS 1
61 
62 /* Set to 1 to do a little more sanity checking */
63 #define VG_DEBUG_MEMORY 0
64 
65 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
66 
67 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
68 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
69 
70 
71 /*------------------------------------------------------------*/
72 /*--- Fast-case knobs                                      ---*/
73 /*------------------------------------------------------------*/
74 
75 // Comment these out to disable the fast cases (don't just set them to zero).
76 
77 #define PERF_FAST_LOADV    1
78 #define PERF_FAST_STOREV   1
79 
80 #define PERF_FAST_SARP     1
81 
82 #define PERF_FAST_STACK    1
83 #define PERF_FAST_STACK2   1
84 
85 /* Change this to 1 to enable assertions on origin tracking cache fast
86    paths */
87 #define OC_ENABLE_ASSERTIONS 0
88 
89 
90 /*------------------------------------------------------------*/
91 /*--- Comments on the origin tracking implementation       ---*/
92 /*------------------------------------------------------------*/
93 
94 /* See detailed comment entitled
95    AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
96    which is contained further on in this file. */
97 
98 
99 /*------------------------------------------------------------*/
100 /*--- V bits and A bits                                    ---*/
101 /*------------------------------------------------------------*/
102 
103 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
104    thinks the corresponding value bit is defined.  And every memory byte
105    has an A bit, which tracks whether Memcheck thinks the program can access
106    it safely (ie. it's mapped, and has at least one of the RWX permission bits
107    set).  So every N-bit register is shadowed with N V bits, and every memory
108    byte is shadowed with 8 V bits and one A bit.
109 
110    In the implementation, we use two forms of compression (compressed V bits
111    and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
112    for memory.
113 
114    Memcheck also tracks extra information about each heap block that is
115    allocated, for detecting memory leaks and other purposes.
116 */
117 
118 /*------------------------------------------------------------*/
119 /*--- Basic A/V bitmap representation.                     ---*/
120 /*------------------------------------------------------------*/
121 
122 /* All reads and writes are checked against a memory map (a.k.a. shadow
123    memory), which records the state of all memory in the process.
124 
125    On 32-bit machines the memory map is organised as follows.
126    The top 16 bits of an address are used to index into a top-level
127    map table, containing 65536 entries.  Each entry is a pointer to a
128    second-level map, which records the accesibililty and validity
129    permissions for the 65536 bytes indexed by the lower 16 bits of the
130    address.  Each byte is represented by two bits (details are below).  So
131    each second-level map contains 16384 bytes.  This two-level arrangement
132    conveniently divides the 4G address space into 64k lumps, each of size 64k
133    bytes.
134 
135    All entries in the primary (top-level) map must point to a valid
136    secondary (second-level) map.  Since many of the 64kB chunks will
137    have the same status for every bit -- ie. noaccess (for unused
138    address space) or entirely addressable and defined (for code segments) --
139    there are three distinguished secondary maps, which indicate 'noaccess',
140    'undefined' and 'defined'.  For these uniform 64kB chunks, the primary
141    map entry points to the relevant distinguished map.  In practice,
142    typically more than half of the addressable memory is represented with
143    the 'undefined' or 'defined' distinguished secondary map, so it gives a
144    good saving.  It also lets us set the V+A bits of large address regions
145    quickly in set_address_range_perms().
146 
147    On 64-bit machines it's more complicated.  If we followed the same basic
148    scheme we'd have a four-level table which would require too many memory
149    accesses.  So instead the top-level map table has 2^21 entries (indexed
150    using bits 16..36 of the address);  this covers the bottom 128GB.  Any
151    accesses above 128GB are handled with a slow, sparse auxiliary table.
152    Valgrind's address space manager tries very hard to keep things below
153    this 128GB barrier so that performance doesn't suffer too much.
154 
155    Note that this file has a lot of different functions for reading and
156    writing shadow memory.  Only a couple are strictly necessary (eg.
157    get_vabits2 and set_vabits2); most are just specialised for specific
158    common cases to improve performance.
159 
160    Aside: the V+A bits are less precise than they could be -- we have no way
161    of marking memory as read-only.  It would be great if we could add an
162    extra state VA_BITSn_READONLY.  But then we'd have 5 different states,
163    which requires 2.3 bits to hold, and there's no way to do that elegantly
164    -- we'd have to double up to 4 bits of metadata per byte, which doesn't
165    seem worth it.
166 */
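/* A small worked example of the 32-bit scheme just described (purely
   illustrative; the real lookups are done by SM_OFF and the
   get_secmap_* helpers below).  For an address a = 0x4021A6F3:

      primary index   = a >> 16     = 0x4021  (1 of the 65536 top-level entries)
      secondary index = a & 0xFFFF  = 0xA6F3  (byte within that 64kB chunk)

   The secondary map packs 2 V+A bits per byte, four bytes per entry, so
   the metadata for this byte lives at offset 0xA6F3 >> 2 = 0x29BC of the
   secondary map's 16384-byte array. */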
167 
168 /* --------------- Basic configuration --------------- */
169 
170 /* Only change this.  N_PRIMARY_MAP *must* be a power of 2. */
171 
172 #if VG_WORDSIZE == 4
173 
174 /* cover the entire address space */
175 #  define N_PRIMARY_BITS  16
176 
177 #else
178 
179 /* Just handle the first 128G fast and the rest via auxiliary
180    primaries.  If you change this, Memcheck will assert at startup.
181    See the definition of UNALIGNED_OR_HIGH for extensive comments. */
182 #  define N_PRIMARY_BITS  21
183 
184 #endif
185 
186 
187 /* Do not change this. */
188 #define N_PRIMARY_MAP  ( ((UWord)1) << N_PRIMARY_BITS)
189 
190 /* Do not change this. */
191 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
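/* For reference: with the 64-bit setting of N_PRIMARY_BITS = 21 above,
   N_PRIMARY_MAP is 2^21 and MAX_PRIMARY_ADDRESS is
   65536 * 2^21 - 1 = 2^37 - 1 = 0x1FFFFFFFFF, so the primary map covers
   the bottom 128GB of the address space; anything above that is handled
   via the auxiliary primary map below. */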
192 
193 
194 /* --------------- Secondary maps --------------- */
195 
196 // Each byte of memory conceptually has an A bit, which indicates its
197 // addressability, and 8 V bits, which indicate its definedness.
198 //
199 // But because very few bytes are partially defined, we can use a nice
200 // compression scheme to reduce the size of shadow memory.  Each byte of
201 // memory has 2 bits which indicate its state (ie. V+A bits):
202 //
203 //   00:  noaccess    (unaddressable but treated as fully defined)
204 //   01:  undefined   (addressable and fully undefined)
205 //   10:  defined     (addressable and fully defined)
206 //   11:  partdefined (addressable and partially defined)
207 //
208 // In the "partdefined" case, we use a secondary table to store the V bits.
209 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
210 // bits.
211 //
212 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
213 // four bytes (32 bits) of memory are in each chunk.  Hence the name
214 // "vabits8".  This lets us get the V+A bits for four bytes at a time
215 // easily (without having to do any shifting and/or masking), and that is a
216 // very common operation.  (Note that although each vabits8 chunk
217 // is 8 bits in size, it represents 32 bits of memory.)
218 //
219 // The representation is "inverse" little-endian... each 4 bytes of
220 // memory is represented by a 1 byte value, where:
221 //
222 // - the status of byte (a+0) is held in bits [1..0]
223 // - the status of byte (a+1) is held in bits [3..2]
224 // - the status of byte (a+2) is held in bits [5..4]
225 // - the status of byte (a+3) is held in bits [7..6]
226 //
227 // It's "inverse" because endianness normally describes a mapping from
228 // value bits to memory addresses;  in this case the mapping is inverted.
229 // Ie. instead of particular value bits being held in certain addresses, in
230 // this case certain addresses are represented by particular value bits.
231 // See insert_vabits2_into_vabits8() for an example.
232 //
233 // But note that we don't compress the V bits stored in registers;  they
234 // need to be explicit to make the shadow operations possible.  Therefore
235 // when moving values between registers and memory we need to convert
236 // between the expanded in-register format and the compressed in-memory
237 // format.  This isn't so difficult, it just requires careful attention in a
238 // few places.
239 
240 // These represent eight bits of memory.
241 #define VA_BITS2_NOACCESS     0x0      // 00b
242 #define VA_BITS2_UNDEFINED    0x1      // 01b
243 #define VA_BITS2_DEFINED      0x2      // 10b
244 #define VA_BITS2_PARTDEFINED  0x3      // 11b
245 
246 // These represent 16 bits of memory.
247 #define VA_BITS4_NOACCESS     0x0      // 00_00b
248 #define VA_BITS4_UNDEFINED    0x5      // 01_01b
249 #define VA_BITS4_DEFINED      0xa      // 10_10b
250 
251 // These represent 32 bits of memory.
252 #define VA_BITS8_NOACCESS     0x00     // 00_00_00_00b
253 #define VA_BITS8_UNDEFINED    0x55     // 01_01_01_01b
254 #define VA_BITS8_DEFINED      0xaa     // 10_10_10_10b
255 
256 // These represent 64 bits of memory.
257 #define VA_BITS16_NOACCESS    0x0000   // 00_00_00_00b x 2
258 #define VA_BITS16_UNDEFINED   0x5555   // 01_01_01_01b x 2
259 #define VA_BITS16_DEFINED     0xaaaa   // 10_10_10_10b x 2
260 
261 // These represent 128 bits of memory.
262 #define VA_BITS32_UNDEFINED   0x55555555  // 01_01_01_01b x 4
263 
264 
265 #define SM_CHUNKS             16384    // Each SM covers 64k of memory.
266 #define SM_OFF(aaa)           (((aaa) & 0xffff) >> 2)
267 #define SM_OFF_16(aaa)        (((aaa) & 0xffff) >> 3)
268 
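// A quick example of the addressing macros above (illustration only):
// for a byte at address a = 0x500136, its V+A bits live in
// vabits8[SM_OFF(a)] = vabits8[0x0136 >> 2] = vabits8[0x4D], and within
// that entry they occupy the 2-bit field at shift (a & 3) << 1 = 4,
// ie. bits [5..4], per the "inverse" little-endian layout described
// earlier.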
269 // Paranoia:  it's critical for performance that the requested inlining
270 // occurs.  So try extra hard.
271 #define INLINE    inline __attribute__((always_inline))
272 
273 static INLINE Addr start_of_this_sm ( Addr a ) {
274    return (a & (~SM_MASK));
275 }
276 static INLINE Bool is_start_of_sm ( Addr a ) {
277    return (start_of_this_sm(a) == a);
278 }
279 
280 STATIC_ASSERT(SM_CHUNKS % 2 == 0);
281 
282 typedef
283    union {
284       UChar vabits8[SM_CHUNKS];
285       UShort vabits16[SM_CHUNKS/2];
286    }
287    SecMap;
288 
289 // 3 distinguished secondary maps, one for no-access, one for
290 // accessible but undefined, and one for accessible and defined.
291 // Distinguished secondaries may never be modified.
292 #define SM_DIST_NOACCESS   0
293 #define SM_DIST_UNDEFINED  1
294 #define SM_DIST_DEFINED    2
295 
296 static SecMap sm_distinguished[3];
297 
298 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
299    return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
300 }
301 
302 // Forward declaration
303 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
304 
305 /* dist_sm points to one of our three distinguished secondaries.  Make
306    a copy of it so that we can write to it.
307 */
308 static SecMap* copy_for_writing ( SecMap* dist_sm )
309 {
310    SecMap* new_sm;
311    tl_assert(dist_sm == &sm_distinguished[0]
312           || dist_sm == &sm_distinguished[1]
313           || dist_sm == &sm_distinguished[2]);
314 
315    new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
316    if (new_sm == NULL)
317       VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
318                                    sizeof(SecMap) );
319    VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
320    update_SM_counts(dist_sm, new_sm);
321    return new_sm;
322 }
323 
324 /* --------------- Stats --------------- */
325 
326 static Int   n_issued_SMs      = 0;
327 static Int   n_deissued_SMs    = 0;
328 static Int   n_noaccess_SMs    = N_PRIMARY_MAP; // start with many noaccess DSMs
329 static Int   n_undefined_SMs   = 0;
330 static Int   n_defined_SMs     = 0;
331 static Int   n_non_DSM_SMs     = 0;
332 static Int   max_noaccess_SMs  = 0;
333 static Int   max_undefined_SMs = 0;
334 static Int   max_defined_SMs   = 0;
335 static Int   max_non_DSM_SMs   = 0;
336 
337 /* # searches initiated in auxmap_L1, and # base cmps required */
338 static ULong n_auxmap_L1_searches  = 0;
339 static ULong n_auxmap_L1_cmps      = 0;
340 /* # of searches that missed in auxmap_L1 and therefore had to
341    be handed to auxmap_L2. And the number of nodes inserted. */
342 static ULong n_auxmap_L2_searches  = 0;
343 static ULong n_auxmap_L2_nodes     = 0;
344 
345 static Int   n_sanity_cheap     = 0;
346 static Int   n_sanity_expensive = 0;
347 
348 static Int   n_secVBit_nodes   = 0;
349 static Int   max_secVBit_nodes = 0;
350 
351 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
352 {
353    if      (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
354    else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
355    else if (oldSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  --;
356    else                                                  { n_non_DSM_SMs  --;
357                                                            n_deissued_SMs ++; }
358 
359    if      (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
360    else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
361    else if (newSM == &sm_distinguished[SM_DIST_DEFINED  ]) n_defined_SMs  ++;
362    else                                                  { n_non_DSM_SMs  ++;
363                                                            n_issued_SMs   ++; }
364 
365    if (n_noaccess_SMs  > max_noaccess_SMs ) max_noaccess_SMs  = n_noaccess_SMs;
366    if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
367    if (n_defined_SMs   > max_defined_SMs  ) max_defined_SMs   = n_defined_SMs;
368    if (n_non_DSM_SMs   > max_non_DSM_SMs  ) max_non_DSM_SMs   = n_non_DSM_SMs;
369 }
370 
371 /* --------------- Primary maps --------------- */
372 
373 /* The main primary map.  This covers some initial part of the address
374    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
375    handled using the auxiliary primary map.
376 */
377 static SecMap* primary_map[N_PRIMARY_MAP];
378 
379 
380 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
381    value, and sm points at the relevant secondary map.  As with the
382    main primary map, the secondary may be either a real secondary, or
383    one of the three distinguished secondaries.  DO NOT CHANGE THIS
384    LAYOUT: the first word has to be the key for OSet fast lookups.
385 */
386 typedef
387    struct {
388       Addr    base;
389       SecMap* sm;
390    }
391    AuxMapEnt;
392 
393 /* Tunable parameter: How big is the L1 queue? */
394 #define N_AUXMAP_L1 24
395 
396 /* Tunable parameter: How far along the L1 queue to insert
397    entries resulting from L2 lookups? */
398 #define AUXMAP_L1_INSERT_IX 12
399 
400 static struct {
401           Addr       base;
402           AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
403        }
404        auxmap_L1[N_AUXMAP_L1];
405 
406 static OSet* auxmap_L2 = NULL;
407 
408 static void init_auxmap_L1_L2 ( void )
409 {
410    Int i;
411    for (i = 0; i < N_AUXMAP_L1; i++) {
412       auxmap_L1[i].base = 0;
413       auxmap_L1[i].ent  = NULL;
414    }
415 
416    tl_assert(0 == offsetof(AuxMapEnt,base));
417    tl_assert(sizeof(Addr) == sizeof(void*));
418    auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/  offsetof(AuxMapEnt,base),
419                                     /*fastCmp*/ NULL,
420                                     VG_(malloc), "mc.iaLL.1", VG_(free) );
421 }
422 
423 /* Check representation invariants; if OK return NULL; else a
424    descriptive bit of text.  Also return the number of
425    non-distinguished secondary maps referred to from the auxiliary
426    primary maps. */
427 
428 static const HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
429 {
430    Word i, j;
431    /* On a 32-bit platform, the L2 and L1 tables should
432       both remain empty forever.
433 
434       On a 64-bit platform:
435       In the L2 table:
436        all .base & 0xFFFF == 0
437        all .base > MAX_PRIMARY_ADDRESS
438       In the L1 table:
439        all .base & 0xFFFF == 0
440        all (.base > MAX_PRIMARY_ADDRESS
441             .base & 0xFFFF == 0
442             and .ent points to an AuxMapEnt with the same .base)
443            or
444            (.base == 0 and .ent == NULL)
445    */
446    *n_secmaps_found = 0;
447    if (sizeof(void*) == 4) {
448       /* 32-bit platform */
449       if (VG_(OSetGen_Size)(auxmap_L2) != 0)
450          return "32-bit: auxmap_L2 is non-empty";
451       for (i = 0; i < N_AUXMAP_L1; i++)
452         if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
453            return "32-bit: auxmap_L1 is non-empty";
454    } else {
455       /* 64-bit platform */
456       UWord elems_seen = 0;
457       AuxMapEnt *elem, *res;
458       AuxMapEnt key;
459       /* L2 table */
460       VG_(OSetGen_ResetIter)(auxmap_L2);
461       while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
462          elems_seen++;
463          if (0 != (elem->base & (Addr)0xFFFF))
464             return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
465          if (elem->base <= MAX_PRIMARY_ADDRESS)
466             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
467          if (elem->sm == NULL)
468             return "64-bit: .sm in _L2 is NULL";
469          if (!is_distinguished_sm(elem->sm))
470             (*n_secmaps_found)++;
471       }
472       if (elems_seen != n_auxmap_L2_nodes)
473          return "64-bit: disagreement on number of elems in _L2";
474       /* Check L1-L2 correspondence */
475       for (i = 0; i < N_AUXMAP_L1; i++) {
476          if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
477             continue;
478          if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
479             return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
480          if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
481             return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
482          if (auxmap_L1[i].ent == NULL)
483             return "64-bit: .ent is NULL in auxmap_L1";
484          if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
485             return "64-bit: _L1 and _L2 bases are inconsistent";
486          /* Look it up in auxmap_L2. */
487          key.base = auxmap_L1[i].base;
488          key.sm   = 0;
489          res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
490          if (res == NULL)
491             return "64-bit: _L1 .base not found in _L2";
492          if (res != auxmap_L1[i].ent)
493             return "64-bit: _L1 .ent disagrees with _L2 entry";
494       }
495       /* Check L1 contains no duplicates */
496       for (i = 0; i < N_AUXMAP_L1; i++) {
497          if (auxmap_L1[i].base == 0)
498             continue;
499          for (j = i+1; j < N_AUXMAP_L1; j++) {
500             if (auxmap_L1[j].base == 0)
501                continue;
502             if (auxmap_L1[j].base == auxmap_L1[i].base)
503                return "64-bit: duplicate _L1 .base entries";
504          }
505       }
506    }
507    return NULL; /* ok */
508 }
509 
510 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
511 {
512    Word i;
513    tl_assert(ent);
514    tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
515    for (i = N_AUXMAP_L1-1; i > rank; i--)
516       auxmap_L1[i] = auxmap_L1[i-1];
517    auxmap_L1[rank].base = ent->base;
518    auxmap_L1[rank].ent  = ent;
519 }
520 
521 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
522 {
523    AuxMapEnt  key;
524    AuxMapEnt* res;
525    Word       i;
526 
527    tl_assert(a > MAX_PRIMARY_ADDRESS);
528    a &= ~(Addr)0xFFFF;
529 
530    /* First search the front-cache, which is a self-organising
531       list containing the most popular entries. */
532 
533    if (LIKELY(auxmap_L1[0].base == a))
534       return auxmap_L1[0].ent;
535    if (LIKELY(auxmap_L1[1].base == a)) {
536       Addr       t_base = auxmap_L1[0].base;
537       AuxMapEnt* t_ent  = auxmap_L1[0].ent;
538       auxmap_L1[0].base = auxmap_L1[1].base;
539       auxmap_L1[0].ent  = auxmap_L1[1].ent;
540       auxmap_L1[1].base = t_base;
541       auxmap_L1[1].ent  = t_ent;
542       return auxmap_L1[0].ent;
543    }
544 
545    n_auxmap_L1_searches++;
546 
547    for (i = 0; i < N_AUXMAP_L1; i++) {
548       if (auxmap_L1[i].base == a) {
549          break;
550       }
551    }
552    tl_assert(i >= 0 && i <= N_AUXMAP_L1);
553 
554    n_auxmap_L1_cmps += (ULong)(i+1);
555 
556    if (i < N_AUXMAP_L1) {
557       if (i > 0) {
558          Addr       t_base = auxmap_L1[i-1].base;
559          AuxMapEnt* t_ent  = auxmap_L1[i-1].ent;
560          auxmap_L1[i-1].base = auxmap_L1[i-0].base;
561          auxmap_L1[i-1].ent  = auxmap_L1[i-0].ent;
562          auxmap_L1[i-0].base = t_base;
563          auxmap_L1[i-0].ent  = t_ent;
564          i--;
565       }
566       return auxmap_L1[i].ent;
567    }
568 
569    n_auxmap_L2_searches++;
570 
571    /* First see if we already have it. */
572    key.base = a;
573    key.sm   = 0;
574 
575    res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
576    if (res)
577       insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
578    return res;
579 }
580 
581 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
582 {
583    AuxMapEnt *nyu, *res;
584 
585    /* First see if we already have it. */
586    res = maybe_find_in_auxmap( a );
587    if (LIKELY(res))
588       return res;
589 
590    /* Ok, there's no entry in the secondary map, so we'll have
591       to allocate one. */
592    a &= ~(Addr)0xFFFF;
593 
594    nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
595    nyu->base = a;
596    nyu->sm   = &sm_distinguished[SM_DIST_NOACCESS];
597    VG_(OSetGen_Insert)( auxmap_L2, nyu );
598    insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
599    n_auxmap_L2_nodes++;
600    return nyu;
601 }
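/* Small example of the keying scheme (illustrative): an access at, say,
   0x7F32A1C4B6E8 -- above MAX_PRIMARY_ADDRESS -- is keyed by its
   64kB-aligned base 0x7F32A1C40000.  The first touch of that chunk
   allocates an AuxMapEnt whose .sm initially points at the
   distinguished no-access secondary; it is only replaced by a writable
   copy (via copy_for_writing) when the chunk's state first needs to
   change. */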
602 
603 /* --------------- SecMap fundamentals --------------- */
604 
605 // In all these, 'low' means it's definitely in the main primary map,
606 // 'high' means it's definitely in the auxiliary table.
607 
608 static INLINE UWord get_primary_map_low_offset ( Addr a )
609 {
610   UWord pm_off = a >> 16;
611   return pm_off;
612 }
613 
614 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
615 {
616    UWord pm_off = a >> 16;
617 #  if VG_DEBUG_MEMORY >= 1
618    tl_assert(pm_off < N_PRIMARY_MAP);
619 #  endif
620    return &primary_map[ pm_off ];
621 }
622 
623 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
624 {
625    AuxMapEnt* am = find_or_alloc_in_auxmap(a);
626    return &am->sm;
627 }
628 
629 static INLINE SecMap** get_secmap_ptr ( Addr a )
630 {
631    return ( a <= MAX_PRIMARY_ADDRESS
632           ? get_secmap_low_ptr(a)
633           : get_secmap_high_ptr(a));
634 }
635 
636 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
637 {
638    return *get_secmap_low_ptr(a);
639 }
640 
641 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
642 {
643    return *get_secmap_high_ptr(a);
644 }
645 
646 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
647 {
648    SecMap** p = get_secmap_low_ptr(a);
649    if (UNLIKELY(is_distinguished_sm(*p)))
650       *p = copy_for_writing(*p);
651    return *p;
652 }
653 
654 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
655 {
656    SecMap** p = get_secmap_high_ptr(a);
657    if (UNLIKELY(is_distinguished_sm(*p)))
658       *p = copy_for_writing(*p);
659    return *p;
660 }
661 
662 /* Produce the secmap for 'a', either from the primary map or by
663    ensuring there is an entry for it in the aux primary map.  The
664    secmap may be a distinguished one as the caller will only want to
665    be able to read it.
666 */
667 static INLINE SecMap* get_secmap_for_reading ( Addr a )
668 {
669    return ( a <= MAX_PRIMARY_ADDRESS
670           ? get_secmap_for_reading_low (a)
671           : get_secmap_for_reading_high(a) );
672 }
673 
674 /* Produce the secmap for 'a', either from the primary map or by
675    ensuring there is an entry for it in the aux primary map.  The
676    secmap may not be a distinguished one, since the caller will want
677    to be able to write it.  If it is a distinguished secondary, make a
678    writable copy of it, install it, and return the copy instead.  (COW
679    semantics).
680 */
681 static INLINE SecMap* get_secmap_for_writing ( Addr a )
682 {
683    return ( a <= MAX_PRIMARY_ADDRESS
684           ? get_secmap_for_writing_low (a)
685           : get_secmap_for_writing_high(a) );
686 }
687 
688 /* If 'a' has a SecMap, produce it.  Else produce NULL.  But don't
689    allocate one if one doesn't already exist.  This is used by the
690    leak checker.
691 */
692 static SecMap* maybe_get_secmap_for ( Addr a )
693 {
694    if (a <= MAX_PRIMARY_ADDRESS) {
695       return get_secmap_for_reading_low(a);
696    } else {
697       AuxMapEnt* am = maybe_find_in_auxmap(a);
698       return am ? am->sm : NULL;
699    }
700 }
701 
702 /* --------------- Fundamental functions --------------- */
703 
704 static INLINE
705 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
706 {
707    UInt shift =  (a & 3)  << 1;        // shift by 0, 2, 4, or 6
708    *vabits8  &= ~(0x3     << shift);   // mask out the two old bits
709    *vabits8  |=  (vabits2 << shift);   // mask  in the two new bits
710 }
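// Worked example for the function above (illustrative): if the vabits8
// entry for a 4-byte group is 0x55 (all four bytes VA_BITS2_UNDEFINED)
// and we call insert_vabits2_into_vabits8(a, VA_BITS2_DEFINED, &v8) for
// an address with (a & 3) == 2, then shift = 4, the old bits [5..4]
// (01b) are masked out and 10b is or'd in:
//    (0x55 & ~0x30) | 0x20 == 0x65
// ie. bytes a+0, a+1 and a+3 stay undefined while a+2 becomes defined.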
711 
712 static INLINE
713 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
714 {
715    UInt shift;
716    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
717    shift     =  (a & 2)   << 1;        // shift by 0 or 4
718    *vabits8 &= ~(0xf      << shift);   // mask out the four old bits
719    *vabits8 |=  (vabits4 << shift);    // mask  in the four new bits
720 }
721 
722 static INLINE
723 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
724 {
725    UInt shift = (a & 3) << 1;          // shift by 0, 2, 4, or 6
726    vabits8 >>= shift;                  // shift the two bits to the bottom
727    return 0x3 & vabits8;               // mask out the rest
728 }
729 
730 static INLINE
731 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
732 {
733    UInt shift;
734    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
735    shift = (a & 2) << 1;               // shift by 0 or 4
736    vabits8 >>= shift;                  // shift the four bits to the bottom
737    return 0xf & vabits8;               // mask out the rest
738 }
739 
740 // Note that these four are only used in slow cases.  The fast cases do
741 // clever things like combine the auxmap check (in
742 // get_secmap_{read,writ}able) with alignment checks.
743 
744 // *** WARNING! ***
745 // Any time this function is called, if it is possible that vabits2
746 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
747 // sec-V-bits table must also be set!
748 static INLINE
749 void set_vabits2 ( Addr a, UChar vabits2 )
750 {
751    SecMap* sm       = get_secmap_for_writing(a);
752    UWord   sm_off   = SM_OFF(a);
753    insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
754 }
755 
756 static INLINE
757 UChar get_vabits2 ( Addr a )
758 {
759    SecMap* sm       = get_secmap_for_reading(a);
760    UWord   sm_off   = SM_OFF(a);
761    UChar   vabits8  = sm->vabits8[sm_off];
762    return extract_vabits2_from_vabits8(a, vabits8);
763 }
764 
765 // *** WARNING! ***
766 // Any time this function is called, if it is possible that any of the
767 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
768 // corresponding entry(s) in the sec-V-bits table must also be set!
769 static INLINE
770 UChar get_vabits8_for_aligned_word32 ( Addr a )
771 {
772    SecMap* sm       = get_secmap_for_reading(a);
773    UWord   sm_off   = SM_OFF(a);
774    UChar   vabits8  = sm->vabits8[sm_off];
775    return vabits8;
776 }
777 
778 static INLINE
779 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
780 {
781    SecMap* sm       = get_secmap_for_writing(a);
782    UWord   sm_off   = SM_OFF(a);
783    sm->vabits8[sm_off] = vabits8;
784 }
785 
786 
787 // Forward declarations
788 static UWord get_sec_vbits8(Addr a);
789 static void  set_sec_vbits8(Addr a, UWord vbits8);
790 
791 // Returns False if there was an addressability error.
792 static INLINE
793 Bool set_vbits8 ( Addr a, UChar vbits8 )
794 {
795    Bool  ok      = True;
796    UChar vabits2 = get_vabits2(a);
797    if ( VA_BITS2_NOACCESS != vabits2 ) {
798       // Addressable.  Convert in-register format to in-memory format.
799       // Also remove any existing sec V bit entry for the byte if no
800       // longer necessary.
801       if      ( V_BITS8_DEFINED   == vbits8 ) { vabits2 = VA_BITS2_DEFINED;   }
802       else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
803       else                                    { vabits2 = VA_BITS2_PARTDEFINED;
804                                                 set_sec_vbits8(a, vbits8);  }
805       set_vabits2(a, vabits2);
806 
807    } else {
808       // Unaddressable!  Do nothing -- when writing to unaddressable
809       // memory it acts as a black hole, and the V bits can never be seen
810       // again.  So we don't have to write them at all.
811       ok = False;
812    }
813    return ok;
814 }
815 
816 // Returns False if there was an addressability error.  In that case, we put
817 // all defined bits into vbits8.
818 static INLINE
819 Bool get_vbits8 ( Addr a, UChar* vbits8 )
820 {
821    Bool  ok      = True;
822    UChar vabits2 = get_vabits2(a);
823 
824    // Convert the in-memory format to in-register format.
825    if      ( VA_BITS2_DEFINED   == vabits2 ) { *vbits8 = V_BITS8_DEFINED;   }
826    else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
827    else if ( VA_BITS2_NOACCESS  == vabits2 ) {
828       *vbits8 = V_BITS8_DEFINED;    // Make V bits defined!
829       ok = False;
830    } else {
831       tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
832       *vbits8 = get_sec_vbits8(a);
833    }
834    return ok;
835 }
836 
837 
838 /* --------------- Secondary V bit table ------------ */
839 
840 // This table holds the full V bit pattern for partially-defined bytes
841 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
842 // memory.
843 //
844 // Note: the nodes in this table can become stale.  Eg. if you write a PDB,
845 // then overwrite the same address with a fully defined byte, the sec-V-bit
846 // node will not necessarily be removed.  This is because checking for
847 // whether removal is necessary would slow down the fast paths.
848 //
849 // To avoid the stale nodes building up too much, we periodically (once the
850 // table reaches a certain size) garbage collect (GC) the table by
851 // traversing it and evicting any nodes that no longer hold any PDBs.
852 // If more than a certain proportion of nodes survive, we increase the
853 // table size so that GCs occur less often.
854 //
855 // This policy is designed to avoid bad table bloat in the worst case where
856 // a program creates huge numbers of stale PDBs -- we would get this bloat
857 // if we had no GC -- while handling well the case where a node becomes
858 // stale but shortly afterwards is rewritten with a PDB and so becomes
859 // non-stale again (which happens quite often, eg. in perf/bz2).  If we just
860 // remove all stale nodes as soon as possible, we just end up re-adding a
861 // lot of them again later.  The "sufficiently stale" approach avoids
862 // this.  (If a program has many live PDBs, performance will just suck,
863 // there's no way around that.)
864 //
865 // Further comments, JRS 14 Feb 2012.  It turns out that the policy of
866 // holding on to stale entries for 2 GCs before discarding them can lead
867 // to massive space leaks.  So we're changing to an arrangement where
868 // lines are evicted as soon as they are observed to be stale during a
869 // GC.  This also has a side benefit of allowing the sufficiently_stale
870 // field to be removed from the SecVBitNode struct, reducing its size by
871 // 8 bytes, which is a substantial space saving considering that the
872 // struct was previously 32 or so bytes, on a 64 bit target.
873 //
874 // In order to try and mitigate the problem that the "sufficiently stale"
875 // heuristic was designed to avoid, the table size is allowed to drift
876 // up ("DRIFTUP") slowly to 80000, even if the residency is low.  This
877 // means that nodes will exist in the table longer on average, and hopefully
878 // will be deleted and re-added less frequently.
879 //
880 // The previous scaling up mechanism (now called STEPUP) is retained:
881 // if residency exceeds 50%, the table is scaled up, although by a
882 // factor sqrt(2) rather than 2 as before.  This effectively doubles the
883 // frequency of GCs when there are many PDBs and reduces the tendency of
884 // stale PDBs to reside for long periods in the table.
885 
886 static OSet* secVBitTable;
887 
888 // Stats
889 static ULong sec_vbits_new_nodes = 0;
890 static ULong sec_vbits_updates   = 0;
891 
892 // This must be a power of two;  this is checked in mc_pre_clo_init().
893 // The size chosen here is a trade-off:  if the nodes are bigger (ie. cover
894 // a larger address range) they take more space but we can get multiple
895 // partially-defined bytes in one if they are close to each other, reducing
896 // the number of total nodes.  In practice sometimes they are clustered (eg.
897 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
898 // row), but often not.  So we choose something intermediate.
899 #define BYTES_PER_SEC_VBIT_NODE     16
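// So, for example, the partially-defined byte at address 0x4FFF37 is
// stored in the node keyed by VG_ROUNDDN(0x4FFF37, 16) = 0x4FFF30, in
// slot 0x4FFF37 % 16 = 7 of that node's vbits8[] array (see
// get_sec_vbits8/set_sec_vbits8 below).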
900 
901 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
902 // more than this many nodes survive a GC.
903 #define STEPUP_SURVIVOR_PROPORTION  0.5
904 #define STEPUP_GROWTH_FACTOR        1.414213562
905 
906 // If the above heuristic doesn't apply, then we may make the table
907 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
908 // this many nodes survive a GC, _and_ the total table size does
909 // not exceed a fixed limit.  The numbers are somewhat arbitrary, but
910 // work tolerably well on long Firefox runs.  The scaleup ratio of 1.5%
911 // effectively, although gradually, reduces residency and increases time
912 // between GCs for programs with small numbers of PDBs.  The 80000 limit
913 // effectively limits the table size to around 2MB for programs with
914 // small numbers of PDBs, whilst giving a reasonably long lifetime to
915 // entries, to try and reduce the costs resulting from deleting and
916 // re-adding of entries.
917 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
918 #define DRIFTUP_GROWTH_FACTOR       1.015
919 #define DRIFTUP_MAX_SIZE            80000
920 
921 // We GC the table when it gets this many nodes in it, ie. it's effectively
922 // the table size.  It can change.
923 static Int  secVBitLimit = 1000;
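// Rough illustration of how the knobs above interact (defaults only,
// not a guarantee of behaviour): the table starts with secVBitLimit =
// 1000, and a GC runs when the table reaches that many nodes.  If more
// than 500 nodes survive the GC (STEPUP_SURVIVOR_PROPORTION), the limit
// is scaled by ~1.414 to 1414.  Otherwise, if more than 150 survive
// (DRIFTUP_SURVIVOR_PROPORTION) and the limit is below 80000, it drifts
// up by 1.5% to 1015.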
924 
925 // The number of GCs done, used to age sec-V-bit nodes for eviction.
926 // Because it's unsigned, wrapping doesn't matter -- the right answer will
927 // come out anyway.
928 static UInt GCs_done = 0;
929 
930 typedef
931    struct {
932       Addr  a;
933       UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
934    }
935    SecVBitNode;
936 
937 static OSet* createSecVBitTable(void)
938 {
939    OSet* newSecVBitTable;
940    newSecVBitTable = VG_(OSetGen_Create_With_Pool)
941       ( offsetof(SecVBitNode, a),
942         NULL, // use fast comparisons
943         VG_(malloc), "mc.cSVT.1 (sec VBit table)",
944         VG_(free),
945         1000,
946         sizeof(SecVBitNode));
947    return newSecVBitTable;
948 }
949 
950 static void gcSecVBitTable(void)
951 {
952    OSet*        secVBitTable2;
953    SecVBitNode* n;
954    Int          i, n_nodes = 0, n_survivors = 0;
955 
956    GCs_done++;
957 
958    // Create the new table.
959    secVBitTable2 = createSecVBitTable();
960 
961    // Traverse the table, moving fresh nodes into the new table.
962    VG_(OSetGen_ResetIter)(secVBitTable);
963    while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
964       // Keep node if any of its bytes are non-stale.  Using
965       // get_vabits2() for the lookup is not very efficient, but I don't
966       // think it matters.
967       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
968          if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
969             // Found a non-stale byte, so keep =>
970             // Insert a copy of the node into the new table.
971             SecVBitNode* n2 =
972                VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
973             *n2 = *n;
974             VG_(OSetGen_Insert)(secVBitTable2, n2);
975             break;
976          }
977       }
978    }
979 
980    // Get the before and after sizes.
981    n_nodes     = VG_(OSetGen_Size)(secVBitTable);
982    n_survivors = VG_(OSetGen_Size)(secVBitTable2);
983 
984    // Destroy the old table, and put the new one in its place.
985    VG_(OSetGen_Destroy)(secVBitTable);
986    secVBitTable = secVBitTable2;
987 
988    if (VG_(clo_verbosity) > 1 && n_nodes != 0) {
989       VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%.1f%%)\n",
990                    n_nodes, n_survivors, n_survivors * 100.0 / n_nodes);
991    }
992 
993    // Increase table size if necessary.
994    if ((Double)n_survivors
995        > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
996       secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
997       if (VG_(clo_verbosity) > 1)
998          VG_(message)(Vg_DebugMsg,
999                       "memcheck GC: %d new table size (stepup)\n",
1000                       secVBitLimit);
1001    }
1002    else
1003    if (secVBitLimit < DRIFTUP_MAX_SIZE
1004        && (Double)n_survivors
1005           > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
1006       secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
1007       if (VG_(clo_verbosity) > 1)
1008          VG_(message)(Vg_DebugMsg,
1009                       "memcheck GC: %d new table size (driftup)\n",
1010                       secVBitLimit);
1011    }
1012 }
1013 
1014 static UWord get_sec_vbits8(Addr a)
1015 {
1016    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1017    Int          amod     = a % BYTES_PER_SEC_VBIT_NODE;
1018    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1019    UChar        vbits8;
1020    tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1021    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1022    // make it to the secondary V bits table.
1023    vbits8 = n->vbits8[amod];
1024    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1025    return vbits8;
1026 }
1027 
1028 static void set_sec_vbits8(Addr a, UWord vbits8)
1029 {
1030    Addr         aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1031    Int          i, amod  = a % BYTES_PER_SEC_VBIT_NODE;
1032    SecVBitNode* n        = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1033    // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1034    // make it to the secondary V bits table.
1035    tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1036    if (n) {
1037       n->vbits8[amod] = vbits8;     // update
1038       sec_vbits_updates++;
1039    } else {
1040       // Do a table GC if necessary.  Nb: do this before creating and
1041       // inserting the new node, to avoid erroneously GC'ing the new node.
1042       if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1043          gcSecVBitTable();
1044       }
1045 
1046       // New node:  assign the specific byte, make the rest invalid (they
1047       // should never be read as-is, but be cautious).
1048       n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1049       n->a            = aAligned;
1050       for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1051          n->vbits8[i] = V_BITS8_UNDEFINED;
1052       }
1053       n->vbits8[amod] = vbits8;
1054 
1055       // Insert the new node.
1056       VG_(OSetGen_Insert)(secVBitTable, n);
1057       sec_vbits_new_nodes++;
1058 
1059       n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1060       if (n_secVBit_nodes > max_secVBit_nodes)
1061          max_secVBit_nodes = n_secVBit_nodes;
1062    }
1063 }
1064 
1065 /* --------------- Endianness helpers --------------- */
1066 
1067 /* Returns the offset in memory of the byteno-th least significant byte
1068    in a wordszB-sized word, given the specified endianness. */
1069 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1070                                     UWord byteno ) {
1071    return bigendian ? (wordszB-1-byteno) : byteno;
1072 }
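/* For instance (purely illustrative): in a 4-byte word, byteno 0 is the
   least significant byte, so byte_offset_w(4, False, 0) == 0 on a
   little-endian target and byte_offset_w(4, True, 0) == 3 on a
   big-endian one -- in both cases the in-memory offset of the word's
   least significant byte. */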
1073 
1074 
1075 /* --------------- Ignored address ranges --------------- */
1076 
1077 /* Denotes the address-error-reportability status for address ranges:
1078    IAR_NotIgnored:  the usual case -- report errors in this range
1079    IAR_CommandLine: don't report errors -- from command line setting
1080    IAR_ClientReq:   don't report errors -- from client request
1081 */
1082 typedef
1083    enum { IAR_INVALID=99,
1084           IAR_NotIgnored,
1085           IAR_CommandLine,
1086           IAR_ClientReq }
1087    IARKind;
1088 
1089 static const HChar* showIARKind ( IARKind iark )
1090 {
1091    switch (iark) {
1092       case IAR_INVALID:     return "INVALID";
1093       case IAR_NotIgnored:  return "NotIgnored";
1094       case IAR_CommandLine: return "CommandLine";
1095       case IAR_ClientReq:   return "ClientReq";
1096       default:              return "???";
1097    }
1098 }
1099 
1100 // RangeMap<IARKind>
1101 static RangeMap* gIgnoredAddressRanges = NULL;
1102 
1103 static void init_gIgnoredAddressRanges ( void )
1104 {
1105    if (LIKELY(gIgnoredAddressRanges != NULL))
1106       return;
1107    gIgnoredAddressRanges = VG_(newRangeMap)( VG_(malloc), "mc.igIAR.1",
1108                                              VG_(free), IAR_NotIgnored );
1109 }
1110 
1111 Bool MC_(in_ignored_range) ( Addr a )
1112 {
1113    if (LIKELY(gIgnoredAddressRanges == NULL))
1114       return False;
1115    UWord how     = IAR_INVALID;
1116    UWord key_min = ~(UWord)0;
1117    UWord key_max =  (UWord)0;
1118    VG_(lookupRangeMap)(&key_min, &key_max, &how, gIgnoredAddressRanges, a);
1119    tl_assert(key_min <= a && a <= key_max);
1120    switch (how) {
1121       case IAR_NotIgnored:  return False;
1122       case IAR_CommandLine: return True;
1123       case IAR_ClientReq:   return True;
1124       default: break; /* invalid */
1125    }
1126    VG_(tool_panic)("MC_(in_ignored_range)");
1127    /*NOTREACHED*/
1128 }
1129 
1130 Bool MC_(in_ignored_range_below_sp) ( Addr sp, Addr a, UInt szB )
1131 {
1132    if (LIKELY(!MC_(clo_ignore_range_below_sp)))
1133        return False;
1134    tl_assert(szB >= 1 && szB <= 32);
1135    tl_assert(MC_(clo_ignore_range_below_sp__first_offset)
1136              > MC_(clo_ignore_range_below_sp__last_offset));
1137    Addr range_lo = sp - MC_(clo_ignore_range_below_sp__first_offset);
1138    Addr range_hi = sp - MC_(clo_ignore_range_below_sp__last_offset);
1139    if (range_lo >= range_hi) {
1140       /* Bizarre.  We have a wraparound situation.  What should we do? */
1141       return False; // Play safe
1142    } else {
1143       /* This is the expected case. */
1144       if (range_lo <= a && a + szB - 1 <= range_hi)
1145          return True;
1146       else
1147          return False;
1148    }
1149    /*NOTREACHED*/
1150    tl_assert(0);
1151 }
1152 
1153 /* Parse two Addrs (in hex) separated by a dash, or fail. */
1154 
1155 static Bool parse_Addr_pair ( const HChar** ppc, Addr* result1, Addr* result2 )
1156 {
1157    Bool ok = VG_(parse_Addr) (ppc, result1);
1158    if (!ok)
1159       return False;
1160    if (**ppc != '-')
1161       return False;
1162    (*ppc)++;
1163    ok = VG_(parse_Addr) (ppc, result2);
1164    if (!ok)
1165       return False;
1166    return True;
1167 }
1168 
1169 /* Parse two UInts (32 bit unsigned, in decimal) separated by a dash,
1170    or fail. */
1171 
1172 static Bool parse_UInt_pair ( const HChar** ppc, UInt* result1, UInt* result2 )
1173 {
1174    Bool ok = VG_(parse_UInt) (ppc, result1);
1175    if (!ok)
1176       return False;
1177    if (**ppc != '-')
1178       return False;
1179    (*ppc)++;
1180    ok = VG_(parse_UInt) (ppc, result2);
1181    if (!ok)
1182       return False;
1183    return True;
1184 }
1185 
1186 /* Parse a set of ranges separated by commas, or fail.  If they are
1187    valid, add them to the global set of ignored ranges
1188    (gIgnoredAddressRanges). */
1189 static Bool parse_ignore_ranges ( const HChar* str0 )
1190 {
1191    init_gIgnoredAddressRanges();
1192    const HChar*  str = str0;
1193    const HChar** ppc = &str;
1194    while (1) {
1195       Addr start = ~(Addr)0;
1196       Addr end   = (Addr)0;
1197       Bool ok    = parse_Addr_pair(ppc, &start, &end);
1198       if (!ok)
1199          return False;
1200       if (start > end)
1201          return False;
1202       VG_(bindRangeMap)( gIgnoredAddressRanges, start, end, IAR_CommandLine );
1203       if (**ppc == 0)
1204          return True;
1205       if (**ppc != ',')
1206          return False;
1207       (*ppc)++;
1208    }
1209    /*NOTREACHED*/
1210    return False;
1211 }
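/* Example of the accepted syntax (a sketch; the option name itself is
   handled by the command-line processing elsewhere): a string such as
      "0x11220000-0x11223000,0x88880000-0x8888ffff"
   parses as two inclusive start-end ranges, both bound to
   IAR_CommandLine in gIgnoredAddressRanges.  A missing '-', a start
   greater than its end, or trailing junk makes the whole parse fail. */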
1212 
1213 /* Add or remove [start, +len) from the set of ignored ranges. */
1214 static Bool modify_ignore_ranges ( Bool addRange, Addr start, Addr len )
1215 {
1216    init_gIgnoredAddressRanges();
1217    const Bool verbose = (VG_(clo_verbosity) > 1);
1218    if (len == 0) {
1219       return False;
1220    }
1221    if (addRange) {
1222       VG_(bindRangeMap)(gIgnoredAddressRanges,
1223                         start, start+len-1, IAR_ClientReq);
1224       if (verbose)
1225          VG_(dmsg)("memcheck: modify_ignore_ranges: add %p %p\n",
1226                    (void*)start, (void*)(start+len-1));
1227    } else {
1228       VG_(bindRangeMap)(gIgnoredAddressRanges,
1229                         start, start+len-1, IAR_NotIgnored);
1230       if (verbose)
1231          VG_(dmsg)("memcheck: modify_ignore_ranges: del %p %p\n",
1232                    (void*)start, (void*)(start+len-1));
1233    }
1234    if (verbose) {
1235       VG_(dmsg)("memcheck:   now have %u ranges:\n",
1236                 VG_(sizeRangeMap)(gIgnoredAddressRanges));
1237       UInt i;
1238       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
1239          UWord val     = IAR_INVALID;
1240          UWord key_min = ~(UWord)0;
1241          UWord key_max = (UWord)0;
1242          VG_(indexRangeMap)( &key_min, &key_max, &val,
1243                              gIgnoredAddressRanges, i );
1244          VG_(dmsg)("memcheck:      [%u]  %016lx-%016lx  %s\n",
1245                    i, key_min, key_max, showIARKind(val));
1246       }
1247    }
1248    return True;
1249 }
1250 
1251 
1252 /* --------------- Load/store slow cases. --------------- */
1253 
1254 static
1255 __attribute__((noinline))
1256 void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
1257                                 Addr a, SizeT nBits, Bool bigendian )
1258 {
1259    ULong  pessim[4];     /* only used when p-l-ok=yes */
1260    SSizeT szB            = nBits / 8;
1261    SSizeT szL            = szB / 8;  /* Size in Longs (64-bit units) */
1262    SSizeT i, j;          /* Must be signed. */
1263    SizeT  n_addrs_bad = 0;
1264    Addr   ai;
1265    UChar  vbits8;
1266    Bool   ok;
1267 
1268    /* Code below assumes load size is a power of two and at least 64
1269       bits. */
1270    tl_assert((szB & (szB-1)) == 0 && szL > 0);
1271 
1272    /* If this triggers, you probably just need to increase the size of
1273       the pessim array. */
1274    tl_assert(szL <= sizeof(pessim) / sizeof(pessim[0]));
1275 
1276    for (j = 0; j < szL; j++) {
1277       pessim[j] = V_BITS64_DEFINED;
1278       res[j] = V_BITS64_UNDEFINED;
1279    }
1280 
1281    /* Make up a result V word, which contains the loaded data for
1282       valid addresses and Defined for invalid addresses.  Iterate over
1283       the bytes in the word, from the most significant down to the
1284       least.  The vbits to return are calculated into vbits128.  Also
1285       compute the pessimising value to be used when
1286       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
1287       info can be gleaned from the pessim array) but is used as a
1288       cross-check. */
1289    for (j = szL-1; j >= 0; j--) {
1290       ULong vbits64    = V_BITS64_UNDEFINED;
1291       ULong pessim64   = V_BITS64_DEFINED;
1292       UWord long_index = byte_offset_w(szL, bigendian, j);
1293       for (i = 8-1; i >= 0; i--) {
1294          PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW_LOOP);
1295          ai = a + 8*long_index + byte_offset_w(8, bigendian, i);
1296          ok = get_vbits8(ai, &vbits8);
1297          vbits64 <<= 8;
1298          vbits64 |= vbits8;
1299          if (!ok) n_addrs_bad++;
1300          pessim64 <<= 8;
1301          pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1302       }
1303       res[long_index] = vbits64;
1304       pessim[long_index] = pessim64;
1305    }
1306 
1307    /* In the common case, all the addresses involved are valid, so we
1308       just return the computed V bits and have done. */
1309    if (LIKELY(n_addrs_bad == 0))
1310       return;
1311 
1312    /* If there's no possibility of getting a partial-loads-ok
1313       exemption, report the error and quit. */
1314    if (!MC_(clo_partial_loads_ok)) {
1315       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1316       return;
1317    }
1318 
1319    /* The partial-loads-ok exemption might apply.  Find out if it
1320       does.  If so, don't report an addressing error, but do return
1321       Undefined for the bytes that are out of range, so as to avoid
1322       false negatives.  If it doesn't apply, just report an addressing
1323       error in the usual way. */
1324 
1325    /* Some code steps along byte strings in aligned chunks
1326       even when there is only a partially defined word at the end (eg,
1327       optimised strlen).  This is allowed by the memory model of
1328       modern machines, since an aligned load cannot span two pages and
1329       thus cannot "partially fault".
1330 
1331       Therefore, a load from a partially-addressable place is allowed
1332       if all of the following hold:
1333       - the command-line flag is set [by default, it isn't]
1334       - it's an aligned load
1335       - at least one of the addresses in the word *is* valid
1336 
1337       Since this suppresses the addressing error, we avoid false
1338       negatives by marking bytes undefined when they come from an
1339       invalid address.
1340    */
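   /* Concrete illustration (hypothetical numbers): a 16-aligned 16-byte
      vector load whose last 3 bytes fall off the end of a heap block.
      With --partial-loads-ok=yes the exemption applies (the load is
      aligned and 13 of the 16 addresses are valid), so no addressing
      error is reported; the 13 good bytes keep their loaded V bits and
      the 3 bad ones are forced to Undefined via pessim[]. */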
1341 
1342    /* "at least one of the addresses is invalid" */
1343    ok = False;
1344    for (j = 0; j < szL; j++)
1345       ok |= pessim[j] != V_BITS64_DEFINED;
1346    tl_assert(ok);
1347 
1348    if (0 == (a & (szB - 1)) && n_addrs_bad < szB) {
1349       /* Exemption applies.  Use the previously computed pessimising
1350          value and return the combined result, but don't flag an
1351          addressing error.  The pessimising value is Defined for valid
1352          addresses and Undefined for invalid addresses. */
1353       /* for assumption that doing bitwise or implements UifU */
1354       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1355       /* (really need "UifU" here...)
1356          vbits[j] UifU= pessim[j]  (is pessimised by it, iow) */
1357       for (j = szL-1; j >= 0; j--)
1358          res[j] |= pessim[j];
1359       return;
1360    }
1361 
1362    /* Exemption doesn't apply.  Flag an addressing error in the normal
1363       way. */
1364    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1365 }
1366 
1367 
1368 static
1369 __attribute__((noinline))
1370 __attribute__((used))
1371 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
1372                  this function may get called from hand written assembly. */
1373 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1374 {
1375    PROF_EVENT(MCPE_LOADVN_SLOW);
1376 
1377    /* ------------ BEGIN semi-fast cases ------------ */
1378    /* These deal quickly-ish with the common auxiliary primary map
1379       cases on 64-bit platforms.  They are merely a speedup hack and can be
1380       omitted without loss of correctness/functionality.  Note that in
1381       both cases the "sizeof(void*) == 8" causes these cases to be
1382       folded out by compilers on 32-bit platforms.  These are derived
1383       from LOADV64 and LOADV32.
1384    */
1385    if (LIKELY(sizeof(void*) == 8
1386                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1387       SecMap* sm       = get_secmap_for_reading(a);
1388       UWord   sm_off16 = SM_OFF_16(a);
1389       UWord   vabits16 = sm->vabits16[sm_off16];
1390       if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1391          return V_BITS64_DEFINED;
1392       if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1393          return V_BITS64_UNDEFINED;
1394       /* else fall into the slow case */
1395    }
1396    if (LIKELY(sizeof(void*) == 8
1397                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1398       SecMap* sm = get_secmap_for_reading(a);
1399       UWord sm_off = SM_OFF(a);
1400       UWord vabits8 = sm->vabits8[sm_off];
1401       if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1402          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1403       if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1404          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1405       /* else fall into slow case */
1406    }
1407    /* ------------ END semi-fast cases ------------ */
1408 
1409    ULong  vbits64     = V_BITS64_UNDEFINED; /* result */
1410    ULong  pessim64    = V_BITS64_DEFINED;   /* only used when p-l-ok=yes */
1411    SSizeT szB         = nBits / 8;
1412    SSizeT i;          /* Must be signed. */
1413    SizeT  n_addrs_bad = 0;
1414    Addr   ai;
1415    UChar  vbits8;
1416    Bool   ok;
1417 
1418    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1419 
1420    /* Make up a 64-bit result V word, which contains the loaded data
1421       for valid addresses and Defined for invalid addresses.  Iterate
1422       over the bytes in the word, from the most significant down to
1423       the least.  The vbits to return are calculated into vbits64.
1424       Also compute the pessimising value to be used when
1425       --partial-loads-ok=yes.  n_addrs_bad is redundant (the relevant
1426       info can be gleaned from pessim64) but is used as a
1427       cross-check. */
1428    for (i = szB-1; i >= 0; i--) {
1429       PROF_EVENT(MCPE_LOADVN_SLOW_LOOP);
1430       ai = a + byte_offset_w(szB, bigendian, i);
1431       ok = get_vbits8(ai, &vbits8);
1432       vbits64 <<= 8;
1433       vbits64 |= vbits8;
1434       if (!ok) n_addrs_bad++;
1435       pessim64 <<= 8;
1436       pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1437    }
1438 
1439    /* In the common case, all the addresses involved are valid, so we
1440       just return the computed V bits and have done. */
1441    if (LIKELY(n_addrs_bad == 0))
1442       return vbits64;
1443 
1444    /* If there's no possibility of getting a partial-loads-ok
1445       exemption, report the error and quit. */
1446    if (!MC_(clo_partial_loads_ok)) {
1447       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1448       return vbits64;
1449    }
1450 
1451    /* The partial-loads-ok exemption might apply.  Find out if it
1452       does.  If so, don't report an addressing error, but do return
1453       Undefined for the bytes that are out of range, so as to avoid
1454       false negatives.  If it doesn't apply, just report an addressing
1455       error in the usual way. */
1456 
1457    /* Some code steps along byte strings in aligned word-sized chunks
1458       even when there is only a partially defined word at the end (eg,
1459       optimised strlen).  This is allowed by the memory model of
1460       modern machines, since an aligned load cannot span two pages and
1461       thus cannot "partially fault", even though such behaviour is
1462       declared undefined by ANSI C/C++.
1463 
1464       Therefore, a load from a partially-addressable place is allowed
1465       if all of the following hold:
1466       - the command-line flag is set [by default, it isn't]
1467       - it's a word-sized, word-aligned load
1468       - at least one of the addresses in the word *is* valid
1469 
1470       Since this suppresses the addressing error, we avoid false
1471       negatives by marking bytes undefined when they come from an
1472       invalid address.
1473    */
1474 
1475    /* "at least one of the addresses is invalid" */
1476    tl_assert(pessim64 != V_BITS64_DEFINED);
1477 
1478    if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1479        && n_addrs_bad < VG_WORDSIZE) {
1480       /* Exemption applies.  Use the previously computed pessimising
1481          value for vbits64 and return the combined result, but don't
1482          flag an addressing error.  The pessimising value is Defined
1483          for valid addresses and Undefined for invalid addresses. */
1484       /* for assumption that doing bitwise or implements UifU */
1485       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1486       /* (really need "UifU" here...)
1487          vbits64 UifU= pessim64  (is pessimised by it, iow) */
1488       vbits64 |= pessim64;
1489       return vbits64;
1490    }
1491 
1492    /* Also, it appears that gcc generates string-stepping code in
1493       32-bit chunks on 64 bit platforms.  So, also grant an exception
1494       for this case.  Note that the first clause of the conditional
1495       (VG_WORDSIZE == 8) is known at compile time, so the whole clause
1496       will get folded out in 32 bit builds. */
1497    if (VG_WORDSIZE == 8
1498        && VG_IS_4_ALIGNED(a) && nBits == 32 && n_addrs_bad < 4) {
1499       tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1500       /* (really need "UifU" here...)
1501          vbits64 UifU= pessim64  (is pessimised by it, iow) */
1502       vbits64 |= pessim64;
1503       /* Mark the upper 32 bits as undefined, just to be on the safe
1504          side. */
1505       vbits64 |= (((ULong)V_BITS32_UNDEFINED) << 32);
1506       return vbits64;
1507    }
1508 
1509    /* Exemption doesn't apply.  Flag an addressing error in the normal
1510       way. */
1511    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1512 
1513    return vbits64;
1514 }
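
/* For background only: a minimal sketch -- not part of Memcheck, and
   deliberately not compiled -- of the word-at-a-time scanning that
   optimised strlen() implementations use, which is exactly the
   client-code pattern the word-sized, word-aligned-load exemption
   above is meant to tolerate.  The final 8-byte load may read past the
   terminating NUL into bytes Memcheck regards as unaddressable, but it
   cannot fault, since an aligned load never crosses a page boundary.
   The sketch assumes 's' is 8-aligned and a 64-bit target. */
#if 0
#include <stddef.h>
#include <stdint.h>
static size_t strlen_wordwise_sketch ( const char* s )
{
   const uint64_t* p = (const uint64_t*)s;
   for (;;) {
      uint64_t v = *p;
      /* nonzero iff some byte of v is zero */
      if ((v - 0x0101010101010101ULL) & ~v & 0x8080808080808080ULL) {
         const char* c = (const char*)p;   /* find the NUL byte by byte */
         while (*c) c++;
         return (size_t)(c - s);
      }
      p++;
   }
}
#endif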
1515 
1516 
1517 static
1518 __attribute__((noinline))
1519 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1520 {
1521    SizeT szB = nBits / 8;
1522    SizeT i, n_addrs_bad = 0;
1523    UChar vbits8;
1524    Addr  ai;
1525    Bool  ok;
1526 
1527    PROF_EVENT(MCPE_STOREVN_SLOW);
1528 
1529    /* ------------ BEGIN semi-fast cases ------------ */
1530    /* These deal quickly-ish with the common auxiliary primary map
1531       cases on 64-bit platforms.  They are merely a speedup hack and can be
1532       omitted without loss of correctness/functionality.  Note that in
1533       both cases the "sizeof(void*) == 8" causes these cases to be
1534       folded out by compilers on 32-bit platforms.  The logic below
1535       is somewhat similar to some cases extensively commented in
1536       MC_(helperc_STOREV8).
1537    */
1538    if (LIKELY(sizeof(void*) == 8
1539                       && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1540       SecMap* sm       = get_secmap_for_reading(a);
1541       UWord   sm_off16 = SM_OFF_16(a);
1542       UWord   vabits16 = sm->vabits16[sm_off16];
1543       if (LIKELY( !is_distinguished_sm(sm) &&
1544                           (VA_BITS16_DEFINED   == vabits16 ||
1545                            VA_BITS16_UNDEFINED == vabits16) )) {
1546          /* Handle common case quickly: a is suitably aligned, */
1547          /* is mapped, and is addressable. */
1548          // Convert full V-bits in register to compact 2-bit form.
1549          if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1550             sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
1551             return;
1552          } else if (V_BITS64_UNDEFINED == vbytes) {
1553             sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
1554             return;
1555          }
1556          /* else fall into the slow case */
1557       }
1558       /* else fall into the slow case */
1559    }
1560    if (LIKELY(sizeof(void*) == 8
1561                       && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1562       SecMap* sm      = get_secmap_for_reading(a);
1563       UWord   sm_off  = SM_OFF(a);
1564       UWord   vabits8 = sm->vabits8[sm_off];
1565       if (LIKELY( !is_distinguished_sm(sm) &&
1566                           (VA_BITS8_DEFINED   == vabits8 ||
1567                            VA_BITS8_UNDEFINED == vabits8) )) {
1568          /* Handle common case quickly: a is suitably aligned, */
1569          /* is mapped, and is addressable. */
1570          // Convert full V-bits in register to compact 2-bit form.
1571          if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1572             sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1573             return;
1574          } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1575             sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1576             return;
1577          }
1578          /* else fall into the slow case */
1579       }
1580       /* else fall into the slow case */
1581    }
1582    /* ------------ END semi-fast cases ------------ */
1583 
1584    tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1585 
1586    /* Dump vbytes in memory, iterating from least to most significant
1587       byte.  At the same time establish addressability of the location. */
1588    for (i = 0; i < szB; i++) {
1589       PROF_EVENT(MCPE_STOREVN_SLOW_LOOP);
1590       ai     = a + byte_offset_w(szB, bigendian, i);
1591       vbits8 = vbytes & 0xff;
1592       ok     = set_vbits8(ai, vbits8);
1593       if (!ok) n_addrs_bad++;
1594       vbytes >>= 8;
1595    }
1596 
1597    /* If an address error has happened, report it. */
1598    if (n_addrs_bad > 0)
1599       MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1600 }
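
/* Worked example for the semi-fast store cases above: storing
   vbytes == V_BITS64_DEFINED (every V bit 0, i.e. a fully defined
   value) to an 8-aligned, fully mapped address collapses to a single
   write of the compact 2-bits-per-byte form, VA_BITS16_DEFINED, into
   sm->vabits16[].  A value whose V bits are, say, 0xFF in one byte
   lane and 0x00 elsewhere matches neither V_BITS64_DEFINED nor
   V_BITS64_UNDEFINED, so it falls through to the loop above the
   error check and is stored one byte at a time via set_vbits8(). */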
1601 
1602 
1603 /*------------------------------------------------------------*/
1604 /*--- Setting permissions over address ranges.             ---*/
1605 /*------------------------------------------------------------*/
1606 
1607 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1608                                       UWord dsm_num )
1609 {
1610    UWord    sm_off, sm_off16;
1611    UWord    vabits2 = vabits16 & 0x3;
1612    SizeT    lenA, lenB, len_to_next_secmap;
1613    Addr     aNext;
1614    SecMap*  sm;
1615    SecMap** sm_ptr;
1616    SecMap*  example_dsm;
1617 
1618    PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS);
1619 
1620    /* Check the V+A bits make sense. */
1621    tl_assert(VA_BITS16_NOACCESS  == vabits16 ||
1622              VA_BITS16_UNDEFINED == vabits16 ||
1623              VA_BITS16_DEFINED   == vabits16);
1624 
1625    // This code should never write PDBs;  ensure this.  (See comment above
1626    // set_vabits2().)
1627    tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1628 
1629    if (lenT == 0)
1630       return;
1631 
1632    if (lenT > 256 * 1024 * 1024) {
1633       if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1634          const HChar* s = "unknown???";
1635          if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1636          if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1637          if (vabits16 == VA_BITS16_DEFINED  ) s = "defined";
1638          VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1639                                   "large range [0x%lx, 0x%lx) (%s)\n",
1640                                   a, a + lenT, s);
1641       }
1642    }
1643 
1644 #ifndef PERF_FAST_SARP
1645    /*------------------ debug-only case ------------------ */
1646    {
1647       // Endianness doesn't matter here because all bytes are being set to
1648       // the same value.
1649       // Nb: We don't have to worry about updating the sec-V-bits table
1650       // after these set_vabits2() calls because this code never writes
1651       // VA_BITS2_PARTDEFINED values.
1652       SizeT i;
1653       for (i = 0; i < lenT; i++) {
1654          set_vabits2(a + i, vabits2);
1655       }
1656       return;
1657    }
1658 #endif
1659 
1660    /*------------------ standard handling ------------------ */
1661 
1662    /* Get the distinguished secondary that we might want
1663       to use (part of the space-compression scheme). */
1664    example_dsm = &sm_distinguished[dsm_num];
1665 
1666    // We have to handle ranges covering various combinations of partial and
1667    // whole sec-maps.  Here is how parts 1, 2 and 3 are used in each case.
1668    // Cases marked with a '*' are common.
1669    //
1670    //   TYPE                                             PARTS USED
1671    //   ----                                             ----------
1672    // * one partial sec-map                  (p)         1
1673    // - one whole sec-map                    (P)         2
1674    //
1675    // * two partial sec-maps                 (pp)        1,3
1676    // - one partial, one whole sec-map       (pP)        1,2
1677    // - one whole, one partial sec-map       (Pp)        2,3
1678    // - two whole sec-maps                   (PP)        2,2
1679    //
1680    // * one partial, one whole, one partial  (pPp)       1,2,3
1681    // - one partial, two whole               (pPP)       1,2,2
1682    // - two whole, one partial               (PPp)       2,2,3
1683    // - three whole                          (PPP)       2,2,2
1684    //
1685    // * one partial, N-2 whole, one partial  (pP...Pp)   1,2...2,3
1686    // - one partial, N-1 whole               (pP...PP)   1,2...2,2
1687    // - N-1 whole, one partial               (PP...Pp)   2,2...2,3
1688    // - N whole                              (PP...PP)   2,2...2,2
1689 
1690    // Break up total length (lenT) into two parts:  length in the first
1691    // sec-map (lenA), and the rest (lenB);   lenT == lenA + lenB.
1692    aNext = start_of_this_sm(a) + SM_SIZE;
1693    len_to_next_secmap = aNext - a;
1694    if ( lenT <= len_to_next_secmap ) {
1695       // Range entirely within one sec-map.  Covers almost all cases.
1696       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP);
1697       lenA = lenT;
1698       lenB = 0;
1699    } else if (is_start_of_sm(a)) {
1700       // Range spans at least one whole sec-map, and starts at the beginning
1701       // of a sec-map; skip to Part 2.
1702       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP);
1703       lenA = 0;
1704       lenB = lenT;
1705       goto part2;
1706    } else {
1707       // Range spans two or more sec-maps, first one is partial.
1708       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS);
1709       lenA = len_to_next_secmap;
1710       lenB = lenT - lenA;
1711    }
1712 
1713    //------------------------------------------------------------------------
1714    // Part 1: Deal with the first sec_map.  Most of the time the range will be
1715    // entirely within a sec_map and this part alone will suffice.  Also,
1716    // doing it this way lets us avoid repeatedly testing for the crossing of
1717    // a sec-map boundary within these loops.
1718    //------------------------------------------------------------------------
1719 
1720    // If it's distinguished, make it undistinguished if necessary.
1721    sm_ptr = get_secmap_ptr(a);
1722    if (is_distinguished_sm(*sm_ptr)) {
1723       if (*sm_ptr == example_dsm) {
1724          // Sec-map already has the V+A bits that we want, so skip.
1725          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK);
1726          a    = aNext;
1727          lenA = 0;
1728       } else {
1729          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1);
1730          *sm_ptr = copy_for_writing(*sm_ptr);
1731       }
1732    }
1733    sm = *sm_ptr;
1734 
1735    // 1 byte steps
1736    while (True) {
1737       if (VG_IS_8_ALIGNED(a)) break;
1738       if (lenA < 1)           break;
1739       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A);
1740       sm_off = SM_OFF(a);
1741       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1742       a    += 1;
1743       lenA -= 1;
1744    }
1745    // 8-aligned, 8 byte steps
1746    while (True) {
1747       if (lenA < 8) break;
1748       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A);
1749       sm_off16 = SM_OFF_16(a);
1750       sm->vabits16[sm_off16] = vabits16;
1751       a    += 8;
1752       lenA -= 8;
1753    }
1754    // 1 byte steps
1755    while (True) {
1756       if (lenA < 1) break;
1757       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B);
1758       sm_off = SM_OFF(a);
1759       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1760       a    += 1;
1761       lenA -= 1;
1762    }
1763 
1764    // We've finished the first sec-map.  Is that it?
1765    if (lenB == 0)
1766       return;
1767 
1768    //------------------------------------------------------------------------
1769    // Part 2: Fast-set entire sec-maps at a time.
1770    //------------------------------------------------------------------------
1771   part2:
1772    // 64KB-aligned, 64KB steps.
1773    // Nb: we can reach here with lenB < SM_SIZE
1774    tl_assert(0 == lenA);
1775    while (True) {
1776       if (lenB < SM_SIZE) break;
1777       tl_assert(is_start_of_sm(a));
1778       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K);
1779       sm_ptr = get_secmap_ptr(a);
1780       if (!is_distinguished_sm(*sm_ptr)) {
1781          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM);
1782          // Free the non-distinguished sec-map that we're replacing.  This
1783          // case happens moderately often, enough to be worthwhile.
1784          SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1785          tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1786       }
1787       update_SM_counts(*sm_ptr, example_dsm);
1788       // Make the sec-map entry point to the example DSM
1789       *sm_ptr = example_dsm;
1790       lenB -= SM_SIZE;
1791       a    += SM_SIZE;
1792    }
1793 
1794    // We've finished the whole sec-maps.  Is that it?
1795    if (lenB == 0)
1796       return;
1797 
1798    //------------------------------------------------------------------------
1799    // Part 3: Finish off the final partial sec-map, if necessary.
1800    //------------------------------------------------------------------------
1801 
1802    tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1803 
1804    // If it's distinguished, make it undistinguished if necessary.
1805    sm_ptr = get_secmap_ptr(a);
1806    if (is_distinguished_sm(*sm_ptr)) {
1807       if (*sm_ptr == example_dsm) {
1808          // Sec-map already has the V+A bits that we want, so stop.
1809          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK);
1810          return;
1811       } else {
1812          PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2);
1813          *sm_ptr = copy_for_writing(*sm_ptr);
1814       }
1815    }
1816    sm = *sm_ptr;
1817 
1818    // 8-aligned, 8 byte steps
1819    while (True) {
1820       if (lenB < 8) break;
1821       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B);
1822       sm_off16 = SM_OFF_16(a);
1823       sm->vabits16[sm_off16] = vabits16;
1824       a    += 8;
1825       lenB -= 8;
1826    }
1827    // 1 byte steps
1828    while (True) {
1829       if (lenB < 1) return;
1830       PROF_EVENT(MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C);
1831       sm_off = SM_OFF(a);
1832       insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1833       a    += 1;
1834       lenB -= 1;
1835    }
1836 }
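
/* Worked example of the lenA/lenB split above, assuming the 64KB
   sec-map size implied by the "64KB-aligned, 64KB steps" comment in
   Part 2: for a call with a = 0x10001234 and lenT = 200000,

      len_to_next_secmap = 0x10010000 - 0x10001234 = 60876
      lenA = 60876             (partial first sec-map, handled by Part 1)
      lenB = 200000 - 60876 = 139124

   Part 2 then consumes two whole sec-maps (2 * 65536 = 131072 bytes),
   leaving 8052 bytes for the final partial sec-map in Part 3 -- the
   "pPPp" shape in the table above. */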
1837 
1838 
1839 /* --- Set permissions for arbitrary address ranges --- */
1840 
1841 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1842 {
1843    PROF_EVENT(MCPE_MAKE_MEM_NOACCESS);
1844    DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1845    set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1846    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1847       ocache_sarp_Clear_Origins ( a, len );
1848 }
1849 
1850 static void make_mem_undefined ( Addr a, SizeT len )
1851 {
1852    PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED);
1853    DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1854    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1855 }
1856 
1857 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1858 {
1859    PROF_EVENT(MCPE_MAKE_MEM_UNDEFINED_W_OTAG);
1860    DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1861    set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1862    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1863       ocache_sarp_Set_Origins ( a, len, otag );
1864 }
1865 
1866 static
1867 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1868                                           ThreadId tid, UInt okind )
1869 {
1870    UInt        ecu;
1871    ExeContext* here;
1872    /* VG_(record_ExeContext) checks for validity of tid, and asserts
1873       if it is invalid.  So no need to do it here. */
1874    tl_assert(okind <= 3);
1875    here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1876    tl_assert(here);
1877    ecu = VG_(get_ECU_from_ExeContext)(here);
1878    tl_assert(VG_(is_plausible_ECU)(ecu));
1879    MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1880 }
1881 
1882 static
1883 void mc_new_mem_w_tid_make_ECU  ( Addr a, SizeT len, ThreadId tid )
1884 {
1885    make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1886 }
1887 
1888 static
1889 void mc_new_mem_w_tid_no_ECU  ( Addr a, SizeT len, ThreadId tid )
1890 {
1891    MC_(make_mem_undefined_w_otag) ( a, len, MC_OKIND_UNKNOWN );
1892 }
1893 
1894 void MC_(make_mem_defined) ( Addr a, SizeT len )
1895 {
1896    PROF_EVENT(MCPE_MAKE_MEM_DEFINED);
1897    DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1898    set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1899    if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1900       ocache_sarp_Clear_Origins ( a, len );
1901 }
1902 
1903 __attribute__((unused))
1904 static void make_mem_defined_w_tid ( Addr a, SizeT len, ThreadId tid )
1905 {
1906    MC_(make_mem_defined)(a, len);
1907 }
1908 
1909 /* For each byte in [a,a+len), if the byte is addressable, make it be
1910    defined, but if it isn't addressable, leave it alone.  In other
1911    words, a version of MC_(make_mem_defined) that doesn't mess with
1912    addressability.  Low-performance implementation. */
1913 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1914 {
1915    SizeT i;
1916    UChar vabits2;
1917    DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1918    for (i = 0; i < len; i++) {
1919       vabits2 = get_vabits2( a+i );
1920       if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1921          set_vabits2(a+i, VA_BITS2_DEFINED);
1922          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1923             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1924          }
1925       }
1926    }
1927 }
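
/* Illustrative client-side usage, assuming the usual client request
   macro from memcheck.h: after filling a buffer by some means Memcheck
   cannot observe, a program might run

      VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(buf, buf_len);

   which ends up here; any unaddressable bytes in the range are
   deliberately left untouched. */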
1928 
1929 /* Similarly (needed for mprotect handling ..) */
1930 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1931 {
1932    SizeT i;
1933    UChar vabits2;
1934    DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1935    for (i = 0; i < len; i++) {
1936       vabits2 = get_vabits2( a+i );
1937       if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1938          set_vabits2(a+i, VA_BITS2_DEFINED);
1939          if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1940             MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1941          }
1942       }
1943    }
1944 }
1945 
1946 /* --- Block-copy permissions (needed for implementing realloc() and
1947        sys_mremap). --- */
1948 
1949 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1950 {
1951    SizeT i, j;
1952    UChar vabits2, vabits8;
1953    Bool  aligned, nooverlap;
1954 
1955    DEBUG("MC_(copy_address_range_state)\n");
1956    PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE);
1957 
1958    if (len == 0 || src == dst)
1959       return;
1960 
1961    aligned   = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1962    nooverlap = src+len <= dst || dst+len <= src;
1963 
1964    if (nooverlap && aligned) {
1965 
1966       /* Vectorised fast case, when no overlap and suitably aligned */
1967       /* vector loop */
1968       i = 0;
1969       while (len >= 4) {
1970          vabits8 = get_vabits8_for_aligned_word32( src+i );
1971          set_vabits8_for_aligned_word32( dst+i, vabits8 );
1972          if (LIKELY(VA_BITS8_DEFINED == vabits8
1973                             || VA_BITS8_UNDEFINED == vabits8
1974                             || VA_BITS8_NOACCESS == vabits8)) {
1975             /* do nothing */
1976          } else {
1977             /* have to copy secondary map info */
1978             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1979                set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1980             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1981                set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1982             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1983                set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1984             if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1985                set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1986          }
1987          i += 4;
1988          len -= 4;
1989       }
1990       /* fixup loop */
1991       while (len >= 1) {
1992          vabits2 = get_vabits2( src+i );
1993          set_vabits2( dst+i, vabits2 );
1994          if (VA_BITS2_PARTDEFINED == vabits2) {
1995             set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1996          }
1997          i++;
1998          len--;
1999       }
2000 
2001    } else {
2002 
2003       /* We have to do things the slow way */
2004       if (src < dst) {
2005          for (i = 0, j = len-1; i < len; i++, j--) {
2006             PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1);
2007             vabits2 = get_vabits2( src+j );
2008             set_vabits2( dst+j, vabits2 );
2009             if (VA_BITS2_PARTDEFINED == vabits2) {
2010                set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
2011             }
2012          }
2013       }
2014 
2015       if (src > dst) {
2016          for (i = 0; i < len; i++) {
2017             PROF_EVENT(MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2);
2018             vabits2 = get_vabits2( src+i );
2019             set_vabits2( dst+i, vabits2 );
2020             if (VA_BITS2_PARTDEFINED == vabits2) {
2021                set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
2022             }
2023          }
2024       }
2025    }
2026 
2027 }
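
/* Why the slow path chooses a copy direction: consider src = 100,
   dst = 104, len = 8 (overlapping, src < dst).  Copying forwards would
   overwrite the metadata of bytes 104..107 before it had been read as
   part of the source range, so the first loop runs backwards from
   j = len-1, exactly as memmove() must.  When src > dst the hazard is
   mirrored and the forwards copy in the second loop is the safe
   choice. */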
2028 
2029 
2030 /*------------------------------------------------------------*/
2031 /*--- Origin tracking stuff - cache basics                 ---*/
2032 /*------------------------------------------------------------*/
2033 
2034 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
2035    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2036 
2037    Note that this implementation draws inspiration from the "origin
2038    tracking by value piggybacking" scheme described in "Tracking Bad
2039    Apples: Reporting the Origin of Null and Undefined Value Errors"
2040    (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
2041    Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
2042    implemented completely differently.
2043 
2044    Origin tags and ECUs -- about the shadow values
2045    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2046 
2047    This implementation tracks the defining point of all uninitialised
2048    values using so called "origin tags", which are 32-bit integers,
2049    rather than using the values themselves to encode the origins.  The
2050    latter, so-called "value piggybacking", is what the OOPSLA07 paper
2051    describes.
2052 
2053    Origin tags, as tracked by the machinery below, are 32-bit unsigned
2054    ints (UInts), regardless of the machine's word size.  Each tag
2055    comprises an upper 30-bit ECU field and a lower 2-bit
2056    'kind' field.  The ECU field is a number given out by m_execontext
2057    and has a 1-1 mapping with ExeContext*s.  An ECU can be used
2058    directly as an origin tag (otag), but in fact we want to put
2059    additional information in the 'kind' field to indicate roughly where the
2060    tag came from.  This helps print more understandable error messages
2061    for the user -- it has no other purpose.  In summary:
2062 
2063    * Both ECUs and origin tags are represented as 32-bit words
2064 
2065    * m_execontext and the core-tool interface deal purely in ECUs.
2066      They have no knowledge of origin tags - that is a purely
2067      Memcheck-internal matter.
2068 
2069    * all valid ECUs have the lowest 2 bits zero and at least
2070      one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
2071 
2072    * to convert from an ECU to an otag, OR in one of the MC_OKIND_
2073      constants defined in mc_include.h.
2074 
2075    * to convert an otag back to an ECU, AND it with ~3
2076 
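   As a purely illustrative example (the ECU value is made up): if
   m_execontext hands out ECU 0x6B48 -- low two bits zero, as all valid
   ECUs have -- then tagging it with, say, MC_OKIND_UNKNOWN gives

      otag = 0x6B48 | MC_OKIND_UNKNOWN    (make an origin tag)
      ecu  = otag & ~3                    (recover the ECU, 0x6B48)

   Because the kind occupies only the two low bits, which every valid
   ECU leaves clear, the round trip is lossless.
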
2077    One important fact is that no valid otag is zero.  A zero otag is
2078    used by the implementation to indicate "no origin", which could
2079    mean that either the value is defined, or it is undefined but the
2080    implementation somehow managed to lose the origin.
2081 
2082    The ECU used for memory created by malloc etc is derived from the
2083    stack trace at the time the malloc etc happens.  This means the
2084    mechanism can show the exact allocation point for heap-created
2085    uninitialised values.
2086 
2087    In contrast, it is simply too expensive to create a complete
2088    backtrace for each stack allocation.  Therefore we merely use a
2089    depth-1 backtrace for stack allocations, which can be done once at
2090    translation time, rather than N times at run time.  The result of
2091    this is that, for stack created uninitialised values, Memcheck can
2092    only show the allocating function, and not what called it.
2093    Furthermore, compilers tend to move the stack pointer just once at
2094    the start of the function, to allocate all locals, and so in fact
2095    the stack origin almost always simply points to the opening brace
2096    of the function.  Net result is, for stack origins, the mechanism
2097    can tell you in which function the undefined value was created, but
2098    that's all.  Users will need to carefully check all locals in the
2099    specified function.
2100 
2101    Shadowing registers and memory
2102    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2103 
2104    Memory is shadowed using a two level cache structure (ocacheL1 and
2105    ocacheL2).  Memory references are first directed to ocacheL1.  This
2106    is a traditional 2-way set associative cache with 32-byte lines and
2107    approximate LRU replacement within each set.
2108 
2109    A naive implementation would require storing one 32 bit otag for
2110    each byte of memory covered, a 4:1 space overhead.  Instead, there
2111    is one otag for every 4 bytes of memory covered, plus a 4-bit mask
2112    that shows which of the 4 bytes have that shadow value and which
2113    have a shadow value of zero (indicating no origin).  Hence a lot of
2114    space is saved, but the cost is that only one different origin per
2115    4 bytes of address space can be represented.  This is a source of
2116    imprecision, but how much of a problem it really is remains to be
2117    seen.
2118 
2119    A cache line that contains all zeroes ("no origins") contains no
2120    useful information, and can be ejected from the L1 cache "for
2121    free", in the sense that a read miss on the L1 causes a line of
2122    zeroes to be installed.  However, ejecting a line containing
2123    nonzeroes risks losing origin information permanently.  In order to
2124    prevent such lossage, ejected nonzero lines are placed in a
2125    secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
2126    lines.  This can grow arbitrarily large, and so should ensure that
2127    Memcheck runs out of memory in preference to losing useful origin
2128    info due to cache size limitations.
2129 
2130    Shadowing registers is a bit tricky, because the shadow values are
2131    32 bits, regardless of the size of the register.  That gives a
2132    problem for registers smaller than 32 bits.  The solution is to
2133    find spaces in the guest state that are unused, and use those to
2134    shadow guest state fragments smaller than 32 bits.  For example, on
2135    ppc32/64, each vector register is 16 bytes long.  If 4 bytes of the
2136    shadow are allocated for the register's otag, then there are still
2137    12 bytes left over which could be used to shadow 3 other values.
2138 
2139    This implies there is some non-obvious mapping from guest state
2140    (start,length) pairs to the relevant shadow offset (for the origin
2141    tags).  And it is unfortunately guest-architecture specific.  The
2142    mapping is contained in mc_machine.c, which is quite lengthy but
2143    straightforward.
2144 
2145    Instrumenting the IR
2146    ~~~~~~~~~~~~~~~~~~~~
2147 
2148    Instrumentation is largely straightforward, and done by the
2149    functions schemeE and schemeS in mc_translate.c.  These generate
2150    code for handling the origin tags of expressions (E) and statements
2151    (S) respectively.  The rather strange names are a reference to the
2152    "compilation schemes" shown in Simon Peyton Jones' book "The
2153    Implementation of Functional Programming Languages" (Prentice Hall,
2154    1987, see
2155    http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
2156 
2157    schemeS merely arranges to move shadow values around the guest
2158    state to track the incoming IR.  schemeE is largely trivial too.
2159    The only significant point is how to compute the otag corresponding
2160    to binary (or ternary, quaternary, etc) operator applications.  The
2161    rule is simple: just take whichever value is larger (32-bit
2162    unsigned max).  Constants get the special value zero.  Hence this
2163    rule always propagates a nonzero (known) otag in preference to a
2164    zero (unknown, or more likely, value-is-defined) tag, as we want.
2165    If two different undefined values are inputs to a binary operator
2166    application, then which is propagated is arbitrary, but that
2167    doesn't matter, since the program is erroneous in using either of
2168    the values, and so there's no point in attempting to propagate
2169    both.
2170 
2171    Since constants are abstracted to (otag) zero, much of the
2172    instrumentation code can be folded out without difficulty by the
2173    generic post-instrumentation IR cleanup pass, using these rules:
2174    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y are
2175    constants is evaluated at JIT time; the resulting dead code is then
2176    removed.  In practice this causes surprisingly few Max32Us to
2177    survive through to backend code generation.
2178 
2179    Integration with the V-bits machinery
2180    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2181 
2182    This is again largely straightforward.  Mostly the otag and V bits
2183    stuff are independent.  The only point of interaction is when the V
2184    bits instrumenter creates a call to a helper function to report an
2185    uninitialised value error -- in that case it must first use schemeE
2186    to get hold of the origin tag expression for the value, and pass
2187    that to the helper too.
2188 
2189    There is the usual stuff to do with setting address range
2190    permissions.  When memory is painted undefined, we must also know
2191    the origin tag to paint with, which involves some tedious plumbing,
2192    particularly to do with the fast case stack handlers.  When memory
2193    is painted defined or noaccess then the origin tags must be forced
2194    to zero.
2195 
2196    One of the goals of the implementation was to ensure that the
2197    non-origin tracking mode isn't slowed down at all.  To do this,
2198    various functions to do with memory permissions setting (again,
2199    mostly pertaining to the stack) are duplicated for the with- and
2200    without-otag case.
2201 
2202    Dealing with stack redzones, and the NIA cache
2203    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2204 
2205    This is one of the few non-obvious parts of the implementation.
2206 
2207    Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
2208    reserved area below the stack pointer, that can be used as scratch
2209    space by compiler generated code for functions.  In the Memcheck
2210    sources this is referred to as the "stack redzone".  The important
2211    thing here is that such redzones are considered volatile across
2212    function calls and returns.  So Memcheck takes care to mark them as
2213    undefined for each call and return, on the afflicted platforms.
2214    Past experience shows this is essential in order to get reliable
2215    messages about uninitialised values that come from the stack.
2216 
2217    So the question is, when we paint a redzone undefined, what origin
2218    tag should we use for it?  Consider a function f() calling g().  If
2219    we paint the redzone using an otag derived from the ExeContext of
2220    the CALL/BL instruction in f, then any errors in g causing it to
2221    use uninitialised values that happen to lie in the redzone, will be
2222    reported as having their origin in f.  Which is highly confusing.
2223 
2224    The same applies for returns: if, on a return, we paint the redzone
2225    using a origin tag derived from the ExeContext of the RET/BLR
2226    instruction in g, then any later errors in f causing it to use
2227    uninitialised values in the redzone, will be reported as having
2228    their origin in g.  Which is just as confusing.
2229 
2230    To do it right, in both cases we need to use an origin tag which
2231    pertains to the instruction which dynamically follows the CALL/BL
2232    or RET/BLR.  In short, one derived from the NIA - the "next
2233    instruction address".
2234 
2235    To make this work, Memcheck's redzone-painting helper,
2236    MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
2237    NIA.  It converts the NIA to a 1-element ExeContext, and uses that
2238    ExeContext's ECU as the basis for the otag used to paint the
2239    redzone.  The expensive part of this is converting an NIA into an
2240    ECU, since this happens once for every call and every return.  So
2241    we use a simple 511-line, 2-way set associative cache
2242    (nia_to_ecu_cache) to cache the mappings, and that knocks most of
2243    the cost out.
2244 
2245    Further background comments
2246    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
2247 
2248    > Question: why is otag a UInt?  Wouldn't a UWord be better?  Isn't
2249    > it really just the address of the relevant ExeContext?
2250 
2251    Well, it's not the address, but a value which has a 1-1 mapping
2252    with ExeContexts, and is guaranteed not to be zero, since zero
2253    denotes (to memcheck) "unknown origin or defined value".  So these
2254    UInts are just numbers starting at 4 and incrementing by 4; each
2255    ExeContext is given a number when it is created.  (*** NOTE this
2256    confuses otags and ECUs; see comments above ***).
2257 
2258    Making these otags 32-bit regardless of the machine's word size
2259    makes the 64-bit implementation easier (next para).  And it doesn't
2260    really limit us in any way, since for the tags to overflow would
2261    require that the program somehow caused 2^30-1 different
2262    ExeContexts to be created, in which case it is probably in deep
2263    trouble.  Not to mention V will have soaked up many tens of
2264    gigabytes of memory merely to store them all.
2265 
2266    So having 64-bit origins doesn't really buy you anything, and has
2267    the following downsides:
2268 
2269    Suppose that instead, an otag is a UWord.  This would mean that, on
2270    a 64-bit target,
2271 
2272    1. It becomes hard to shadow any element of guest state which is
2273       smaller than 8 bytes.  To do so means you'd need to find some
2274       8-byte-sized hole in the guest state which you don't want to
2275       shadow, and use that instead to hold the otag.  On ppc64, the
2276       condition code register(s) are split into 20 UChar sized pieces,
2277       all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2278       and so that would entail finding 160 bytes somewhere else in the
2279       guest state.
2280 
2281       Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2282       of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2283       same) and so I had to look for 4 untracked otag-sized areas in
2284       the guest state to make that possible.
2285 
2286       The same problem exists of course when origin tags are only 32
2287       bits, but it's less extreme.
2288 
2289    2. (More compelling) it doubles the size of the origin shadow
2290       memory.  Given that the shadow memory is organised as a fixed
2291       size cache, and that accuracy of tracking is limited by origins
2292       falling out the cache due to space conflicts, this isn't good.
2293 
2294    > Another question: is the origin tracking perfect, or are there
2295    > cases where it fails to determine an origin?
2296 
2297    It is imperfect for at least for the following reasons, and
2298    probably more:
2299 
2300    * Insufficient capacity in the origin cache.  When a line is
2301      evicted from the cache it is gone forever, and so subsequent
2302      queries for the line produce zero, indicating no origin
2303      information.  Interestingly, a line containing all zeroes can be
2304      evicted "free" from the cache, since it contains no useful
2305      information, so there is scope perhaps for some cleverer cache
2306      management schemes.  (*** NOTE, with the introduction of the
2307      second level origin tag cache, ocacheL2, this is no longer a
2308      problem. ***)
2309 
2310    * The origin cache only stores one otag per 32-bits of address
2311      space, plus 4 bits indicating which of the 4 bytes has that tag
2312      and which are considered defined.  The result is that if two
2313      undefined bytes in the same word are stored in memory, the first
2314      stored byte's origin will be lost and replaced by the origin for
2315      the second byte.
2316 
2317    * Nonzero origin tags for defined values.  Consider a binary
2318      operator application op(x,y).  Suppose y is undefined (and so has
2319      a valid nonzero origin tag), and x is defined, but erroneously
2320      has a nonzero origin tag (defined values should have tag zero).
2321      If the erroneous tag has a numeric value greater than y's tag,
2322      then the rule for propagating origin tags though binary
2323      operations, which is simply to take the unsigned max of the two
2324      tags, will erroneously propagate x's tag rather than y's.
2325 
2326    * Some obscure uses of x86/amd64 byte registers can cause lossage
2327      or confusion of origins.  %AH .. %DH are treated as different
2328      from, and unrelated to, their parent registers, %EAX .. %EDX.
2329      So some weird sequences like
2330 
2331         movb undefined-value, %AH
2332         movb defined-value, %AL
2333         .. use %AX or %EAX ..
2334 
2335      will cause the origin attributed to %AH to be ignored, since %AL,
2336      %AX, %EAX are treated as the same register, and %AH as a
2337      completely separate one.
2338 
2339    But having said all that, it actually seems to work fairly well in
2340    practice.
2341 */
2342 
2343 static UWord stats_ocacheL1_find           = 0;
2344 static UWord stats_ocacheL1_found_at_1     = 0;
2345 static UWord stats_ocacheL1_found_at_N     = 0;
2346 static UWord stats_ocacheL1_misses         = 0;
2347 static UWord stats_ocacheL1_lossage        = 0;
2348 static UWord stats_ocacheL1_movefwds       = 0;
2349 
2350 static UWord stats__ocacheL2_refs          = 0;
2351 static UWord stats__ocacheL2_misses        = 0;
2352 static UWord stats__ocacheL2_n_nodes_max   = 0;
2353 
2354 /* Cache of 32-bit values, one every 32 bits of address space */
2355 
2356 #define OC_BITS_PER_LINE 5
2357 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2358 
2359 static INLINE UWord oc_line_offset ( Addr a ) {
2360    return (a >> 2) & (OC_W32S_PER_LINE - 1);
2361 }
2362 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2363    return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2364 }
2365 
2366 #define OC_LINES_PER_SET 2
2367 
2368 #define OC_N_SET_BITS    20
2369 #define OC_N_SETS        (1 << OC_N_SET_BITS)
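
/* Worked example of how the lookup code below (oc_line_offset,
   find_OCacheLine) splits an address, for a purely illustrative
   a == 0x4F1234A6.  With OC_BITS_PER_LINE == 5 each line covers 32
   bytes, i.e. OC_W32S_PER_LINE == 8 shadow words, so:

      lineoff = (a >> 2) & (OC_W32S_PER_LINE - 1)          == 1
      tag     = a & ~((1 << OC_BITS_PER_LINE) - 1)         == 0x4F1234A0
      setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1)  == 0x891A5
*/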
2370 
2371 /* These settings give:
2372    64 bit host: ocache:  100,663,296 sizeB    67,108,864 useful
2373    32 bit host: ocache:   92,274,688 sizeB    67,108,864 useful
2374 */
2375 
2376 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2377 
2378 
2379 typedef
2380    struct {
2381       Addr  tag;
2382       UInt  w32[OC_W32S_PER_LINE];
2383       UChar descr[OC_W32S_PER_LINE];
2384    }
2385    OCacheLine;
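
/* The "These settings give" figures above follow directly from this
   struct, assuming no padding (which holds for these field sizes): on
   a 64-bit host sizeof(OCacheLine) = 8 (tag) + 8*4 (w32) + 8 (descr)
   = 48 bytes, so the L1 cache occupies
   48 * OC_LINES_PER_SET * OC_N_SETS = 48 * 2 * 2^20 = 100,663,296
   bytes, of which the w32[] payload -- the "useful" part -- accounts
   for 32 * 2 * 2^20 = 67,108,864 bytes.  On a 32-bit host the tag is 4
   bytes, giving 44 * 2 * 2^20 = 92,274,688 bytes. */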
2386 
2387 /* Classify and also sanity-check 'line'.  Return 'e' (empty) if not
2388    in use, 'n' (nonzero) if it contains at least one valid origin tag,
2389    and 'z' if all the represented tags are zero. */
2390 static UChar classify_OCacheLine ( OCacheLine* line )
2391 {
2392    UWord i;
2393    if (line->tag == 1/*invalid*/)
2394       return 'e'; /* EMPTY */
2395    tl_assert(is_valid_oc_tag(line->tag));
2396    for (i = 0; i < OC_W32S_PER_LINE; i++) {
2397       tl_assert(0 == ((~0xF) & line->descr[i]));
2398       if (line->w32[i] > 0 && line->descr[i] > 0)
2399          return 'n'; /* NONZERO - contains useful info */
2400    }
2401    return 'z'; /* ZERO - no useful info */
2402 }
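
/* For example, a line with a valid tag whose eight w32[]/descr[] pairs
   are all zero classifies as 'z': it says "no origin information" for
   all 32 bytes it covers and can be discarded for free.  If even one
   slot has a nonzero otag in w32[i] together with a nonzero descr[i]
   mask (recording which of that slot's 4 bytes carry the tag), the
   line is 'n' and must not be lost on eviction. */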
2403 
2404 typedef
2405    struct {
2406       OCacheLine line[OC_LINES_PER_SET];
2407    }
2408    OCacheSet;
2409 
2410 typedef
2411    struct {
2412       OCacheSet set[OC_N_SETS];
2413    }
2414    OCache;
2415 
2416 static OCache* ocacheL1 = NULL;
2417 static UWord   ocacheL1_event_ctr = 0;
2418 
2419 static void init_ocacheL2 ( void ); /* fwds */
2420 static void init_OCache ( void )
2421 {
2422    UWord line, set;
2423    tl_assert(MC_(clo_mc_level) >= 3);
2424    tl_assert(ocacheL1 == NULL);
2425    ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2426    if (ocacheL1 == NULL) {
2427       VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2428                                    sizeof(OCache) );
2429    }
2430    tl_assert(ocacheL1 != NULL);
2431    for (set = 0; set < OC_N_SETS; set++) {
2432       for (line = 0; line < OC_LINES_PER_SET; line++) {
2433          ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2434       }
2435    }
2436    init_ocacheL2();
2437 }
2438 
2439 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2440 {
2441    OCacheLine tmp;
2442    stats_ocacheL1_movefwds++;
2443    tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2444    tmp = set->line[lineno-1];
2445    set->line[lineno-1] = set->line[lineno];
2446    set->line[lineno] = tmp;
2447 }
2448 
2449 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2450    UWord i;
2451    for (i = 0; i < OC_W32S_PER_LINE; i++) {
2452       line->w32[i] = 0; /* NO ORIGIN */
2453       line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2454    }
2455    line->tag = tag;
2456 }
2457 
2458 //////////////////////////////////////////////////////////////
2459 //// OCache backing store
2460 
2461 static OSet* ocacheL2 = NULL;
2462 
2463 static void* ocacheL2_malloc ( const HChar* cc, SizeT szB ) {
2464    return VG_(malloc)(cc, szB);
2465 }
2466 static void ocacheL2_free ( void* v ) {
2467    VG_(free)( v );
2468 }
2469 
2470 /* Stats: # nodes currently in tree */
2471 static UWord stats__ocacheL2_n_nodes = 0;
2472 
2473 static void init_ocacheL2 ( void )
2474 {
2475    tl_assert(!ocacheL2);
2476    tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2477    tl_assert(0 == offsetof(OCacheLine,tag));
2478    ocacheL2
2479       = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2480                              NULL, /* fast cmp */
2481                              ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2482    stats__ocacheL2_n_nodes = 0;
2483 }
2484 
2485 /* Find line with the given tag in the tree, or NULL if not found. */
2486 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2487 {
2488    OCacheLine* line;
2489    tl_assert(is_valid_oc_tag(tag));
2490    stats__ocacheL2_refs++;
2491    line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2492    return line;
2493 }
2494 
2495 /* Delete the line with the given tag from the tree, if it is present, and
2496    free up the associated memory. */
2497 static void ocacheL2_del_tag ( Addr tag )
2498 {
2499    OCacheLine* line;
2500    tl_assert(is_valid_oc_tag(tag));
2501    stats__ocacheL2_refs++;
2502    line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2503    if (line) {
2504       VG_(OSetGen_FreeNode)(ocacheL2, line);
2505       tl_assert(stats__ocacheL2_n_nodes > 0);
2506       stats__ocacheL2_n_nodes--;
2507    }
2508 }
2509 
2510 /* Add a copy of the given line to the tree.  It must not already be
2511    present. */
2512 static void ocacheL2_add_line ( OCacheLine* line )
2513 {
2514    OCacheLine* copy;
2515    tl_assert(is_valid_oc_tag(line->tag));
2516    copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2517    *copy = *line;
2518    stats__ocacheL2_refs++;
2519    VG_(OSetGen_Insert)( ocacheL2, copy );
2520    stats__ocacheL2_n_nodes++;
2521    if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2522       stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2523 }
2524 
2525 ////
2526 //////////////////////////////////////////////////////////////
2527 
2528 __attribute__((noinline))
2529 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2530 {
2531    OCacheLine *victim, *inL2;
2532    UChar c;
2533    UWord line;
2534    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2535    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2536    UWord tag     = a & tagmask;
2537    tl_assert(setno >= 0 && setno < OC_N_SETS);
2538 
2539    /* we already tried line == 0; skip therefore. */
2540    for (line = 1; line < OC_LINES_PER_SET; line++) {
2541       if (ocacheL1->set[setno].line[line].tag == tag) {
2542          if (line == 1) {
2543             stats_ocacheL1_found_at_1++;
2544          } else {
2545             stats_ocacheL1_found_at_N++;
2546          }
2547          if (UNLIKELY(0 == (ocacheL1_event_ctr++
2548                             & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2549             moveLineForwards( &ocacheL1->set[setno], line );
2550             line--;
2551          }
2552          return &ocacheL1->set[setno].line[line];
2553       }
2554    }
2555 
2556    /* A miss.  Use the last slot.  Implicitly this means we're
2557       ejecting the line in the last slot. */
2558    stats_ocacheL1_misses++;
2559    tl_assert(line == OC_LINES_PER_SET);
2560    line--;
2561    tl_assert(line > 0);
2562 
2563    /* First, move the to-be-ejected line to the L2 cache. */
2564    victim = &ocacheL1->set[setno].line[line];
2565    c = classify_OCacheLine(victim);
2566    switch (c) {
2567       case 'e':
2568          /* the line is empty (has invalid tag); ignore it. */
2569          break;
2570       case 'z':
2571          /* line contains zeroes.  We must ensure the backing store is
2572             updated accordingly, either by copying the line there
2573             verbatim, or by ensuring it isn't present there.  We
2574            choose the latter on the basis that it reduces the size of
2575             the backing store. */
2576          ocacheL2_del_tag( victim->tag );
2577          break;
2578       case 'n':
2579          /* line contains at least one real, useful origin.  Copy it
2580             to the backing store. */
2581          stats_ocacheL1_lossage++;
2582          inL2 = ocacheL2_find_tag( victim->tag );
2583          if (inL2) {
2584             *inL2 = *victim;
2585          } else {
2586             ocacheL2_add_line( victim );
2587          }
2588          break;
2589       default:
2590          tl_assert(0);
2591    }
2592 
2593    /* Now we must reload the L1 cache from the backing tree, if
2594       possible. */
2595    tl_assert(tag != victim->tag); /* stay sane */
2596    inL2 = ocacheL2_find_tag( tag );
2597    if (inL2) {
2598       /* We're in luck.  It's in the L2. */
2599       ocacheL1->set[setno].line[line] = *inL2;
2600    } else {
2601       /* Missed at both levels of the cache hierarchy.  We have to
2602          declare it as full of zeroes (unknown origins). */
2603       stats__ocacheL2_misses++;
2604       zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2605    }
2606 
2607    /* Move it one forwards */
2608    moveLineForwards( &ocacheL1->set[setno], line );
2609    line--;
2610 
2611    return &ocacheL1->set[setno].line[line];
2612 }
2613 
2614 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2615 {
2616    UWord setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2617    UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2618    UWord tag     = a & tagmask;
2619 
2620    stats_ocacheL1_find++;
2621 
2622    if (OC_ENABLE_ASSERTIONS) {
2623       tl_assert(setno >= 0 && setno < OC_N_SETS);
2624       tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2625    }
2626 
2627    if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2628       return &ocacheL1->set[setno].line[0];
2629    }
2630 
2631    return find_OCacheLine_SLOW( a );
2632 }
2633 
2634 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2635 {
2636    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2637    //// Set the origins for a+0 .. a+7
2638    { OCacheLine* line;
2639      UWord lineoff = oc_line_offset(a);
2640      if (OC_ENABLE_ASSERTIONS) {
2641         tl_assert(lineoff >= 0
2642                   && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2643      }
2644      line = find_OCacheLine( a );
2645      line->descr[lineoff+0] = 0xF;
2646      line->descr[lineoff+1] = 0xF;
2647      line->w32[lineoff+0]   = otag;
2648      line->w32[lineoff+1]   = otag;
2649    }
2650    //// END inlined, specialised version of MC_(helperc_b_store8)
2651 }
2652 
2653 
2654 /*------------------------------------------------------------*/
2655 /*--- Aligned fast case permission setters,                ---*/
2656 /*--- for dealing with stacks                              ---*/
2657 /*------------------------------------------------------------*/
2658 
2659 /*--------------------- 32-bit ---------------------*/
2660 
2661 /* Nb: by "aligned" here we mean 4-byte aligned */
2662 
2663 static INLINE void make_aligned_word32_undefined ( Addr a )
2664 {
2665   PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED);
2666 
2667 #ifndef PERF_FAST_STACK2
2668    make_mem_undefined(a, 4);
2669 #else
2670    {
2671       UWord   sm_off;
2672       SecMap* sm;
2673 
2674       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2675          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW);
2676          make_mem_undefined(a, 4);
2677          return;
2678       }
2679 
2680       sm                  = get_secmap_for_writing_low(a);
2681       sm_off              = SM_OFF(a);
2682       sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2683    }
2684 #endif
2685 }
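/* Why a single vabits8 store suffices above: Memcheck tracks each byte of
   client memory with 2 V+A bits, so one vabits8 entry covers an aligned
   4-byte word and one vabits16 entry covers an aligned 8-byte word (as the
   64-bit setters below rely on).  A sketch of the replication, with made-up
   example codes -- the real VA_BITS* constants are defined earlier: */
#if 0
enum { EX_NOACCESS = 0, EX_UNDEFINED = 1, EX_DEFINED = 2 };   /* 2-bit codes */
#define EX_FOUR(b) ( (b) | ((b) << 2) | ((b) << 4) | ((b) << 6) )
/* EX_FOUR(EX_UNDEFINED) == 0x55: one byte written into the vabits array
   marks a whole aligned 4-byte word addressable-but-undefined at once.   */
#endif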
2686 
2687 static INLINE
2688 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2689 {
2690    make_aligned_word32_undefined(a);
2691    //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2692    //// Set the origins for a+0 .. a+3
2693    { OCacheLine* line;
2694      UWord lineoff = oc_line_offset(a);
2695      if (OC_ENABLE_ASSERTIONS) {
2696         tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2697      }
2698      line = find_OCacheLine( a );
2699      line->descr[lineoff] = 0xF;
2700      line->w32[lineoff]   = otag;
2701    }
2702    //// END inlined, specialised version of MC_(helperc_b_store4)
2703 }
2704 
2705 static INLINE
2706 void make_aligned_word32_noaccess ( Addr a )
2707 {
2708    PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS);
2709 
2710 #ifndef PERF_FAST_STACK2
2711    MC_(make_mem_noaccess)(a, 4);
2712 #else
2713    {
2714       UWord   sm_off;
2715       SecMap* sm;
2716 
2717       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2718          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW);
2719          MC_(make_mem_noaccess)(a, 4);
2720          return;
2721       }
2722 
2723       sm                  = get_secmap_for_writing_low(a);
2724       sm_off              = SM_OFF(a);
2725       sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2726 
2727       //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2728       //// Set the origins for a+0 .. a+3.
2729       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2730          OCacheLine* line;
2731          UWord lineoff = oc_line_offset(a);
2732          if (OC_ENABLE_ASSERTIONS) {
2733             tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2734          }
2735          line = find_OCacheLine( a );
2736          line->descr[lineoff] = 0;
2737       }
2738       //// END inlined, specialised version of MC_(helperc_b_store4)
2739    }
2740 #endif
2741 }
2742 
2743 /*--------------------- 64-bit ---------------------*/
2744 
2745 /* Nb: by "aligned" here we mean 8-byte aligned */
2746 
2747 static INLINE void make_aligned_word64_undefined ( Addr a )
2748 {
2749    PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED);
2750 
2751 #ifndef PERF_FAST_STACK2
2752    make_mem_undefined(a, 8);
2753 #else
2754    {
2755       UWord   sm_off16;
2756       SecMap* sm;
2757 
2758       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2759          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW);
2760          make_mem_undefined(a, 8);
2761          return;
2762       }
2763 
2764       sm       = get_secmap_for_writing_low(a);
2765       sm_off16 = SM_OFF_16(a);
2766       sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
2767    }
2768 #endif
2769 }
2770 
2771 static INLINE
2772 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2773 {
2774    make_aligned_word64_undefined(a);
2775    //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2776    //// Set the origins for a+0 .. a+7
2777    { OCacheLine* line;
2778      UWord lineoff = oc_line_offset(a);
2779      tl_assert(lineoff >= 0
2780                && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2781      line = find_OCacheLine( a );
2782      line->descr[lineoff+0] = 0xF;
2783      line->descr[lineoff+1] = 0xF;
2784      line->w32[lineoff+0]   = otag;
2785      line->w32[lineoff+1]   = otag;
2786    }
2787    //// END inlined, specialised version of MC_(helperc_b_store8)
2788 }
2789 
2790 static INLINE
2791 void make_aligned_word64_noaccess ( Addr a )
2792 {
2793    PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS);
2794 
2795 #ifndef PERF_FAST_STACK2
2796    MC_(make_mem_noaccess)(a, 8);
2797 #else
2798    {
2799       UWord   sm_off16;
2800       SecMap* sm;
2801 
2802       if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2803          PROF_EVENT(MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW);
2804          MC_(make_mem_noaccess)(a, 8);
2805          return;
2806       }
2807 
2808       sm       = get_secmap_for_writing_low(a);
2809       sm_off16 = SM_OFF_16(a);
2810       sm->vabits16[sm_off16] = VA_BITS16_NOACCESS;
2811 
2812       //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2813       //// Clear the origins for a+0 .. a+7.
2814       if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2815          OCacheLine* line;
2816          UWord lineoff = oc_line_offset(a);
2817          tl_assert(lineoff >= 0
2818                    && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2819          line = find_OCacheLine( a );
2820          line->descr[lineoff+0] = 0;
2821          line->descr[lineoff+1] = 0;
2822       }
2823       //// END inlined, specialised version of MC_(helperc_b_store8)
2824    }
2825 #endif
2826 }
2827 
2828 
2829 /*------------------------------------------------------------*/
2830 /*--- Stack pointer adjustment                             ---*/
2831 /*------------------------------------------------------------*/
2832 
2833 #ifdef PERF_FAST_STACK
2834 #  define MAYBE_USED
2835 #else
2836 #  define MAYBE_USED __attribute__((unused))
2837 #endif
2838 
2839 /*--------------- adjustment by 4 bytes ---------------*/
2840 
2841 MAYBE_USED
2842 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2843 {
2844    UInt otag = ecu | MC_OKIND_STACK;
2845    PROF_EVENT(MCPE_NEW_MEM_STACK_4);
2846    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2847       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2848    } else {
2849       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2850    }
2851 }
2852 
2853 MAYBE_USED
2854 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2855 {
2856    PROF_EVENT(MCPE_NEW_MEM_STACK_4);
2857    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2858       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2859    } else {
2860       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2861    }
2862 }
2863 
2864 MAYBE_USED
2865 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2866 {
2867    PROF_EVENT(MCPE_DIE_MEM_STACK_4);
2868    if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2869       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2870    } else {
2871       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2872    }
2873 }
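/* Worked example of the conventions used by these handlers, assuming the
   amd64 value VG_STACK_REDZONE_SZB == 128 (other targets may use a smaller
   or zero redzone):
      mc_new_mem_stack_4(0x1000) marks [0xF80, 0xF84) undefined -- the four
         bytes just exposed by SP dropping to 0x1000, shifted down by the
         128-byte redzone;
      mc_die_mem_stack_4(0x1000) marks [0xF7C, 0xF80) noaccess -- the four
         bytes that died when SP moved back up to 0x1000.
   The 8/12/16/... variants below follow the same pattern, merely unrolled
   for larger adjustments. */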
2874 
2875 /*--------------- adjustment by 8 bytes ---------------*/
2876 
2877 MAYBE_USED
2878 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2879 {
2880    UInt otag = ecu | MC_OKIND_STACK;
2881    PROF_EVENT(MCPE_NEW_MEM_STACK_8);
2882    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2883       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2884    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2885       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2886       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2887    } else {
2888       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2889    }
2890 }
2891 
2892 MAYBE_USED
2893 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2894 {
2895    PROF_EVENT(MCPE_NEW_MEM_STACK_8);
2896    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2897       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2898    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2899       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2900       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2901    } else {
2902       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2903    }
2904 }
2905 
2906 MAYBE_USED
2907 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2908 {
2909    PROF_EVENT(MCPE_DIE_MEM_STACK_8);
2910    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2911       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2912    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2913       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2914       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2915    } else {
2916       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2917    }
2918 }
2919 
2920 /*--------------- adjustment by 12 bytes ---------------*/
2921 
2922 MAYBE_USED
2923 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2924 {
2925    UInt otag = ecu | MC_OKIND_STACK;
2926    PROF_EVENT(MCPE_NEW_MEM_STACK_12);
2927    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2928       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2929       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2930    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2931       /* from previous test we don't have 8-alignment at offset +0,
2932          hence must have 8 alignment at offsets +4/-4.  Hence safe to
2933          do 4 at +0 and then 8 at +4. */
2934       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2935       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2936    } else {
2937       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2938    }
2939 }
2940 
2941 MAYBE_USED
2942 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2943 {
2944    PROF_EVENT(MCPE_NEW_MEM_STACK_12);
2945    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2946       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2947       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2948    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2949       /* from previous test we don't have 8-alignment at offset +0,
2950          hence must have 8 alignment at offsets +4/-4.  Hence safe to
2951          do 4 at +0 and then 8 at +4. */
2952       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2953       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2954    } else {
2955       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2956    }
2957 }
2958 
2959 MAYBE_USED
2960 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2961 {
2962    PROF_EVENT(MCPE_DIE_MEM_STACK_12);
2963    /* Note the -12 in the test */
2964    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2965       /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2966          -4. */
2967       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2968       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
2969    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2970       /* We have 4-alignment at +0, but we don't have 8-alignment at
2971          -12.  So we must have 8-alignment at -8.  Hence do 4 at -12
2972          and then 8 at -8. */
2973       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2974       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
2975    } else {
2976       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2977    }
2978 }
2979 
2980 /*--------------- adjustment by 16 bytes ---------------*/
2981 
2982 MAYBE_USED
2983 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2984 {
2985    UInt otag = ecu | MC_OKIND_STACK;
2986    PROF_EVENT(MCPE_NEW_MEM_STACK_16);
2987    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2988       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2989       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP  , otag );
2990       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2991    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2992       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2993          Hence do 4 at +0, 8 at +4, 4 at +12. */
2994       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
2995       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2996       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2997    } else {
2998       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2999    }
3000 }
3001 
3002 MAYBE_USED
3003 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
3004 {
3005    PROF_EVENT(MCPE_NEW_MEM_STACK_16);
3006    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3007       /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
3008       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3009       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3010    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3011       /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
3012          Hence do 4 at +0, 8 at +4, 4 at +12. */
3013       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3014       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4  );
3015       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3016    } else {
3017       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
3018    }
3019 }
3020 
3021 MAYBE_USED
3022 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
3023 {
3024    PROF_EVENT(MCPE_DIE_MEM_STACK_16);
3025    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3026       /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
3027       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3028       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8  );
3029    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3030       /* 8 alignment must be at -12.  Do 4 at -16, 8 at -12, 4 at -4. */
3031       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3032       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3033       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
3034    } else {
3035       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
3036    }
3037 }
3038 
3039 /*--------------- adjustment by 32 bytes ---------------*/
3040 
3041 MAYBE_USED
3042 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
3043 {
3044    UInt otag = ecu | MC_OKIND_STACK;
3045    PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3046    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3047       /* Straightforward */
3048       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
3049       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3050       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3051       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3052    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3053       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
3054          +0,+28. */
3055       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
3056       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
3057       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
3058       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
3059       make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
3060    } else {
3061       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
3062    }
3063 }
3064 
3065 MAYBE_USED
3066 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
3067 {
3068    PROF_EVENT(MCPE_NEW_MEM_STACK_32);
3069    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3070       /* Straightforward */
3071       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3072       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3073       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3074       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3075    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3076       /* 8 alignment must be at +4.  Hence do 8 at +4,+12,+20 and 4 at
3077          +0,+28. */
3078       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3079       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
3080       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
3081       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
3082       make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
3083    } else {
3084       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
3085    }
3086 }
3087 
3088 MAYBE_USED
3089 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
3090 {
3091    PROF_EVENT(MCPE_DIE_MEM_STACK_32);
3092    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3093       /* Straightforward */
3094       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3095       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3096       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3097       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3098    } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3099       /* 8 alignment must be at -4 etc.  Hence do 8 at -12,-20,-28 and
3100          4 at -32,-4. */
3101       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3102       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
3103       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
3104       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
3105       make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4  );
3106    } else {
3107       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
3108    }
3109 }
3110 
3111 /*--------------- adjustment by 112 bytes ---------------*/
3112 
3113 MAYBE_USED
3114 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
3115 {
3116    UInt otag = ecu | MC_OKIND_STACK;
3117    PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3118    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3119       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
3120       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3121       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3122       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3123       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3124       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3125       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3126       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3127       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3128       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3129       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3130       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3131       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3132       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3133    } else {
3134       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
3135    }
3136 }
3137 
3138 MAYBE_USED
3139 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
3140 {
3141    PROF_EVENT(MCPE_NEW_MEM_STACK_112);
3142    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3143       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3144       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3145       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3146       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3147       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3148       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3149       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3150       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3151       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3152       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3153       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3154       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3155       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3156       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3157    } else {
3158       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
3159    }
3160 }
3161 
3162 MAYBE_USED
3163 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
3164 {
3165    PROF_EVENT(MCPE_DIE_MEM_STACK_112);
3166    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3167       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3168       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3169       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3170       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3171       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3172       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3173       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3174       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3175       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3176       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3177       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3178       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3179       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3180       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3181    } else {
3182       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
3183    }
3184 }
3185 
3186 /*--------------- adjustment by 128 bytes ---------------*/
3187 
3188 MAYBE_USED
3189 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
3190 {
3191    UInt otag = ecu | MC_OKIND_STACK;
3192    PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3193    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3194       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP   , otag );
3195       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
3196       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3197       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3198       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3199       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3200       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3201       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3202       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3203       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3204       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3205       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3206       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3207       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3208       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3209       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3210    } else {
3211       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
3212    }
3213 }
3214 
3215 MAYBE_USED
3216 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
3217 {
3218    PROF_EVENT(MCPE_NEW_MEM_STACK_128);
3219    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3220       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3221       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3222       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3223       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3224       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3225       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3226       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3227       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3228       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3229       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3230       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3231       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3232       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3233       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3234       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3235       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3236    } else {
3237       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
3238    }
3239 }
3240 
3241 MAYBE_USED
3242 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
3243 {
3244    PROF_EVENT(MCPE_DIE_MEM_STACK_128);
3245    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3246       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3247       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3248       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3249       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3250       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3251       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3252       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3253       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3254       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3255       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3256       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3257       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3258       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3259       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3260       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3261       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3262    } else {
3263       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
3264    }
3265 }
3266 
3267 /*--------------- adjustment by 144 bytes ---------------*/
3268 
3269 MAYBE_USED
3270 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3271 {
3272    UInt otag = ecu | MC_OKIND_STACK;
3273    PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3274    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3275       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3276       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3277       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3278       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3279       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3280       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3281       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3282       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3283       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3284       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3285       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3286       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3287       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3288       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3289       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3290       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3291       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3292       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3293    } else {
3294       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3295    }
3296 }
3297 
3298 MAYBE_USED
3299 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3300 {
3301    PROF_EVENT(MCPE_NEW_MEM_STACK_144);
3302    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3303       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3304       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3305       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3306       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3307       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3308       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3309       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3310       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3311       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3312       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3313       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3314       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3315       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3316       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3317       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3318       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3319       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3320       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3321    } else {
3322       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3323    }
3324 }
3325 
3326 MAYBE_USED
3327 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3328 {
3329    PROF_EVENT(MCPE_DIE_MEM_STACK_144);
3330    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3331       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3332       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3333       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3334       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3335       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3336       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3337       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3338       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3339       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3340       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3341       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3342       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3343       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3344       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3345       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3346       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3347       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3348       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3349    } else {
3350       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3351    }
3352 }
3353 
3354 /*--------------- adjustment by 160 bytes ---------------*/
3355 
3356 MAYBE_USED
3357 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3358 {
3359    UInt otag = ecu | MC_OKIND_STACK;
3360    PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3361    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3362       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP,     otag );
3363       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8,   otag );
3364       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16,  otag );
3365       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24,  otag );
3366       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32,  otag );
3367       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40,  otag );
3368       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48,  otag );
3369       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56,  otag );
3370       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64,  otag );
3371       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72,  otag );
3372       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80,  otag );
3373       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88,  otag );
3374       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96,  otag );
3375       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3376       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3377       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3378       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3379       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3380       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3381       make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3382    } else {
3383       MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3384    }
3385 }
3386 
3387 MAYBE_USED
3388 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3389 {
3390    PROF_EVENT(MCPE_NEW_MEM_STACK_160);
3391    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3392       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3393       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3394       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3395       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3396       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3397       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3398       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3399       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3400       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3401       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3402       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3403       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3404       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3405       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3406       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3407       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3408       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3409       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3410       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3411       make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3412    } else {
3413       make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3414    }
3415 }
3416 
3417 MAYBE_USED
3418 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3419 {
3420    PROF_EVENT(MCPE_DIE_MEM_STACK_160);
3421    if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3422       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3423       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3424       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3425       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3426       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3427       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3428       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3429       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3430       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3431       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3432       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3433       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3434       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3435       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3436       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3437       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3438       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3439       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3440       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3441       make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3442    } else {
3443       MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3444    }
3445 }
3446 
3447 /*--------------- adjustment by N bytes ---------------*/
3448 
3449 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3450 {
3451    UInt otag = ecu | MC_OKIND_STACK;
3452    PROF_EVENT(MCPE_NEW_MEM_STACK);
3453    MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3454 }
3455 
3456 static void mc_new_mem_stack ( Addr a, SizeT len )
3457 {
3458    PROF_EVENT(MCPE_NEW_MEM_STACK);
3459    make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3460 }
3461 
3462 static void mc_die_mem_stack ( Addr a, SizeT len )
3463 {
3464    PROF_EVENT(MCPE_DIE_MEM_STACK);
3465    MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3466 }
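/* These sized handlers only pay off when the core can call them directly
   for the common small SP adjustments; the generic versions just above are
   the fallback for everything else.  Registration happens elsewhere in
   this file through the tool interface; schematically (a sketch only --
   the exact hooks used, and whether the _w_ECU variants are installed,
   depend on the origin-tracking level; see pub_tool_tooliface.h):

      VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
      VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 );
      ...
      VG_(track_new_mem_stack)   ( mc_new_mem_stack );
      VG_(track_die_mem_stack)   ( mc_die_mem_stack );
*/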
3467 
3468 
3469 /* The AMD64 ABI says:
3470 
3471    "The 128-byte area beyond the location pointed to by %rsp is considered
3472     to be reserved and shall not be modified by signal or interrupt
3473     handlers.  Therefore, functions may use this area for temporary data
3474     that is not needed across function calls.  In particular, leaf functions
3475     may use this area for their entire stack frame, rather than adjusting
3476     the stack pointer in the prologue and epilogue.  This area is known as
3477     red zone [sic]."
3478 
3479    So after any call or return we need to mark this redzone as containing
3480    undefined values.
3481 
3482    Consider this:  we're in function f.  f calls g.  g moves rsp down
3483    modestly (say 16 bytes) and writes stuff all over the red zone, making it
3484    defined.  g returns.  f is buggy and reads from parts of the red zone
3485    that it didn't write on.  But because g filled that area in, f is going
3486    to be picking up defined V bits and so any errors from reading bits of
3487    the red zone it didn't write, will be missed.  The only solution I could
3488    think of was to make the red zone undefined when g returns to f.
3489 
3490    This is in accordance with the ABI, which makes it clear the redzone
3491    is volatile across function calls.
3492 
3493    The problem occurs the other way round too: f could fill the RZ up
3494    with defined values and g could mistakenly read them.  So the RZ
3495    also needs to be nuked on function calls.
3496 */
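/* A schematic x86-64 fragment of the hazard described above (illustrative
   only; the exact offsets are not meant literally):

      g:  movq  %rax, -8(%rsp)    # g spills below its %rsp: the red-zone
          ...                     #   bytes become marked as defined
          ret                     # back to f; the redzone must be re-marked
                                  #   undefined at this point, otherwise ...
      f:  movq  -8(%rsp), %rbx    # ... this read of a red-zone slot that f
                                  #   never wrote would wrongly look defined

   The MAKE_STACK_UNINIT helpers below do that re-marking; as the comment
   below says, they run after calls and returns on targets with an
   ABI-defined redzone. */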
3497 
3498 
3499 /* Here's a simple cache to hold nia -> ECU mappings.  It could be
3500    improved so as to have a lower miss rate. */
3501 
3502 static UWord stats__nia_cache_queries = 0;
3503 static UWord stats__nia_cache_misses  = 0;
3504 
3505 typedef
3506    struct { UWord nia0; UWord ecu0;   /* nia0 maps to ecu0 */
3507             UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3508    WCacheEnt;
3509 
3510 #define N_NIA_TO_ECU_CACHE 511
3511 
3512 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3513 
3514 static void init_nia_to_ecu_cache ( void )
3515 {
3516    UWord       i;
3517    Addr        zero_addr = 0;
3518    ExeContext* zero_ec;
3519    UInt        zero_ecu;
3520    /* Fill all the slots with an entry for address zero, and the
3521       relevant otags accordingly.  Hence the cache is initially filled
3522       with valid data. */
3523    zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3524    tl_assert(zero_ec);
3525    zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3526    tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3527    for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3528       nia_to_ecu_cache[i].nia0 = zero_addr;
3529       nia_to_ecu_cache[i].ecu0 = zero_ecu;
3530       nia_to_ecu_cache[i].nia1 = zero_addr;
3531       nia_to_ecu_cache[i].ecu1 = zero_ecu;
3532    }
3533 }
3534 
3535 static inline UInt convert_nia_to_ecu ( Addr nia )
3536 {
3537    UWord i;
3538    UInt        ecu;
3539    ExeContext* ec;
3540 
3541    tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3542 
3543    stats__nia_cache_queries++;
3544    i = nia % N_NIA_TO_ECU_CACHE;
3545    tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3546 
3547    if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3548       return nia_to_ecu_cache[i].ecu0;
3549 
3550    if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3551 #     define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3552       SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3553       SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3554 #     undef SWAP
3555       return nia_to_ecu_cache[i].ecu0;
3556    }
3557 
3558    stats__nia_cache_misses++;
3559    ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3560    tl_assert(ec);
3561    ecu = VG_(get_ECU_from_ExeContext)(ec);
3562    tl_assert(VG_(is_plausible_ECU)(ecu));
3563 
3564    nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3565    nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3566 
3567    nia_to_ecu_cache[i].nia0 = nia;
3568    nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3569    return ecu;
3570 }
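/* The cache above is direct-mapped with two entries per slot and a
   move-to-front policy: entry 0 is the MRU, a hit on entry 1 swaps it to
   the front, and a miss demotes entry 0 and evicts entry 1.  The same
   idiom on a plain key/value pair, as a self-contained sketch (names are
   made up for illustration): */
#if 0
typedef struct { unsigned long k0, v0, k1, v1; } Ent2;

static unsigned long
lookup2 ( Ent2* e, unsigned long key, unsigned long (*compute)(unsigned long) )
{
   if (e->k0 == key) return e->v0;            /* MRU hit: nothing to reorder */
   if (e->k1 == key) {                        /* LRU hit: promote to MRU     */
      unsigned long tk = e->k0, tv = e->v0;
      e->k0 = e->k1;  e->v0 = e->v1;
      e->k1 = tk;     e->v1 = tv;
      return e->v0;
   }
   e->k1 = e->k0;  e->v1 = e->v0;             /* miss: demote MRU, drop LRU  */
   e->k0 = key;    e->v0 = compute(key);      /*   and recompute the mapping */
   return e->v0;
}
#endif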
3571 
3572 
3573 /* This marks the stack as addressable but undefined, after a call or
3574    return for a target that has an ABI-defined stack redzone.  It
3575    happens quite a lot and needs to be fast.  This is the version for
3576    origin tracking.  The non-origin-tracking version is below. */
3577 VG_REGPARM(3)
3578 void MC_(helperc_MAKE_STACK_UNINIT_w_o) ( Addr base, UWord len, Addr nia )
3579 {
3580    PROF_EVENT(MCPE_MAKE_STACK_UNINIT_W_O);
3581    if (0)
3582       VG_(printf)("helperc_MAKE_STACK_UNINIT_w_o (%#lx,%lu,nia=%#lx)\n",
3583                   base, len, nia );
3584 
3585    UInt ecu = convert_nia_to_ecu ( nia );
3586    tl_assert(VG_(is_plausible_ECU)(ecu));
3587 
3588    UInt otag = ecu | MC_OKIND_STACK;
3589 
3590 #  if 0
3591    /* Slow(ish) version, which is fairly easily seen to be correct.
3592    */
3593    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3594       make_aligned_word64_undefined_w_otag(base +   0, otag);
3595       make_aligned_word64_undefined_w_otag(base +   8, otag);
3596       make_aligned_word64_undefined_w_otag(base +  16, otag);
3597       make_aligned_word64_undefined_w_otag(base +  24, otag);
3598 
3599       make_aligned_word64_undefined_w_otag(base +  32, otag);
3600       make_aligned_word64_undefined_w_otag(base +  40, otag);
3601       make_aligned_word64_undefined_w_otag(base +  48, otag);
3602       make_aligned_word64_undefined_w_otag(base +  56, otag);
3603 
3604       make_aligned_word64_undefined_w_otag(base +  64, otag);
3605       make_aligned_word64_undefined_w_otag(base +  72, otag);
3606       make_aligned_word64_undefined_w_otag(base +  80, otag);
3607       make_aligned_word64_undefined_w_otag(base +  88, otag);
3608 
3609       make_aligned_word64_undefined_w_otag(base +  96, otag);
3610       make_aligned_word64_undefined_w_otag(base + 104, otag);
3611       make_aligned_word64_undefined_w_otag(base + 112, otag);
3612       make_aligned_word64_undefined_w_otag(base + 120, otag);
3613    } else {
3614       MC_(make_mem_undefined_w_otag)(base, len, otag);
3615    }
3616 #  endif
3617 
3618    /* Idea is: go fast when
3619          * 8-aligned and length is 128
3620          * the sm is available in the main primary map
3621          * the address range falls entirely within a single secondary map
3622       If all those conditions hold, just update the V+A bits by writing
3623       directly into the vabits array.  (If the sm was distinguished, this
3624       will make a copy and then write to it.)
3625    */
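   /* Hedged aside on the containment test below: each secondary map covers
      a fixed-size, naturally aligned chunk of address space (64KB in the
      usual configuration), so for an 8-aligned 128-byte range the primary
      map offsets of its first and last byte differ only if the range
      straddles a chunk boundary.  Comparing pm_off_lo with pm_off_hi thus
      both proves the range lies in one SecMap and identifies which one. */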
3626    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3627       /* Now we know the address range is suitably sized and aligned. */
3628       UWord a_lo = (UWord)(base);
3629       UWord a_hi = (UWord)(base + 128 - 1);
3630       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3631       if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3632          /* Now we know the entire range is within the main primary map. */
3633          UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3634          UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3635          if (LIKELY(pm_off_lo == pm_off_hi)) {
3636            /* Now we know that the entire address range falls within a
3637               single secondary map, and that that secondary 'lives' in
3638               the main primary map. */
3639             SecMap* sm      = get_secmap_for_writing_low(a_lo);
3640             UWord   v_off16 = SM_OFF_16(a_lo);
3641             UShort* p       = &sm->vabits16[v_off16];
3642             p[ 0] = VA_BITS16_UNDEFINED;
3643             p[ 1] = VA_BITS16_UNDEFINED;
3644             p[ 2] = VA_BITS16_UNDEFINED;
3645             p[ 3] = VA_BITS16_UNDEFINED;
3646             p[ 4] = VA_BITS16_UNDEFINED;
3647             p[ 5] = VA_BITS16_UNDEFINED;
3648             p[ 6] = VA_BITS16_UNDEFINED;
3649             p[ 7] = VA_BITS16_UNDEFINED;
3650             p[ 8] = VA_BITS16_UNDEFINED;
3651             p[ 9] = VA_BITS16_UNDEFINED;
3652             p[10] = VA_BITS16_UNDEFINED;
3653             p[11] = VA_BITS16_UNDEFINED;
3654             p[12] = VA_BITS16_UNDEFINED;
3655             p[13] = VA_BITS16_UNDEFINED;
3656             p[14] = VA_BITS16_UNDEFINED;
3657             p[15] = VA_BITS16_UNDEFINED;
3658             set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3659             set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3660             set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3661             set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3662             set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3663             set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3664             set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3665             set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3666             set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3667             set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3668             set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3669             set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3670             set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3671             set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3672             set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3673             set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3674             return;
3675          }
3676       }
3677    }
3678 
3679    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3680    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3681       /* Now we know the address range is suitably sized and aligned. */
3682       UWord a_lo = (UWord)(base);
3683       UWord a_hi = (UWord)(base + 288 - 1);
3684       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3685       if (a_hi <= MAX_PRIMARY_ADDRESS) {
3686          UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3687          UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3688          if (LIKELY(pm_off_lo == pm_off_hi)) {
3689            /* Now we know that the entire address range falls within a
3690               single secondary map, and that that secondary 'lives' in
3691               the main primary map. */
3692             SecMap* sm      = get_secmap_for_writing_low(a_lo);
3693             UWord   v_off16 = SM_OFF_16(a_lo);
3694             UShort* p       = &sm->vabits16[v_off16];
3695             p[ 0] = VA_BITS16_UNDEFINED;
3696             p[ 1] = VA_BITS16_UNDEFINED;
3697             p[ 2] = VA_BITS16_UNDEFINED;
3698             p[ 3] = VA_BITS16_UNDEFINED;
3699             p[ 4] = VA_BITS16_UNDEFINED;
3700             p[ 5] = VA_BITS16_UNDEFINED;
3701             p[ 6] = VA_BITS16_UNDEFINED;
3702             p[ 7] = VA_BITS16_UNDEFINED;
3703             p[ 8] = VA_BITS16_UNDEFINED;
3704             p[ 9] = VA_BITS16_UNDEFINED;
3705             p[10] = VA_BITS16_UNDEFINED;
3706             p[11] = VA_BITS16_UNDEFINED;
3707             p[12] = VA_BITS16_UNDEFINED;
3708             p[13] = VA_BITS16_UNDEFINED;
3709             p[14] = VA_BITS16_UNDEFINED;
3710             p[15] = VA_BITS16_UNDEFINED;
3711             p[16] = VA_BITS16_UNDEFINED;
3712             p[17] = VA_BITS16_UNDEFINED;
3713             p[18] = VA_BITS16_UNDEFINED;
3714             p[19] = VA_BITS16_UNDEFINED;
3715             p[20] = VA_BITS16_UNDEFINED;
3716             p[21] = VA_BITS16_UNDEFINED;
3717             p[22] = VA_BITS16_UNDEFINED;
3718             p[23] = VA_BITS16_UNDEFINED;
3719             p[24] = VA_BITS16_UNDEFINED;
3720             p[25] = VA_BITS16_UNDEFINED;
3721             p[26] = VA_BITS16_UNDEFINED;
3722             p[27] = VA_BITS16_UNDEFINED;
3723             p[28] = VA_BITS16_UNDEFINED;
3724             p[29] = VA_BITS16_UNDEFINED;
3725             p[30] = VA_BITS16_UNDEFINED;
3726             p[31] = VA_BITS16_UNDEFINED;
3727             p[32] = VA_BITS16_UNDEFINED;
3728             p[33] = VA_BITS16_UNDEFINED;
3729             p[34] = VA_BITS16_UNDEFINED;
3730             p[35] = VA_BITS16_UNDEFINED;
3731             set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3732             set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3733             set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3734             set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3735             set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3736             set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3737             set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3738             set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3739             set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3740             set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3741             set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3742             set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3743             set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3744             set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3745             set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3746             set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3747             set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3748             set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3749             set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3750             set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3751             set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3752             set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3753             set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3754             set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3755             set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3756             set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3757             set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3758             set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3759             set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3760             set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3761             set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3762             set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3763             set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3764             set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3765             set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3766             set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3767             return;
3768          }
3769       }
3770    }
3771 
3772    /* else fall into slow case */
3773    MC_(make_mem_undefined_w_otag)(base, len, otag);
3774 }
3775 
3776 
3777 /* This is a version of MC_(helperc_MAKE_STACK_UNINIT_w_o) that is
3778    specialised for the non-origin-tracking case. */
3779 VG_REGPARM(2)
3780 void MC_(helperc_MAKE_STACK_UNINIT_no_o) ( Addr base, UWord len )
3781 {
3782    PROF_EVENT(MCPE_MAKE_STACK_UNINIT_NO_O);
3783    if (0)
3784       VG_(printf)("helperc_MAKE_STACK_UNINIT_no_o (%#lx,%lu)\n",
3785                   base, len );
3786 
3787 #  if 0
3788    /* Slow(ish) version, which is fairly easily seen to be correct.
3789    */
3790    if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3791       make_aligned_word64_undefined(base +   0);
3792       make_aligned_word64_undefined(base +   8);
3793       make_aligned_word64_undefined(base +  16);
3794       make_aligned_word64_undefined(base +  24);
3795 
3796       make_aligned_word64_undefined(base +  32);
3797       make_aligned_word64_undefined(base +  40);
3798       make_aligned_word64_undefined(base +  48);
3799       make_aligned_word64_undefined(base +  56);
3800 
3801       make_aligned_word64_undefined(base +  64);
3802       make_aligned_word64_undefined(base +  72);
3803       make_aligned_word64_undefined(base +  80);
3804       make_aligned_word64_undefined(base +  88);
3805 
3806       make_aligned_word64_undefined(base +  96);
3807       make_aligned_word64_undefined(base + 104);
3808       make_aligned_word64_undefined(base + 112);
3809       make_aligned_word64_undefined(base + 120);
3810    } else {
3811       make_mem_undefined(base, len);
3812    }
3813 #  endif
3814 
3815    /* Idea is: go fast when
3816          * 8-aligned and length is 128
3817          * the sm is available in the main primary map
3818          * the address range falls entirely within a single secondary map
3819       If all those conditions hold, just update the V+A bits by writing
3820       directly into the vabits array.  (If the sm was distinguished, this
3821       will make a copy and then write to it.)
3822    */
3823    if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3824       /* Now we know the address range is suitably sized and aligned. */
3825       UWord a_lo = (UWord)(base);
3826       UWord a_hi = (UWord)(base + 128 - 1);
3827       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3828       if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3829          /* Now we know the entire range is within the main primary map. */
3830          UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3831          UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3832          if (LIKELY(pm_off_lo == pm_off_hi)) {
3833            /* Now we know that the entire address range falls within a
3834               single secondary map, and that that secondary 'lives' in
3835               the main primary map. */
3836             SecMap* sm      = get_secmap_for_writing_low(a_lo);
3837             UWord   v_off16 = SM_OFF_16(a_lo);
3838             UShort* p       = &sm->vabits16[v_off16];
3839             p[ 0] = VA_BITS16_UNDEFINED;
3840             p[ 1] = VA_BITS16_UNDEFINED;
3841             p[ 2] = VA_BITS16_UNDEFINED;
3842             p[ 3] = VA_BITS16_UNDEFINED;
3843             p[ 4] = VA_BITS16_UNDEFINED;
3844             p[ 5] = VA_BITS16_UNDEFINED;
3845             p[ 6] = VA_BITS16_UNDEFINED;
3846             p[ 7] = VA_BITS16_UNDEFINED;
3847             p[ 8] = VA_BITS16_UNDEFINED;
3848             p[ 9] = VA_BITS16_UNDEFINED;
3849             p[10] = VA_BITS16_UNDEFINED;
3850             p[11] = VA_BITS16_UNDEFINED;
3851             p[12] = VA_BITS16_UNDEFINED;
3852             p[13] = VA_BITS16_UNDEFINED;
3853             p[14] = VA_BITS16_UNDEFINED;
3854             p[15] = VA_BITS16_UNDEFINED;
3855             return;
3856          }
3857       }
3858    }
3859 
3860    /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3861    if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3862       /* Now we know the address range is suitably sized and aligned. */
3863       UWord a_lo = (UWord)(base);
3864       UWord a_hi = (UWord)(base + 288 - 1);
3865       tl_assert(a_lo < a_hi);             // paranoia: detect overflow
3866       if (a_hi <= MAX_PRIMARY_ADDRESS) {
3867          UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3868          UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3869          if (LIKELY(pm_off_lo == pm_off_hi)) {
3870            /* Now we know that the entire address range falls within a
3871               single secondary map, and that that secondary 'lives' in
3872               the main primary map. */
3873             SecMap* sm      = get_secmap_for_writing_low(a_lo);
3874             UWord   v_off16 = SM_OFF_16(a_lo);
3875             UShort* p       = &sm->vabits16[v_off16];
3876             p[ 0] = VA_BITS16_UNDEFINED;
3877             p[ 1] = VA_BITS16_UNDEFINED;
3878             p[ 2] = VA_BITS16_UNDEFINED;
3879             p[ 3] = VA_BITS16_UNDEFINED;
3880             p[ 4] = VA_BITS16_UNDEFINED;
3881             p[ 5] = VA_BITS16_UNDEFINED;
3882             p[ 6] = VA_BITS16_UNDEFINED;
3883             p[ 7] = VA_BITS16_UNDEFINED;
3884             p[ 8] = VA_BITS16_UNDEFINED;
3885             p[ 9] = VA_BITS16_UNDEFINED;
3886             p[10] = VA_BITS16_UNDEFINED;
3887             p[11] = VA_BITS16_UNDEFINED;
3888             p[12] = VA_BITS16_UNDEFINED;
3889             p[13] = VA_BITS16_UNDEFINED;
3890             p[14] = VA_BITS16_UNDEFINED;
3891             p[15] = VA_BITS16_UNDEFINED;
3892             p[16] = VA_BITS16_UNDEFINED;
3893             p[17] = VA_BITS16_UNDEFINED;
3894             p[18] = VA_BITS16_UNDEFINED;
3895             p[19] = VA_BITS16_UNDEFINED;
3896             p[20] = VA_BITS16_UNDEFINED;
3897             p[21] = VA_BITS16_UNDEFINED;
3898             p[22] = VA_BITS16_UNDEFINED;
3899             p[23] = VA_BITS16_UNDEFINED;
3900             p[24] = VA_BITS16_UNDEFINED;
3901             p[25] = VA_BITS16_UNDEFINED;
3902             p[26] = VA_BITS16_UNDEFINED;
3903             p[27] = VA_BITS16_UNDEFINED;
3904             p[28] = VA_BITS16_UNDEFINED;
3905             p[29] = VA_BITS16_UNDEFINED;
3906             p[30] = VA_BITS16_UNDEFINED;
3907             p[31] = VA_BITS16_UNDEFINED;
3908             p[32] = VA_BITS16_UNDEFINED;
3909             p[33] = VA_BITS16_UNDEFINED;
3910             p[34] = VA_BITS16_UNDEFINED;
3911             p[35] = VA_BITS16_UNDEFINED;
3912             return;
3913          }
3914       }
3915    }
3916 
3917    /* else fall into slow case */
3918    make_mem_undefined(base, len);
3919 }
3920 
3921 
3922 /* And this is an even more specialised case, for the case where there
3923    is no origin tracking, and the length is 128. */
3924 VG_REGPARM(1)
3925 void MC_(helperc_MAKE_STACK_UNINIT_128_no_o) ( Addr base )
3926 {
3927    PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O);
3928    if (0)
3929       VG_(printf)("helperc_MAKE_STACK_UNINIT_128_no_o (%#lx)\n", base );
3930 
3931 #  if 0
3932    /* Slow(ish) version, which is fairly easily seen to be correct.
3933    */
3934    if (LIKELY( VG_IS_8_ALIGNED(base) )) {
3935       make_aligned_word64_undefined(base +   0);
3936       make_aligned_word64_undefined(base +   8);
3937       make_aligned_word64_undefined(base +  16);
3938       make_aligned_word64_undefined(base +  24);
3939 
3940       make_aligned_word64_undefined(base +  32);
3941       make_aligned_word64_undefined(base +  40);
3942       make_aligned_word64_undefined(base +  48);
3943       make_aligned_word64_undefined(base +  56);
3944 
3945       make_aligned_word64_undefined(base +  64);
3946       make_aligned_word64_undefined(base +  72);
3947       make_aligned_word64_undefined(base +  80);
3948       make_aligned_word64_undefined(base +  88);
3949 
3950       make_aligned_word64_undefined(base +  96);
3951       make_aligned_word64_undefined(base + 104);
3952       make_aligned_word64_undefined(base + 112);
3953       make_aligned_word64_undefined(base + 120);
3954    } else {
3955       make_mem_undefined(base, 128);
3956    }
3957 #  endif
3958 
3959    /* Idea is: go fast when
3960          * 16-aligned and length is 128
3961          * the sm is available in the main primary map
3962          * the address range falls entirely within a single secondary map
3963       If all those conditions hold, just update the V+A bits by writing
3964       directly into the vabits array.  (If the sm was distinguished, this
3965       will make a copy and then write to it.)
3966 
3967       Typically this applies to amd64 'ret' instructions, since RSP is
3968       16-aligned (0 % 16) after the instruction (per the amd64-ELF ABI).
3969    */
3970    if (LIKELY( VG_IS_16_ALIGNED(base) )) {
3971       /* Now we know the address range is suitably sized and aligned. */
3972       UWord a_lo = (UWord)(base);
3973       UWord a_hi = (UWord)(base + 128 - 1);
3974       /* FIXME: come up with a sane story on the wraparound case
3975          (which of course cannot happen, but still..) */
3976       /* tl_assert(a_lo < a_hi); */            // paranoia: detect overflow
3977       if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
3978          /* Now we know the entire range is within the main primary map. */
3979          UWord pm_off_lo = get_primary_map_low_offset(a_lo);
3980          UWord pm_off_hi = get_primary_map_low_offset(a_hi);
3981          if (LIKELY(pm_off_lo == pm_off_hi)) {
3982            /* Now we know that the entire address range falls within a
3983               single secondary map, and that that secondary 'lives' in
3984               the main primary map. */
3985             PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16);
3986             SecMap* sm    = get_secmap_for_writing_low(a_lo);
3987             UWord   v_off = SM_OFF(a_lo);
3988             UInt*   w32   = ASSUME_ALIGNED(UInt*, &sm->vabits8[v_off]);
3989             w32[ 0] = VA_BITS32_UNDEFINED;
3990             w32[ 1] = VA_BITS32_UNDEFINED;
3991             w32[ 2] = VA_BITS32_UNDEFINED;
3992             w32[ 3] = VA_BITS32_UNDEFINED;
3993             w32[ 4] = VA_BITS32_UNDEFINED;
3994             w32[ 5] = VA_BITS32_UNDEFINED;
3995             w32[ 6] = VA_BITS32_UNDEFINED;
3996             w32[ 7] = VA_BITS32_UNDEFINED;
3997             return;
3998          }
3999       }
4000    }
4001 
4002    /* The same, but for when base is 8 % 16, which is the situation
4003       with RSP for amd64-ELF immediately after call instructions.
4004    */
4005    if (LIKELY( VG_IS_16_ALIGNED(base+8) )) { // restricts to 8 aligned
4006       /* Now we know the address range is suitably sized and aligned. */
4007       UWord a_lo = (UWord)(base);
4008       UWord a_hi = (UWord)(base + 128 - 1);
4009       /* FIXME: come up with a sane story on the wraparound case
4010          (which of course cannot happen, but still..) */
4011       /* tl_assert(a_lo < a_hi); */            // paranoia: detect overflow
4012       if (LIKELY(a_hi <= MAX_PRIMARY_ADDRESS)) {
4013          /* Now we know the entire range is within the main primary map. */
4014          UWord pm_off_lo = get_primary_map_low_offset(a_lo);
4015          UWord pm_off_hi = get_primary_map_low_offset(a_hi);
4016          if (LIKELY(pm_off_lo == pm_off_hi)) {
4017             PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8);
4018            /* Now we know that the entire address range falls within a
4019               single secondary map, and that that secondary 'lives' in
4020               the main primary map. */
4021             SecMap* sm      = get_secmap_for_writing_low(a_lo);
4022             UWord   v_off16 = SM_OFF_16(a_lo);
4023             UShort* w16     = &sm->vabits16[v_off16];
4024             UInt*   w32     = ASSUME_ALIGNED(UInt*, &w16[1]);
4025             /* The following assertion is commented out for obvious
4026                performance reasons, but was verified as valid when
4027                running the entire testsuite and also Firefox. */
4028             /* tl_assert(VG_IS_4_ALIGNED(w32)); */
4029             w16[ 0] = VA_BITS16_UNDEFINED; // w16[0]
4030             w32[ 0] = VA_BITS32_UNDEFINED; // w16[1,2]
4031             w32[ 1] = VA_BITS32_UNDEFINED; // w16[3,4]
4032             w32[ 2] = VA_BITS32_UNDEFINED; // w16[5,6]
4033             w32[ 3] = VA_BITS32_UNDEFINED; // w16[7,8]
4034             w32[ 4] = VA_BITS32_UNDEFINED; // w16[9,10]
4035             w32[ 5] = VA_BITS32_UNDEFINED; // w16[11,12]
4036             w32[ 6] = VA_BITS32_UNDEFINED; // w16[13,14]
4037             w16[15] = VA_BITS16_UNDEFINED; // w16[15]
4038             return;
4039          }
4040       }
4041    }
4042 
4043    /* else fall into slow case */
4044    PROF_EVENT(MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE);
4045    make_mem_undefined(base, 128);
4046 }
4047 
4048 
4049 /*------------------------------------------------------------*/
4050 /*--- Checking memory                                      ---*/
4051 /*------------------------------------------------------------*/
4052 
4053 typedef
4054    enum {
4055       MC_Ok = 5,
4056       MC_AddrErr = 6,
4057       MC_ValueErr = 7
4058    }
4059    MC_ReadResult;
4060 
4061 
4062 /* Check permissions for address range.  If inadequate permissions
4063    exist, *bad_addr is set to the offending address, so the caller can
4064    know what it is. */
4065 
4066 /* Returns True if [a .. a+len) is not addressable.  Otherwise,
4067    returns False, and if bad_addr is non-NULL, sets *bad_addr to
4068    indicate the lowest failing address.  Functions below are
4069    similar. */
4070 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
4071 {
4072    SizeT i;
4073    UWord vabits2;
4074 
4075    PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS);
4076    for (i = 0; i < len; i++) {
4077       PROF_EVENT(MCPE_CHECK_MEM_IS_NOACCESS_LOOP);
4078       vabits2 = get_vabits2(a);
4079       if (VA_BITS2_NOACCESS != vabits2) {
4080          if (bad_addr != NULL) *bad_addr = a;
4081          return False;
4082       }
4083       a++;
4084    }
4085    return True;
4086 }
4087 
4088 static Bool is_mem_addressable ( Addr a, SizeT len,
4089                                  /*OUT*/Addr* bad_addr )
4090 {
4091    SizeT i;
4092    UWord vabits2;
4093 
4094    PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE);
4095    for (i = 0; i < len; i++) {
4096       PROF_EVENT(MCPE_IS_MEM_ADDRESSABLE_LOOP);
4097       vabits2 = get_vabits2(a);
4098       if (VA_BITS2_NOACCESS == vabits2) {
4099          if (bad_addr != NULL) *bad_addr = a;
4100          return False;
4101       }
4102       a++;
4103    }
4104    return True;
4105 }
4106 
4107 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
4108                                       /*OUT*/Addr* bad_addr,
4109                                       /*OUT*/UInt* otag )
4110 {
4111    SizeT i;
4112    UWord vabits2;
4113 
4114    PROF_EVENT(MCPE_IS_MEM_DEFINED);
4115    DEBUG("is_mem_defined\n");
4116 
4117    if (otag)     *otag = 0;
4118    if (bad_addr) *bad_addr = 0;
4119    for (i = 0; i < len; i++) {
4120       PROF_EVENT(MCPE_IS_MEM_DEFINED_LOOP);
4121       vabits2 = get_vabits2(a);
4122       if (VA_BITS2_DEFINED != vabits2) {
4123          // Error!  Nb: Report addressability errors in preference to
4124          // definedness errors.  And don't report definedness errors unless
4125          // --undef-value-errors=yes.
4126          if (bad_addr) {
4127             *bad_addr = a;
4128          }
4129          if (VA_BITS2_NOACCESS == vabits2) {
4130             return MC_AddrErr;
4131          }
4132          if (MC_(clo_mc_level) >= 2) {
4133             if (otag && MC_(clo_mc_level) == 3) {
4134                *otag = MC_(helperc_b_load1)( a );
4135             }
4136             return MC_ValueErr;
4137          }
4138       }
4139       a++;
4140    }
4141    return MC_Ok;
4142 }
4143 
4144 
4145 /* Like is_mem_defined but doesn't give up at the first uninitialised
4146    byte -- the entire range is always checked.  This is important for
4147    detecting errors in the case where a checked range strays into
4148    invalid memory, but that fact is not detected by the ordinary
4149    is_mem_defined(), because of an undefined section that precedes the
4150    out of range section, possibly as a result of an alignment hole in
4151    the checked data.  This version always checks the entire range and
4152    can report both a definedness and an accessibility error, if
4153    necessary. */
4154 static void is_mem_defined_comprehensive (
4155                Addr a, SizeT len,
4156                /*OUT*/Bool* errorV,    /* is there a definedness err? */
4157                /*OUT*/Addr* bad_addrV, /* if so where? */
4158                /*OUT*/UInt* otagV,     /* and what's its otag? */
4159                /*OUT*/Bool* errorA,    /* is there an addressability err? */
4160                /*OUT*/Addr* bad_addrA  /* if so where? */
4161             )
4162 {
4163    SizeT i;
4164    UWord vabits2;
4165    Bool  already_saw_errV = False;
4166 
4167    PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE);
4168    DEBUG("is_mem_defined_comprehensive\n");
4169 
4170    tl_assert(!(*errorV || *errorA));
4171 
4172    for (i = 0; i < len; i++) {
4173       PROF_EVENT(MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP);
4174       vabits2 = get_vabits2(a);
4175       switch (vabits2) {
4176          case VA_BITS2_DEFINED:
4177             a++;
4178             break;
4179          case VA_BITS2_UNDEFINED:
4180          case VA_BITS2_PARTDEFINED:
4181             if (!already_saw_errV) {
4182                *errorV    = True;
4183                *bad_addrV = a;
4184                if (MC_(clo_mc_level) == 3) {
4185                   *otagV = MC_(helperc_b_load1)( a );
4186                } else {
4187                   *otagV = 0;
4188                }
4189                already_saw_errV = True;
4190             }
4191             a++; /* keep going */
4192             break;
4193          case VA_BITS2_NOACCESS:
4194             *errorA    = True;
4195             *bad_addrA = a;
4196             return; /* give up now. */
4197          default:
4198             tl_assert(0);
4199       }
4200    }
4201 }
4202 
4203 
4204 /* Check a zero-terminated ascii string.  Tricky -- don't want to
4205    examine the actual bytes, to find the end, until we're sure it is
4206    safe to do so. */
4207 
4208 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
4209 {
4210    UWord vabits2;
4211 
4212    PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ);
4213    DEBUG("mc_is_defined_asciiz\n");
4214 
4215    if (otag)     *otag = 0;
4216    if (bad_addr) *bad_addr = 0;
4217    while (True) {
4218       PROF_EVENT(MCPE_IS_DEFINED_ASCIIZ_LOOP);
4219       vabits2 = get_vabits2(a);
4220       if (VA_BITS2_DEFINED != vabits2) {
4221          // Error!  Nb: Report addressability errors in preference to
4222          // definedness errors.  And don't report definedness errors unless
4223          // --undef-value-errors=yes.
4224          if (bad_addr) {
4225             *bad_addr = a;
4226          }
4227          if (VA_BITS2_NOACCESS == vabits2) {
4228             return MC_AddrErr;
4229          }
4230          if (MC_(clo_mc_level) >= 2) {
4231             if (otag && MC_(clo_mc_level) == 3) {
4232                *otag = MC_(helperc_b_load1)( a );
4233             }
4234             return MC_ValueErr;
4235          }
4236       }
4237       /* Ok, a is safe to read. */
4238       if (* ((UChar*)a) == 0) {
4239          return MC_Ok;
4240       }
4241       a++;
4242    }
4243 }
4244 
4245 
4246 /*------------------------------------------------------------*/
4247 /*--- Memory event handlers                                ---*/
4248 /*------------------------------------------------------------*/
4249 
4250 static
4251 void check_mem_is_addressable ( CorePart part, ThreadId tid, const HChar* s,
4252                                 Addr base, SizeT size )
4253 {
4254    Addr bad_addr;
4255    Bool ok = is_mem_addressable ( base, size, &bad_addr );
4256 
4257    if (!ok) {
4258       switch (part) {
4259       case Vg_CoreSysCall:
4260          MC_(record_memparam_error) ( tid, bad_addr,
4261                                       /*isAddrErr*/True, s, 0/*otag*/ );
4262          break;
4263 
4264       case Vg_CoreSignal:
4265          MC_(record_core_mem_error)( tid, s );
4266          break;
4267 
4268       default:
4269          VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
4270       }
4271    }
4272 }
4273 
4274 static
4275 void check_mem_is_defined ( CorePart part, ThreadId tid, const HChar* s,
4276                             Addr base, SizeT size )
4277 {
4278    UInt otag = 0;
4279    Addr bad_addr;
4280    MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
4281 
4282    if (MC_Ok != res) {
4283       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4284 
4285       switch (part) {
4286       case Vg_CoreSysCall:
4287          MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4288                                       isAddrErr ? 0 : otag );
4289          break;
4290 
4291       case Vg_CoreSysCallArgInMem:
4292          MC_(record_regparam_error) ( tid, s, otag );
4293          break;
4294 
4295       /* If we're being asked to jump to a silly address, record an error
4296          message before potentially crashing the entire system. */
4297       case Vg_CoreTranslate:
4298          MC_(record_jump_error)( tid, bad_addr );
4299          break;
4300 
4301       default:
4302          VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
4303       }
4304    }
4305 }
4306 
4307 static
4308 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
4309                                    const HChar* s, Addr str )
4310 {
4311    MC_ReadResult res;
4312    Addr bad_addr = 0;   // shut GCC up
4313    UInt otag = 0;
4314 
4315    tl_assert(part == Vg_CoreSysCall);
4316    res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
4317    if (MC_Ok != res) {
4318       Bool isAddrErr = ( MC_AddrErr == res ? True : False );
4319       MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
4320                                    isAddrErr ? 0 : otag );
4321    }
4322 }
4323 
4324 /* Handling of mmap and mprotect is not as simple as it seems.
4325 
4326    The underlying semantics are that memory obtained from mmap is
4327    always initialised, but may be inaccessible.  And changes to the
4328    protection of memory do not change its contents and hence not its
4329    definedness state.  Problem is we can't model
4330    inaccessible-but-with-some-definedness state; once we mark memory
4331    as inaccessible we lose all info about definedness, and so can't
4332    restore that if it is later made accessible again.
4333 
4334    One obvious thing to do is this:
4335 
4336       mmap/mprotect NONE  -> noaccess
4337       mmap/mprotect other -> defined
4338 
4339    The problem case here is: taking accessible memory, writing
4340    uninitialised data to it, mprotecting it NONE and later mprotecting
4341    it back to some accessible state causes the undefinedness to be
4342    lost.
4343 
4344    A better proposal is:
4345 
4346      (1) mmap NONE       ->  make noaccess
4347      (2) mmap other      ->  make defined
4348 
4349      (3) mprotect NONE   ->  # no change
4350      (4) mprotect other  ->  change any "noaccess" to "defined"
4351 
4352    (2) is OK because memory newly obtained from mmap really is defined
4353        (zeroed out by the kernel -- doing anything else would
4354        constitute a massive security hole.)
4355 
4356    (1) is OK because the only way to make the memory usable is via
4357        (4), in which case we also wind up correctly marking it all as
4358        defined.
4359 
4360    (3) is the weak case.  We choose not to change the memory state
4361        (presumably the range is in some mixture of "defined" and
4362        "undefined", viz, accessible but with arbitrary V bits).  Doing
4363        nothing means we retain the V bits, so that if the memory is
4364        later mprotected "other", the V bits remain unchanged, so there
4365        can be no false negatives.  The bad effect is that if there's
4366        an access in the area, then MC cannot warn; but at least we'll
4367        get a SEGV to show, so it's better than nothing.
4368 
4369    Consider the sequence (3) followed by (4).  Any memory that was
4370    "defined" or "undefined" previously retains its state (as
4371    required).  Any memory that was "noaccess" before can only have
4372    been made that way by (1), and so it's OK to change it to
4373    "defined".
4374 
4375    See https://bugs.kde.org/show_bug.cgi?id=205541
4376    and https://bugs.kde.org/show_bug.cgi?id=210268
4377 */
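
/* Illustrative sketch (not part of Memcheck itself): a hypothetical
   client sequence showing why (3) deliberately leaves the V bits alone:

      p = mmap(NULL, n, PROT_READ|PROT_WRITE,
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);   // (2): marked defined
      p[0] = uninitialised_local;     // store propagates undefined V bits
      mprotect(p, n, PROT_NONE);      // (3): V bits left untouched
      mprotect(p, n, PROT_READ|PROT_WRITE);
                                      // (4): only noaccess -> defined

   Because (3) changed nothing, the undefined V bits written in the
   second step survive the final mprotect, so a later read of p[0] is
   still reported; only memory that was genuinely noaccess is flipped
   to defined. */
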
4378 static
4379 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
4380                        ULong di_handle )
4381 {
4382    if (rr || ww || xx) {
4383       /* (2) mmap/mprotect other -> defined */
4384       MC_(make_mem_defined)(a, len);
4385    } else {
4386       /* (1) mmap/mprotect NONE  -> noaccess */
4387       MC_(make_mem_noaccess)(a, len);
4388    }
4389 }
4390 
4391 static
4392 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
4393 {
4394    if (rr || ww || xx) {
4395       /* (4) mprotect other  ->  change any "noaccess" to "defined" */
4396       make_mem_defined_if_noaccess(a, len);
4397    } else {
4398       /* (3) mprotect NONE   ->  # no change */
4399       /* do nothing */
4400    }
4401 }
4402 
4403 
4404 static
4405 void mc_new_mem_startup( Addr a, SizeT len,
4406                          Bool rr, Bool ww, Bool xx, ULong di_handle )
4407 {
4408    // Because code is defined, initialised variables get put in the data
4409    // segment and are defined, and uninitialised variables get put in the
4410    // bss segment and are auto-zeroed (and so defined).
4411    //
4412    // It's possible that there will be padding between global variables.
4413    // This will also be auto-zeroed, and marked as defined by Memcheck.  If
4414    // a program uses it, Memcheck will not complain.  This is arguably a
4415    // false negative, but it's a grey area -- the behaviour is defined (the
4416    // padding is zeroed) but it's probably not what the user intended.  And
4417    // we can't avoid it.
4418    //
4419    // Note: we generally ignore RWX permissions, because we can't track them
4420    // without requiring more than one A bit which would slow things down a
4421    // lot.  But on Darwin the 0th page is mapped but !R and !W and !X.
4422    // So we mark any such pages as "unaddressable".
4423    DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
4424          a, (ULong)len, rr, ww, xx);
4425    mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
4426 }
4427 
4428 static
4429 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
4430 {
4431    MC_(make_mem_defined)(a, len);
4432 }
4433 
4434 
4435 /*------------------------------------------------------------*/
4436 /*--- Register event handlers                              ---*/
4437 /*------------------------------------------------------------*/
4438 
4439 /* Try and get a nonzero origin for the guest state section of thread
4440    tid characterised by (offset,size).  Return 0 if nothing to show
4441    for it. */
4442 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
4443                                              Int offset, SizeT size )
4444 {
4445    Int   sh2off;
4446    UInt  area[3];
4447    UInt  otag;
4448    sh2off = MC_(get_otrack_shadow_offset)( offset, size );
4449    if (sh2off == -1)
4450       return 0;  /* This piece of guest state is not tracked */
4451    tl_assert(sh2off >= 0);
4452    tl_assert(0 == (sh2off % 4));
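   /* area[0] and area[2] are canary words; the asserts below check that
      VG_(get_shadow_regs_area) wrote only the 4 otag bytes in area[1]. */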
4453    area[0] = 0x31313131;
4454    area[2] = 0x27272727;
4455    VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
4456    tl_assert(area[0] == 0x31313131);
4457    tl_assert(area[2] == 0x27272727);
4458    otag = area[1];
4459    return otag;
4460 }
4461 
4462 
4463 /* When some chunk of guest state is written, mark the corresponding
4464    shadow area as valid.  This is used to initialise arbitrarily large
4465    chunks of guest state, hence the _SIZE value, which has to be as
4466    big as the biggest guest state.
4467 */
4468 static void mc_post_reg_write ( CorePart part, ThreadId tid,
4469                                 PtrdiffT offset, SizeT size)
4470 {
4471 #  define MAX_REG_WRITE_SIZE 1728
4472    UChar area[MAX_REG_WRITE_SIZE];
4473    tl_assert(size <= MAX_REG_WRITE_SIZE);
4474    VG_(memset)(area, V_BITS8_DEFINED, size);
4475    VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
4476 #  undef MAX_REG_WRITE_SIZE
4477 }
4478 
4479 static
4480 void mc_post_reg_write_clientcall ( ThreadId tid,
4481                                     PtrdiffT offset, SizeT size, Addr f)
4482 {
4483    mc_post_reg_write(/*dummy*/0, tid, offset, size);
4484 }
4485 
4486 /* Look at the definedness of the guest's shadow state for
4487    [offset, offset+len).  If any part of that is undefined, record
4488    a parameter error.
4489 */
4490 static void mc_pre_reg_read ( CorePart part, ThreadId tid, const HChar* s,
4491                               PtrdiffT offset, SizeT size)
4492 {
4493    Int   i;
4494    Bool  bad;
4495    UInt  otag;
4496 
4497    UChar area[16];
4498    tl_assert(size <= 16);
4499 
4500    VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
4501 
4502    bad = False;
4503    for (i = 0; i < size; i++) {
4504       if (area[i] != V_BITS8_DEFINED) {
4505          bad = True;
4506          break;
4507       }
4508    }
4509 
4510    if (!bad)
4511       return;
4512 
4513    /* We've found some undefinedness.  See if we can also find an
4514       origin for it. */
4515    otag = mb_get_origin_for_guest_offset( tid, offset, size );
4516    MC_(record_regparam_error) ( tid, s, otag );
4517 }
4518 
4519 
4520 /*------------------------------------------------------------*/
4521 /*--- Register-memory event handlers                       ---*/
4522 /*------------------------------------------------------------*/
4523 
4524 static void mc_copy_mem_to_reg ( CorePart part, ThreadId tid, Addr a,
4525                                  PtrdiffT guest_state_offset, SizeT size )
4526 {
4527    SizeT i;
4528    UChar vbits8;
4529    Int offset;
4530    UInt d32;
4531 
4532    /* Slow loop. */
4533    for (i = 0; i < size; i++) {
4534       get_vbits8( a+i, &vbits8 );
4535       VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, guest_state_offset+i,
4536                                  1, &vbits8 );
4537    }
4538 
4539    if (MC_(clo_mc_level) != 3)
4540       return;
4541 
4542    /* Track origins. */
4543    offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4544    if (offset == -1)
4545       return;
4546 
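   /* Origins are tracked at otag (32-bit) granularity, so a single
      32-bit otag covering the source range is loaded below and stored
      into shadow area 2, whatever the transfer size. */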
4547    switch (size) {
4548    case 1:
4549       d32 = MC_(helperc_b_load1)( a );
4550       break;
4551    case 2:
4552       d32 = MC_(helperc_b_load2)( a );
4553       break;
4554    case 4:
4555       d32 = MC_(helperc_b_load4)( a );
4556       break;
4557    case 8:
4558       d32 = MC_(helperc_b_load8)( a );
4559       break;
4560    case 16:
4561       d32 = MC_(helperc_b_load16)( a );
4562       break;
4563    case 32:
4564       d32 = MC_(helperc_b_load32)( a );
4565       break;
4566    default:
4567       tl_assert(0);
4568    }
4569 
4570    VG_(set_shadow_regs_area)( tid, 2/*shadowNo*/, offset, 4, (UChar*)&d32 );
4571 }
4572 
4573 static void mc_copy_reg_to_mem ( CorePart part, ThreadId tid,
4574                                  PtrdiffT guest_state_offset, Addr a,
4575                                  SizeT size )
4576 {
4577    SizeT i;
4578    UChar vbits8;
4579    Int offset;
4580    UInt d32;
4581 
4582    /* Slow loop. */
4583    for (i = 0; i < size; i++) {
4584       VG_(get_shadow_regs_area)( tid, &vbits8, 1/*shadowNo*/,
4585                                  guest_state_offset+i, 1 );
4586       set_vbits8( a+i, vbits8 );
4587    }
4588 
4589    if (MC_(clo_mc_level) != 3)
4590       return;
4591 
4592    /* Track origins. */
4593    offset = MC_(get_otrack_shadow_offset)( guest_state_offset, size );
4594    if (offset == -1)
4595       return;
4596 
4597    VG_(get_shadow_regs_area)( tid, (UChar*)&d32, 2/*shadowNo*/, offset, 4 );
4598    switch (size) {
4599    case 1:
4600       MC_(helperc_b_store1)( a, d32 );
4601       break;
4602    case 2:
4603       MC_(helperc_b_store2)( a, d32 );
4604       break;
4605    case 4:
4606       MC_(helperc_b_store4)( a, d32 );
4607       break;
4608    case 8:
4609       MC_(helperc_b_store8)( a, d32 );
4610       break;
4611    case 16:
4612       MC_(helperc_b_store16)( a, d32 );
4613       break;
4614    case 32:
4615       MC_(helperc_b_store32)( a, d32 );
4616       break;
4617    default:
4618       tl_assert(0);
4619    }
4620 }
4621 
4622 
4623 /*------------------------------------------------------------*/
4624 /*--- Some static assertions                               ---*/
4625 /*------------------------------------------------------------*/
4626 
4627 /* The handwritten assembly helpers below have baked-in assumptions
4628    about various constant values.  These assertions attempt to make
4629    that a bit safer by checking those values and flagging changes that
4630    would make the assembly invalid.  Not perfect but it's better than
4631    nothing. */
4632 
4633 STATIC_ASSERT(SM_CHUNKS * 4 == 65536);
4634 
4635 STATIC_ASSERT(VA_BITS8_DEFINED   == 0xAA);
4636 STATIC_ASSERT(VA_BITS8_UNDEFINED == 0x55);
4637 
4638 STATIC_ASSERT(V_BITS32_DEFINED   == 0x00000000);
4639 STATIC_ASSERT(V_BITS32_UNDEFINED == 0xFFFFFFFF);
4640 
4641 STATIC_ASSERT(VA_BITS4_DEFINED == 0xA);
4642 STATIC_ASSERT(VA_BITS4_UNDEFINED == 0x5);
4643 
4644 STATIC_ASSERT(V_BITS16_DEFINED == 0x0000);
4645 STATIC_ASSERT(V_BITS16_UNDEFINED == 0xFFFF);
4646 
4647 STATIC_ASSERT(VA_BITS2_DEFINED == 2);
4648 STATIC_ASSERT(VA_BITS2_UNDEFINED == 1);
4649 
4650 STATIC_ASSERT(V_BITS8_DEFINED == 0x00);
4651 STATIC_ASSERT(V_BITS8_UNDEFINED == 0xFF);
4652 
4653 
4654 /*------------------------------------------------------------*/
4655 /*--- Functions called directly from generated code:       ---*/
4656 /*--- Load/store handlers.                                 ---*/
4657 /*------------------------------------------------------------*/
4658 
4659 /* Types:  LOADV32, LOADV16, LOADV8 are:
4660                UWord fn ( Addr a )
4661    so they return 32-bits on 32-bit machines and 64-bits on
4662    64-bit machines.  Addr has the same size as a host word.
4663 
4664    LOADV64 is always  ULong fn ( Addr a )
4665 
4666    Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4667    are a UWord, and for STOREV64 they are a ULong.
4668 */
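
/* For orientation when reading the fast paths below: a vabits16 of
   0xAAAA (VA_BITS16_DEFINED) for an aligned 8-byte chunk expands to the
   V value 0 (all bits defined), and 0x5555 (VA_BITS16_UNDEFINED) expands
   to all-ones (all bits undefined); these values follow from the
   STATIC_ASSERTs above.  Any other vabits pattern means the chunk is of
   mixed state and the slow path is taken. */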
4669 
4670 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4671    naturally '_sz/8'-aligned, or it exceeds the range covered by the
4672    primary map.  This is all very tricky (and important!), so let's
4673    work through the maths by hand (below), *and* assert for these
4674    values at startup. */
4675 #define MASK(_szInBytes) \
4676    ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4677 
4678 /* MASK only exists so as to define this macro. */
4679 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
4680    ((_a) & MASK((_szInBits>>3)))
4681 
4682 /* On a 32-bit machine:
4683 
4684    N_PRIMARY_BITS          == 16, so
4685    N_PRIMARY_MAP           == 0x10000, so
4686    N_PRIMARY_MAP-1         == 0xFFFF, so
4687    (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4688 
4689    MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4690            = ~ ( 0xFFFF | 0xFFFF0000 )
4691            = ~ 0xFFFF'FFFF
4692            = 0
4693 
4694    MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4695            = ~ ( 0xFFFE | 0xFFFF0000 )
4696            = ~ 0xFFFF'FFFE
4697            = 1
4698 
4699    MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4700            = ~ ( 0xFFFC | 0xFFFF0000 )
4701            = ~ 0xFFFF'FFFC
4702            = 3
4703 
4704    MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4705            = ~ ( 0xFFF8 | 0xFFFF0000 )
4706            = ~ 0xFFFF'FFF8
4707            = 7
4708 
4709    Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4710    precisely when a is not 1/2/4/8-bytes aligned.  And obviously, for
4711    the 1-byte alignment case, it is always a zero value, since MASK(1)
4712    is zero.  All as expected.
4713 
4714    On a 64-bit machine, it's more complex, since we're testing
4715    simultaneously for misalignment and for the address being at or
4716    above 64G:
4717 
4718    N_PRIMARY_BITS          == 20, so
4719    N_PRIMARY_MAP           == 0x100000, so
4720    N_PRIMARY_MAP-1         == 0xFFFFF, so
4721    (N_PRIMARY_MAP-1) << 16 == 0xF'FFFF'0000, and so
4722 
4723    MASK(1) = ~ ( (0x10000 - 1) | 0xF'FFFF'0000 )
4724            = ~ ( 0xFFFF | 0xF'FFFF'0000 )
4725            = ~ 0xF'FFFF'FFFF
4726            = 0xFFFF'FFF0'0000'0000
4727 
4728    MASK(2) = ~ ( (0x10000 - 2) | 0xF'FFFF'0000 )
4729            = ~ ( 0xFFFE | 0xF'FFFF'0000 )
4730            = ~ 0xF'FFFF'FFFE
4731            = 0xFFFF'FFF0'0000'0001
4732 
4733    MASK(4) = ~ ( (0x10000 - 4) | 0xF'FFFF'0000 )
4734            = ~ ( 0xFFFC | 0xF'FFFF'0000 )
4735            = ~ 0xF'FFFF'FFFC
4736            = 0xFFFF'FFF0'0000'0003
4737 
4738    MASK(8) = ~ ( (0x10000 - 8) | 0xF'FFFF'0000 )
4739            = ~ ( 0xFFF8 | 0xF'FFFF'0000 )
4740            = ~ 0xF'FFFF'FFF8
4741            = 0xFFFF'FFF0'0000'0007
4742 */
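
/* A couple of concrete (illustrative) 64-bit checks using MASK(8):

      a = 0x7FFF1235:      a & 0xFFFF'FFF0'0000'0007 = 0x5
                           (nonzero: not 8-aligned, so the slow path is used)
      a = 0x10'0000'0000:  a & 0xFFFF'FFF0'0000'0007 = 0x10'0000'0000
                           (nonzero: at or above 64G, so the slow path is used)
      a = 0x7FFF1238:      a & 0xFFFF'FFF0'0000'0007 = 0
                           (8-aligned and below 64G, so the fast path applies)
*/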
4743 
4744 /*------------------------------------------------------------*/
4745 /*--- LOADV256 and LOADV128                                ---*/
4746 /*------------------------------------------------------------*/
4747 
4748 static INLINE
4749 void mc_LOADV_128_or_256 ( /*OUT*/ULong* res,
4750                            Addr a, SizeT nBits, Bool isBigEndian )
4751 {
4752    PROF_EVENT(MCPE_LOADV_128_OR_256);
4753 
4754 #ifndef PERF_FAST_LOADV
4755    mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4756    return;
4757 #else
4758    {
4759       UWord   sm_off16, vabits16, j;
4760       UWord   nBytes  = nBits / 8;
4761       UWord   nULongs = nBytes / 8;
4762       SecMap* sm;
4763 
4764       if (UNLIKELY( UNALIGNED_OR_HIGH(a,nBits) )) {
4765          PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW1);
4766          mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4767          return;
4768       }
4769 
4770       /* Handle common cases quickly: a (and a+8 and a+16 etc.) is
4771          suitably aligned, is mapped, and addressable. */
4772       for (j = 0; j < nULongs; j++) {
4773          sm       = get_secmap_for_reading_low(a + 8*j);
4774          sm_off16 = SM_OFF_16(a + 8*j);
4775          vabits16 = sm->vabits16[sm_off16];
4776 
4777          // Convert V bits from compact memory form to expanded
4778          // register form.
4779          if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4780             res[j] = V_BITS64_DEFINED;
4781          } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4782             res[j] = V_BITS64_UNDEFINED;
4783          } else {
4784             /* Slow case: some block of 8 bytes are not all-defined or
4785                all-undefined. */
4786             PROF_EVENT(MCPE_LOADV_128_OR_256_SLOW2);
4787             mc_LOADV_128_or_256_slow( res, a, nBits, isBigEndian );
4788             return;
4789          }
4790       }
4791       return;
4792    }
4793 #endif
4794 }
4795 
4796 VG_REGPARM(2) void MC_(helperc_LOADV256be) ( /*OUT*/V256* res, Addr a )
4797 {
4798    mc_LOADV_128_or_256(&res->w64[0], a, 256, True);
4799 }
4800 VG_REGPARM(2) void MC_(helperc_LOADV256le) ( /*OUT*/V256* res, Addr a )
4801 {
4802    mc_LOADV_128_or_256(&res->w64[0], a, 256, False);
4803 }
4804 
4805 VG_REGPARM(2) void MC_(helperc_LOADV128be) ( /*OUT*/V128* res, Addr a )
4806 {
4807    mc_LOADV_128_or_256(&res->w64[0], a, 128, True);
4808 }
4809 VG_REGPARM(2) void MC_(helperc_LOADV128le) ( /*OUT*/V128* res, Addr a )
4810 {
4811    mc_LOADV_128_or_256(&res->w64[0], a, 128, False);
4812 }
4813 
4814 /*------------------------------------------------------------*/
4815 /*--- LOADV64                                              ---*/
4816 /*------------------------------------------------------------*/
4817 
4818 static INLINE
4819 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4820 {
4821    PROF_EVENT(MCPE_LOADV64);
4822 
4823 #ifndef PERF_FAST_LOADV
4824    return mc_LOADVn_slow( a, 64, isBigEndian );
4825 #else
4826    {
4827       UWord   sm_off16, vabits16;
4828       SecMap* sm;
4829 
4830       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4831          PROF_EVENT(MCPE_LOADV64_SLOW1);
4832          return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4833       }
4834 
4835       sm       = get_secmap_for_reading_low(a);
4836       sm_off16 = SM_OFF_16(a);
4837       vabits16 = sm->vabits16[sm_off16];
4838 
4839       // Handle common case quickly: a is suitably aligned, is mapped, and
4840       // addressable.
4841       // Convert V bits from compact memory form to expanded register form.
4842       if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4843          return V_BITS64_DEFINED;
4844       } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4845          return V_BITS64_UNDEFINED;
4846       } else {
4847          /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4848          PROF_EVENT(MCPE_LOADV64_SLOW2);
4849          return mc_LOADVn_slow( a, 64, isBigEndian );
4850       }
4851    }
4852 #endif
4853 }
4854 
4855 // Generic for all platforms
4856 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4857 {
4858    return mc_LOADV64(a, True);
4859 }
4860 
4861 // Non-generic assembly for arm32-linux
4862 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4863     && defined(VGP_arm_linux)
4864 __asm__( /* Derived from the 32 bit assembly helper */
4865 ".text                                  \n"
4866 ".align 2                               \n"
4867 ".global vgMemCheck_helperc_LOADV64le   \n"
4868 ".type   vgMemCheck_helperc_LOADV64le, %function \n"
4869 "vgMemCheck_helperc_LOADV64le:          \n"
4870 "      tst    r0, #7                    \n"
4871 "      movw   r3, #:lower16:primary_map \n"
4872 "      bne    .LLV64LEc4                \n" // if misaligned
4873 "      lsr    r2, r0, #16               \n"
4874 "      movt   r3, #:upper16:primary_map \n"
4875 "      ldr    r2, [r3, r2, lsl #2]      \n"
4876 "      uxth   r1, r0                    \n" // r1 is 0-(16)-0 X-(13)-X 000
4877 "      movw   r3, #0xAAAA               \n"
4878 "      lsr    r1, r1, #2                \n" // r1 is 0-(16)-0 00 X-(13)-X 0
4879 "      ldrh   r1, [r2, r1]              \n"
4880 "      cmp    r1, r3                    \n" // 0xAAAA == VA_BITS16_DEFINED
4881 "      bne    .LLV64LEc0                \n" // if !all_defined
4882 "      mov    r1, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
4883 "      mov    r0, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
4884 "      bx     lr                        \n"
4885 ".LLV64LEc0:                            \n"
4886 "      movw   r3, #0x5555               \n"
4887 "      cmp    r1, r3                    \n" // 0x5555 == VA_BITS16_UNDEFINED
4888 "      bne    .LLV64LEc4                \n" // if !all_undefined
4889 "      mov    r1, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4890 "      mov    r0, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
4891 "      bx     lr                        \n"
4892 ".LLV64LEc4:                            \n"
4893 "      push   {r4, lr}                  \n"
4894 "      mov    r2, #0                    \n"
4895 "      mov    r1, #64                   \n"
4896 "      bl     mc_LOADVn_slow            \n"
4897 "      pop    {r4, pc}                  \n"
4898 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
4899 ".previous\n"
4900 );
4901 
4902 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
4903       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
4904 __asm__(
4905 ".text\n"
4906 ".align 16\n"
4907 ".global vgMemCheck_helperc_LOADV64le\n"
4908 ".type   vgMemCheck_helperc_LOADV64le, @function\n"
4909 "vgMemCheck_helperc_LOADV64le:\n"
4910 "      test   $0x7,  %eax\n"
4911 "      jne    .LLV64LE2\n"          /* jump if not aligned */
4912 "      mov    %eax,  %ecx\n"
4913 "      movzwl %ax,   %edx\n"
4914 "      shr    $0x10, %ecx\n"
4915 "      mov    primary_map(,%ecx,4), %ecx\n"
4916 "      shr    $0x3,  %edx\n"
4917 "      movzwl (%ecx,%edx,2), %edx\n"
4918 "      cmp    $0xaaaa, %edx\n"
4919 "      jne    .LLV64LE1\n"          /* jump if not all defined */
4920 "      xor    %eax, %eax\n"         /* return 0 in edx:eax */
4921 "      xor    %edx, %edx\n"
4922 "      ret\n"
4923 ".LLV64LE1:\n"
4924 "      cmp    $0x5555, %edx\n"
4925 "      jne    .LLV64LE2\n"         /* jump if not all undefined */
4926 "      or     $0xffffffff, %eax\n" /* else return all bits set in edx:eax */
4927 "      or     $0xffffffff, %edx\n"
4928 "      ret\n"
4929 ".LLV64LE2:\n"
4930 "      xor    %ecx,  %ecx\n"  /* tail call to mc_LOADVn_slow(a, 64, 0) */
4931 "      mov    $64,   %edx\n"
4932 "      jmp    mc_LOADVn_slow\n"
4933 ".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n"
4934 ".previous\n"
4935 );
4936 
4937 #else
4938 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
4939 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4940 {
4941    return mc_LOADV64(a, False);
4942 }
4943 #endif
4944 
4945 /*------------------------------------------------------------*/
4946 /*--- STOREV64                                             ---*/
4947 /*------------------------------------------------------------*/
4948 
4949 static INLINE
4950 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4951 {
4952    PROF_EVENT(MCPE_STOREV64);
4953 
4954 #ifndef PERF_FAST_STOREV
4955    // XXX: this slow case seems to be marginally faster than the fast case!
4956    // Investigate further.
4957    mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4958 #else
4959    {
4960       UWord   sm_off16, vabits16;
4961       SecMap* sm;
4962 
4963       if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4964          PROF_EVENT(MCPE_STOREV64_SLOW1);
4965          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4966          return;
4967       }
4968 
4969       sm       = get_secmap_for_reading_low(a);
4970       sm_off16 = SM_OFF_16(a);
4971       vabits16 = sm->vabits16[sm_off16];
4972 
4973       // To understand the below cleverness, see the extensive comments
4974       // in MC_(helperc_STOREV8).
4975       if (LIKELY(V_BITS64_DEFINED == vbits64)) {
4976          if (LIKELY(vabits16 == (UShort)VA_BITS16_DEFINED)) {
4977             return;
4978          }
4979          if (!is_distinguished_sm(sm) && VA_BITS16_UNDEFINED == vabits16) {
4980             sm->vabits16[sm_off16] = VA_BITS16_DEFINED;
4981             return;
4982          }
4983          PROF_EVENT(MCPE_STOREV64_SLOW2);
4984          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4985          return;
4986       }
4987       if (V_BITS64_UNDEFINED == vbits64) {
4988          if (vabits16 == (UShort)VA_BITS16_UNDEFINED) {
4989             return;
4990          }
4991          if (!is_distinguished_sm(sm) && VA_BITS16_DEFINED == vabits16) {
4992             sm->vabits16[sm_off16] = VA_BITS16_UNDEFINED;
4993             return;
4994          }
4995          PROF_EVENT(MCPE_STOREV64_SLOW3);
4996          mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4997          return;
4998       }
4999 
5000       PROF_EVENT(MCPE_STOREV64_SLOW4);
5001       mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
5002    }
5003 #endif
5004 }
5005 
5006 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
5007 {
5008    mc_STOREV64(a, vbits64, True);
5009 }
5010 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
5011 {
5012    mc_STOREV64(a, vbits64, False);
5013 }
5014 
5015 /*------------------------------------------------------------*/
5016 /*--- LOADV32                                              ---*/
5017 /*------------------------------------------------------------*/
5018 
5019 static INLINE
5020 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
5021 {
5022    PROF_EVENT(MCPE_LOADV32);
5023 
5024 #ifndef PERF_FAST_LOADV
5025    return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5026 #else
5027    {
5028       UWord   sm_off, vabits8;
5029       SecMap* sm;
5030 
5031       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5032          PROF_EVENT(MCPE_LOADV32_SLOW1);
5033          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5034       }
5035 
5036       sm      = get_secmap_for_reading_low(a);
5037       sm_off  = SM_OFF(a);
5038       vabits8 = sm->vabits8[sm_off];
5039 
5040       // Handle common case quickly: a is suitably aligned, is mapped, and the
5041       // entire word32 it lives in is addressable.
5042       // Convert V bits from compact memory form to expanded register form.
5043       // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
5044       // Almost certainly not necessary, but be paranoid.
5045       if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5046          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
5047       } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
5048          return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
5049       } else {
5050          /* Slow case: the 4 bytes are not all-defined or all-undefined. */
5051          PROF_EVENT(MCPE_LOADV32_SLOW2);
5052          return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
5053       }
5054    }
5055 #endif
5056 }
5057 
5058 // Generic for all platforms
5059 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
5060 {
5061    return mc_LOADV32(a, True);
5062 }
5063 
5064 // Non-generic assembly for arm32-linux
5065 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5066     && defined(VGP_arm_linux)
5067 __asm__( /* Derived from NCode template */
5068 ".text                                  \n"
5069 ".align 2                               \n"
5070 ".global vgMemCheck_helperc_LOADV32le   \n"
5071 ".type   vgMemCheck_helperc_LOADV32le, %function \n"
5072 "vgMemCheck_helperc_LOADV32le:          \n"
5073 "      tst    r0, #3                    \n" // 1
5074 "      movw   r3, #:lower16:primary_map \n" // 1
5075 "      bne    .LLV32LEc4                \n" // 2  if misaligned
5076 "      lsr    r2, r0, #16               \n" // 3
5077 "      movt   r3, #:upper16:primary_map \n" // 3
5078 "      ldr    r2, [r3, r2, lsl #2]      \n" // 4
5079 "      uxth   r1, r0                    \n" // 4
5080 "      ldrb   r1, [r2, r1, lsr #2]      \n" // 5
5081 "      cmp    r1, #0xAA                 \n" // 6  0xAA == VA_BITS8_DEFINED
5082 "      bne    .LLV32LEc0                \n" // 7  if !all_defined
5083 "      mov    r0, #0x0                  \n" // 8  0x0 == V_BITS32_DEFINED
5084 "      bx     lr                        \n" // 9
5085 ".LLV32LEc0:                            \n"
5086 "      cmp    r1, #0x55                 \n" // 0x55 == VA_BITS8_UNDEFINED
5087 "      bne    .LLV32LEc4                \n" // if !all_undefined
5088 "      mov    r0, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
5089 "      bx     lr                        \n"
5090 ".LLV32LEc4:                            \n"
5091 "      push   {r4, lr}                  \n"
5092 "      mov    r2, #0                    \n"
5093 "      mov    r1, #32                   \n"
5094 "      bl     mc_LOADVn_slow            \n"
5095 "      pop    {r4, pc}                  \n"
5096 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
5097 ".previous\n"
5098 );
5099 
5100 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5101       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5102 __asm__(
5103 ".text\n"
5104 ".align 16\n"
5105 ".global vgMemCheck_helperc_LOADV32le\n"
5106 ".type   vgMemCheck_helperc_LOADV32le, @function\n"
5107 "vgMemCheck_helperc_LOADV32le:\n"
5108 "      test   $0x3,  %eax\n"
5109 "      jnz    .LLV32LE2\n"         /* jump if misaligned */
5110 "      mov    %eax,  %edx\n"
5111 "      shr    $16,   %edx\n"
5112 "      mov    primary_map(,%edx,4), %ecx\n"
5113 "      movzwl %ax,   %edx\n"
5114 "      shr    $2,    %edx\n"
5115 "      movzbl (%ecx,%edx,1), %edx\n"
5116 "      cmp    $0xaa, %edx\n"       /* compare to VA_BITS8_DEFINED */
5117 "      jne    .LLV32LE1\n"         /* jump if not completely defined */
5118 "      xor    %eax,  %eax\n"       /* else return V_BITS32_DEFINED */
5119 "      ret\n"
5120 ".LLV32LE1:\n"
5121 "      cmp    $0x55, %edx\n"       /* compare to VA_BITS8_UNDEFINED */
5122 "      jne    .LLV32LE2\n"         /* jump if not completely undefined */
5123 "      or     $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */
5124 "      ret\n"
5125 ".LLV32LE2:\n"
5126 "      xor    %ecx,  %ecx\n"       /* tail call mc_LOADVn_slow(a, 32, 0) */
5127 "      mov    $32,   %edx\n"
5128 "      jmp    mc_LOADVn_slow\n"
5129 ".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n"
5130 ".previous\n"
5131 );
5132 
5133 #else
5134 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5135 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
5136 {
5137    return mc_LOADV32(a, False);
5138 }
5139 #endif
5140 
5141 /*------------------------------------------------------------*/
5142 /*--- STOREV32                                             ---*/
5143 /*------------------------------------------------------------*/
5144 
5145 static INLINE
5146 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
5147 {
5148    PROF_EVENT(MCPE_STOREV32);
5149 
5150 #ifndef PERF_FAST_STOREV
5151    mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5152 #else
5153    {
5154       UWord   sm_off, vabits8;
5155       SecMap* sm;
5156 
5157       if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
5158          PROF_EVENT(MCPE_STOREV32_SLOW1);
5159          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5160          return;
5161       }
5162 
5163       sm      = get_secmap_for_reading_low(a);
5164       sm_off  = SM_OFF(a);
5165       vabits8 = sm->vabits8[sm_off];
5166 
5167       // To understand the below cleverness, see the extensive comments
5168       // in MC_(helperc_STOREV8).
5169       if (LIKELY(V_BITS32_DEFINED == vbits32)) {
5170          if (LIKELY(vabits8 == (UInt)VA_BITS8_DEFINED)) {
5171             return;
5172          }
5173          if (!is_distinguished_sm(sm)  && VA_BITS8_UNDEFINED == vabits8) {
5174             sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
5175             return;
5176          }
5177          PROF_EVENT(MCPE_STOREV32_SLOW2);
5178          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5179          return;
5180       }
5181       if (V_BITS32_UNDEFINED == vbits32) {
5182          if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
5183             return;
5184          }
5185          if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
5186             sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
5187             return;
5188          }
5189          PROF_EVENT(MCPE_STOREV32_SLOW3);
5190          mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5191          return;
5192       }
5193 
5194       PROF_EVENT(MCPE_STOREV32_SLOW4);
5195       mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
5196    }
5197 #endif
5198 }
5199 
5200 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
5201 {
5202    mc_STOREV32(a, vbits32, True);
5203 }
5204 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
5205 {
5206    mc_STOREV32(a, vbits32, False);
5207 }
5208 
5209 /*------------------------------------------------------------*/
5210 /*--- LOADV16                                              ---*/
5211 /*------------------------------------------------------------*/
5212 
5213 static INLINE
5214 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
5215 {
5216    PROF_EVENT(MCPE_LOADV16);
5217 
5218 #ifndef PERF_FAST_LOADV
5219    return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5220 #else
5221    {
5222       UWord   sm_off, vabits8;
5223       SecMap* sm;
5224 
5225       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5226          PROF_EVENT(MCPE_LOADV16_SLOW1);
5227          return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5228       }
5229 
5230       sm      = get_secmap_for_reading_low(a);
5231       sm_off  = SM_OFF(a);
5232       vabits8 = sm->vabits8[sm_off];
5233       // Handle common case quickly: a is suitably aligned, is mapped, and is
5234       // addressable.
5235       // Convert V bits from compact memory form to expanded register form
5236       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS16_DEFINED;   }
5237       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
5238       else {
5239          // The 4 (yes, 4) bytes are not all-defined or all-undefined, so
5240          // check the two bytes actually being loaded.
5241          UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
5242          if      (vabits4 == VA_BITS4_DEFINED  ) { return V_BITS16_DEFINED;   }
5243          else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
5244          else {
5245             /* Slow case: the two bytes are not all-defined or all-undefined. */
5246             PROF_EVENT(MCPE_LOADV16_SLOW2);
5247             return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
5248          }
5249       }
5250    }
5251 #endif
5252 }
5253 
5254 // Generic for all platforms
5255 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
5256 {
5257    return mc_LOADV16(a, True);
5258 }
5259 
5260 // Non-generic assembly for arm32-linux
5261 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5262     && defined(VGP_arm_linux)
5263 __asm__( /* Derived from NCode template */
5264 ".text                                  \n"
5265 ".align 2                               \n"
5266 ".global vgMemCheck_helperc_LOADV16le   \n"
5267 ".type   vgMemCheck_helperc_LOADV16le, %function \n"
5268 "vgMemCheck_helperc_LOADV16le:          \n" //
5269 "      tst    r0, #1                    \n" //
5270 "      bne    .LLV16LEc12               \n" // if misaligned
5271 "      lsr    r2, r0, #16               \n" // r2 = pri-map-ix
5272 "      movw   r3, #:lower16:primary_map \n" //
5273 "      uxth   r1, r0                    \n" // r1 = sec-map-offB
5274 "      movt   r3, #:upper16:primary_map \n" //
5275 "      ldr    r2, [r3, r2, lsl #2]      \n" // r2 = sec-map
5276 "      ldrb   r1, [r2, r1, lsr #2]      \n" // r1 = sec-map-VABITS8
5277 "      cmp    r1, #0xAA                 \n" // r1 == VA_BITS8_DEFINED?
5278 "      bne    .LLV16LEc0                \n" // no, goto .LLV16LEc0
5279 ".LLV16LEh9:                            \n" //
5280 "      mov    r0, #0xFFFFFFFF           \n" //
5281 "      lsl    r0, r0, #16               \n" // V_BITS16_DEFINED | top16safe
5282 "      bx     lr                        \n" //
5283 ".LLV16LEc0:                            \n" //
5284 "      cmp    r1, #0x55                 \n" // VA_BITS8_UNDEFINED
5285 "      bne    .LLV16LEc4                \n" //
5286 ".LLV16LEc2:                            \n" //
5287 "      mov    r0, #0xFFFFFFFF           \n" // V_BITS16_UNDEFINED | top16safe
5288 "      bx     lr                        \n" //
5289 ".LLV16LEc4:                            \n" //
5290        // r1 holds sec-map-VABITS8.  r0 holds the address and is 2-aligned.
5291        // Extract the relevant 4 bits and inspect.
5292 "      and    r2, r0, #2       \n" // addr & 2
5293 "      add    r2, r2, r2       \n" // 2 * (addr & 2)
5294 "      lsr    r1, r1, r2       \n" // sec-map-VABITS8 >> (2 * (addr & 2))
5295 "      and    r1, r1, #15      \n" // (sec-map-VABITS8 >> (2 * (addr & 2))) & 15
5296 
5297 "      cmp    r1, #0xA                  \n" // VA_BITS4_DEFINED
5298 "      beq    .LLV16LEh9                \n" //
5299 
5300 "      cmp    r1, #0x5                  \n" // VA_BITS4_UNDEFINED
5301 "      beq    .LLV16LEc2                \n" //
5302 
5303 ".LLV16LEc12:                           \n" //
5304 "      push   {r4, lr}                  \n" //
5305 "      mov    r2, #0                    \n" //
5306 "      mov    r1, #16                   \n" //
5307 "      bl     mc_LOADVn_slow            \n" //
5308 "      pop    {r4, pc}                  \n" //
5309 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5310 ".previous\n"
5311 );
5312 
5313 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5314       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5315 __asm__(
5316 ".text\n"
5317 ".align 16\n"
5318 ".global vgMemCheck_helperc_LOADV16le\n"
5319 ".type   vgMemCheck_helperc_LOADV16le, @function\n"
5320 "vgMemCheck_helperc_LOADV16le:\n"
5321 "      test   $0x1,  %eax\n"
5322 "      jne    .LLV16LE5\n"          /* jump if not aligned */
5323 "      mov    %eax,  %edx\n"
5324 "      shr    $0x10, %edx\n"
5325 "      mov    primary_map(,%edx,4), %ecx\n"
5326 "      movzwl %ax,   %edx\n"
5327 "      shr    $0x2,  %edx\n"
5328 "      movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5329 "      cmp    $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED */
5330 "      jne    .LLV16LE2\n"          /* jump if not all 32bits defined */
5331 ".LLV16LE1:\n"
5332 "      mov    $0xffff0000,%eax\n"   /* V_BITS16_DEFINED | top16safe */
5333 "      ret\n"
5334 ".LLV16LE2:\n"
5335 "      cmp    $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
5336 "      jne    .LLV16LE4\n"          /* jump if not all 32bits undefined */
5337 ".LLV16LE3:\n"
5338 "      or     $0xffffffff,%eax\n"   /* V_BITS16_UNDEFINED | top16safe */
5339 "      ret\n"
5340 ".LLV16LE4:\n"
5341 "      mov    %eax,  %ecx\n"
5342 "      and    $0x2,  %ecx\n"
5343 "      add    %ecx,  %ecx\n"
5344 "      sar    %cl,   %edx\n"
5345 "      and    $0xf,  %edx\n"
5346 "      cmp    $0xa,  %edx\n"
5347 "      je     .LLV16LE1\n"          /* jump if all 16bits are defined */
5348 "      cmp    $0x5,  %edx\n"
5349 "      je     .LLV16LE3\n"          /* jump if all 16bits are undefined */
5350 ".LLV16LE5:\n"
5351 "      xor    %ecx,  %ecx\n"        /* tail call mc_LOADVn_slow(a, 16, 0) */
5352 "      mov    $16,   %edx\n"
5353 "      jmp    mc_LOADVn_slow\n"
5354 ".size vgMemCheck_helperc_LOADV16le, .-vgMemCheck_helperc_LOADV16le \n"
5355 ".previous\n"
5356 );
5357 
5358 #else
5359 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5360 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
5361 {
5362    return mc_LOADV16(a, False);
5363 }
5364 #endif
5365 
5366 /*------------------------------------------------------------*/
5367 /*--- STOREV16                                             ---*/
5368 /*------------------------------------------------------------*/
5369 
5370 /* True if the vabits4 in vabits8 indicate a and a+1 are accessible. */
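/* A worked example of the encoding this inspects (a sketch; see
   extract_vabits2_from_vabits8 for the layout): vabits8 holds one 2-bit
   VA field per byte of the containing 32-bit word, byte 0 in bits [1:0]
   up to byte 3 in bits [7:6].  For a 2-aligned address with (a & 2) == 2,
   shift == 4, so the fields for bytes 2 and 3 land in bits [3:0] after the
   shift; the function then requires that neither field is
   VA_BITS2_NOACCESS. */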
5371 static INLINE
5372 Bool accessible_vabits4_in_vabits8 ( Addr a, UChar vabits8 )
5373 {
5374    UInt shift;
5375    tl_assert(VG_IS_2_ALIGNED(a));      // Must be 2-aligned
5376    shift = (a & 2) << 1;               // shift by 0 or 4
5377    vabits8 >>= shift;                  // shift the four bits to the bottom
5378     // check 2 x vabits2 != VA_BITS2_NOACCESS
5379    return ((0x3 & vabits8) != VA_BITS2_NOACCESS)
5380       &&  ((0xc & vabits8) != VA_BITS2_NOACCESS << 2);
5381 }
5382 
5383 static INLINE
5384 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
5385 {
5386    PROF_EVENT(MCPE_STOREV16);
5387 
5388 #ifndef PERF_FAST_STOREV
5389    mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5390 #else
5391    {
5392       UWord   sm_off, vabits8;
5393       SecMap* sm;
5394 
5395       if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
5396          PROF_EVENT(MCPE_STOREV16_SLOW1);
5397          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5398          return;
5399       }
5400 
5401       sm      = get_secmap_for_reading_low(a);
5402       sm_off  = SM_OFF(a);
5403       vabits8 = sm->vabits8[sm_off];
5404 
5405       // To understand the below cleverness, see the extensive comments
5406       // in MC_(helperc_STOREV8).
5407       if (LIKELY(V_BITS16_DEFINED == vbits16)) {
5408          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5409             return;
5410          }
5411          if (!is_distinguished_sm(sm)
5412              && accessible_vabits4_in_vabits8(a, vabits8)) {
5413             insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED,
5414                                          &(sm->vabits8[sm_off]) );
5415             return;
5416          }
5417          PROF_EVENT(MCPE_STOREV16_SLOW2);
5418          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
              return;
5419       }
5420       if (V_BITS16_UNDEFINED == vbits16) {
5421          if (vabits8 == VA_BITS8_UNDEFINED) {
5422             return;
5423          }
5424          if (!is_distinguished_sm(sm)
5425              && accessible_vabits4_in_vabits8(a, vabits8)) {
5426             insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
5427                                          &(sm->vabits8[sm_off]) );
5428             return;
5429          }
5430          PROF_EVENT(MCPE_STOREV16_SLOW3);
5431          mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5432          return;
5433       }
5434 
5435       PROF_EVENT(MCPE_STOREV16_SLOW4);
5436       mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
5437    }
5438 #endif
5439 }
5440 
5441 
5442 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
5443 {
5444    mc_STOREV16(a, vbits16, True);
5445 }
5446 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
5447 {
5448    mc_STOREV16(a, vbits16, False);
5449 }
5450 
5451 /*------------------------------------------------------------*/
5452 /*--- LOADV8                                               ---*/
5453 /*------------------------------------------------------------*/
5454 
5455 /* Note: endianness is irrelevant for size == 1 */
5456 
5457 // Non-generic assembly for arm32-linux
5458 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5459     && defined(VGP_arm_linux)
5460 __asm__( /* Derived from NCode template */
5461 ".text                                  \n"
5462 ".align 2                               \n"
5463 ".global vgMemCheck_helperc_LOADV8      \n"
5464 ".type   vgMemCheck_helperc_LOADV8, %function \n"
5465 "vgMemCheck_helperc_LOADV8:             \n" //
5466 "      lsr    r2, r0, #16               \n" // r2 = pri-map-ix
5467 "      movw   r3, #:lower16:primary_map \n" //
5468 "      uxth   r1, r0                    \n" // r1 = sec-map-offB
5469 "      movt   r3, #:upper16:primary_map \n" //
5470 "      ldr    r2, [r3, r2, lsl #2]      \n" // r2 = sec-map
5471 "      ldrb   r1, [r2, r1, lsr #2]      \n" // r1 = sec-map-VABITS8
5472 "      cmp    r1, #0xAA                 \n" // r1 == VA_BITS8_DEFINED?
5473 "      bne    .LLV8c0                   \n" // no, goto .LLV8c0
5474 ".LLV8h9:                               \n" //
5475 "      mov    r0, #0xFFFFFF00           \n" // V_BITS8_DEFINED | top24safe
5476 "      bx     lr                        \n" //
5477 ".LLV8c0:                               \n" //
5478 "      cmp    r1, #0x55                 \n" // VA_BITS8_UNDEFINED
5479 "      bne    .LLV8c4                   \n" //
5480 ".LLV8c2:                               \n" //
5481 "      mov    r0, #0xFFFFFFFF           \n" // V_BITS8_UNDEFINED | top24safe
5482 "      bx     lr                        \n" //
5483 ".LLV8c4:                               \n" //
5484        // r1 holds sec-map-VABITS8
5485        // r0 holds the address.  Extract the relevant 2 bits and inspect.
5486 "      and    r2, r0, #3       \n" // addr & 3
5487 "      add    r2, r2, r2       \n" // 2 * (addr & 3)
5488 "      lsr    r1, r1, r2       \n" // sec-map-VABITS8 >> (2 * (addr & 3))
5489 "      and    r1, r1, #3       \n" // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
5490 
5491 "      cmp    r1, #2                    \n" // VA_BITS2_DEFINED
5492 "      beq    .LLV8h9                   \n" //
5493 
5494 "      cmp    r1, #1                    \n" // VA_BITS2_UNDEFINED
5495 "      beq    .LLV8c2                   \n" //
5496 
5497 "      push   {r4, lr}                  \n" //
5498 "      mov    r2, #0                    \n" //
5499 "      mov    r1, #8                    \n" //
5500 "      bl     mc_LOADVn_slow            \n" //
5501 "      pop    {r4, pc}                  \n" //
5502 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8 \n"
5503 ".previous\n"
5504 );
5505 
5506 /* Non-generic assembly for x86-linux */
5507 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
5508       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
5509 __asm__(
5510 ".text\n"
5511 ".align 16\n"
5512 ".global vgMemCheck_helperc_LOADV8\n"
5513 ".type   vgMemCheck_helperc_LOADV8, @function\n"
5514 "vgMemCheck_helperc_LOADV8:\n"
5515 "      mov    %eax,  %edx\n"
5516 "      shr    $0x10, %edx\n"
5517 "      mov    primary_map(,%edx,4), %ecx\n"
5518 "      movzwl %ax,   %edx\n"
5519 "      shr    $0x2,  %edx\n"
5520 "      movzbl (%ecx,%edx,1), %edx\n"/* edx = VA bits for 32bit */
5521 "      cmp    $0xaa, %edx\n"        /* compare to VA_BITS8_DEFINED? */
5522 "      jne    .LLV8LE2\n"           /* jump if not defined */
5523 ".LLV8LE1:\n"
5524 "      mov    $0xffffff00, %eax\n"  /* V_BITS8_DEFINED | top24safe */
5525 "      ret\n"
5526 ".LLV8LE2:\n"
5527 "      cmp    $0x55, %edx\n"        /* compare to VA_BITS8_UNDEFINED */
5528 "      jne    .LLV8LE4\n"           /* jump if not all 32bits are undefined */
5529 ".LLV8LE3:\n"
5530 "      or     $0xffffffff, %eax\n"  /* V_BITS8_UNDEFINED | top24safe */
5531 "      ret\n"
5532 ".LLV8LE4:\n"
5533 "      mov    %eax,  %ecx\n"
5534 "      and    $0x3,  %ecx\n"
5535 "      add    %ecx,  %ecx\n"
5536 "      sar    %cl,   %edx\n"
5537 "      and    $0x3,  %edx\n"
5538 "      cmp    $0x2,  %edx\n"
5539 "      je     .LLV8LE1\n"           /* jump if all 8bits are defined */
5540 "      cmp    $0x1,  %edx\n"
5541 "      je     .LLV8LE3\n"           /* jump if all 8bits are undefined */
5542 "      xor    %ecx,  %ecx\n"        /* tail call to mc_LOADVn_slow(a, 8, 0) */
5543 "      mov    $0x8,  %edx\n"
5544 "      jmp    mc_LOADVn_slow\n"
5545 ".size vgMemCheck_helperc_LOADV8, .-vgMemCheck_helperc_LOADV8\n"
5546 ".previous\n"
5547 );
5548 
5549 #else
5550 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
5551 VG_REGPARM(1)
5552 UWord MC_(helperc_LOADV8) ( Addr a )
5553 {
5554    PROF_EVENT(MCPE_LOADV8);
5555 
5556 #ifndef PERF_FAST_LOADV
5557    return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5558 #else
5559    {
5560       UWord   sm_off, vabits8;
5561       SecMap* sm;
5562 
5563       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5564          PROF_EVENT(MCPE_LOADV8_SLOW1);
5565          return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5566       }
5567 
5568       sm      = get_secmap_for_reading_low(a);
5569       sm_off  = SM_OFF(a);
5570       vabits8 = sm->vabits8[sm_off];
5571       // Convert V bits from compact memory form to expanded register form
5572       // Handle common case quickly: a is mapped, and the entire
5573       // word32 it lives in is addressable.
5574       if      (LIKELY(vabits8 == VA_BITS8_DEFINED  )) { return V_BITS8_DEFINED;   }
5575       else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
5576       else {
5577          // The 4 (yes, 4) bytes are not all-defined or all-undefined, so
5578          // check the single byte being loaded.
5579          UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
5580          if      (vabits2 == VA_BITS2_DEFINED  ) { return V_BITS8_DEFINED;   }
5581          else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
5582          else {
5583             /* Slow case: the byte is not all-defined or all-undefined. */
5584             PROF_EVENT(MCPE_LOADV8_SLOW2);
5585             return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
5586          }
5587       }
5588    }
5589 #endif
5590 }
5591 #endif
5592 
5593 /*------------------------------------------------------------*/
5594 /*--- STOREV8                                              ---*/
5595 /*------------------------------------------------------------*/
5596 
5597 VG_REGPARM(2)
5598 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
5599 {
5600    PROF_EVENT(MCPE_STOREV8);
5601 
5602 #ifndef PERF_FAST_STOREV
5603    mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5604 #else
5605    {
5606       UWord   sm_off, vabits8;
5607       SecMap* sm;
5608 
5609       if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
5610          PROF_EVENT(MCPE_STOREV8_SLOW1);
5611          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5612          return;
5613       }
5614 
5615       sm      = get_secmap_for_reading_low(a);
5616       sm_off  = SM_OFF(a);
5617       vabits8 = sm->vabits8[sm_off];
5618 
5619       // Clevernesses to speed up storing V bits.
5620       // The 64/32/16 bit cases also have similar clevernesses, but it
5621       // works a little differently to the code below.
5622       //
5623       // Cleverness 1:  sometimes we don't have to write the shadow memory at
5624       // all, if we can tell that what we want to write is the same as what is
5625       // already there. These cases are marked below as "defined on defined" and
5626       // "undefined on undefined".
5627       //
5628       // Cleverness 2:
5629       // We also avoid calling mc_STOREVn_slow if the V bits can be written
5630       // directly in the secondary map. V bits can be directly written
5631       // if 4 conditions are respected:
5632       //   * The address for which V bits are written is naturally aligned
5633       //        on 1 byte  for STOREV8 (this is always true)
5634       //        on 2 bytes for STOREV16
5635       //        on 4 bytes for STOREV32
5636       //        on 8 bytes for STOREV64.
5637       //   * V bits being written are either fully defined or fully undefined.
5638       //     (for partially defined V bits, V bits cannot be directly written,
5639       //      as the secondary vbits table must be maintained).
5640       //   * the secmap is not distinguished (distinguished maps cannot be
5641       //     modified).
5642       //   * the memory corresponding to the V bits being written is
5643       //     accessible (if one or more bytes are not accessible,
5644       //     we must call mc_STOREVn_slow in order to report accessibility
5645       //     errors).
5646       //     Note that for STOREV32 and STOREV64, it is too expensive
5647       //     to verify the accessibility of each byte for the benefit it
5648       //     brings. Instead, a quicker check is done by comparing to
5649       //     VA_BITS(8|16)_(UN)DEFINED. This guarantees accessibility,
5650       //     but misses some opportunities for direct modification.
5651       //     Checking each byte's accessibility was measured for STOREV32
5652       //     with the perf tests and slowed all of them down.
5653       // The cases corresponding to cleverness 2 are marked below as
5654       // "direct mod".
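      // Worked example of cleverness 2 (a sketch): suppose a is 4-aligned so
      // its own vabits2 field is the low two bits, and vabits8 == 0xA9, i.e.
      // this byte is UNDEFINED while the other three bytes of its word32 are
      // DEFINED.  Storing V_BITS8_DEFINED here is a "direct mod": the byte is
      // accessible, so its vabits2 field is rewritten to VA_BITS2_DEFINED
      // without calling mc_STOREVn_slow.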
5655       if (LIKELY(V_BITS8_DEFINED == vbits8)) {
5656          if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
5657             return; // defined on defined
5658          }
5659          if (!is_distinguished_sm(sm)
5660              && VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8)) {
5661             // direct mod
5662             insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
5663                                          &(sm->vabits8[sm_off]) );
5664             return;
5665          }
5666          PROF_EVENT(MCPE_STOREV8_SLOW2);
5667          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5668          return;
5669       }
5670       if (V_BITS8_UNDEFINED == vbits8) {
5671          if (vabits8 == VA_BITS8_UNDEFINED) {
5672             return; // undefined on undefined
5673          }
5674          if (!is_distinguished_sm(sm)
5675              && (VA_BITS2_NOACCESS
5676                  != extract_vabits2_from_vabits8(a, vabits8))) {
5677             // direct mod
5678             insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
5679                                          &(sm->vabits8[sm_off]) );
5680             return;
5681          }
5682          PROF_EVENT(MCPE_STOREV8_SLOW3);
5683          mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5684          return;
5685       }
5686 
5687       // Partially defined word
5688       PROF_EVENT(MCPE_STOREV8_SLOW4);
5689       mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
5690    }
5691 #endif
5692 }
5693 
5694 
5695 /*------------------------------------------------------------*/
5696 /*--- Functions called directly from generated code:       ---*/
5697 /*--- Value-check failure handlers.                        ---*/
5698 /*------------------------------------------------------------*/
5699 
5700 /* Call these ones when an origin is available ... */
5701 VG_REGPARM(1)
5702 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
5703    MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
5704 }
5705 
5706 VG_REGPARM(1)
5707 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
5708    MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
5709 }
5710 
5711 VG_REGPARM(1)
5712 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
5713    MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
5714 }
5715 
5716 VG_REGPARM(1)
5717 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
5718    MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
5719 }
5720 
5721 VG_REGPARM(2)
5722 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
5723    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
5724 }
5725 
5726 /* ... and these when an origin isn't available. */
5727 
5728 VG_REGPARM(0)
5729 void MC_(helperc_value_check0_fail_no_o) ( void ) {
5730    MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
5731 }
5732 
5733 VG_REGPARM(0)
5734 void MC_(helperc_value_check1_fail_no_o) ( void ) {
5735    MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
5736 }
5737 
5738 VG_REGPARM(0)
5739 void MC_(helperc_value_check4_fail_no_o) ( void ) {
5740    MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
5741 }
5742 
5743 VG_REGPARM(0)
5744 void MC_(helperc_value_check8_fail_no_o) ( void ) {
5745    MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
5746 }
5747 
5748 VG_REGPARM(1)
5749 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
5750    MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
5751 }
5752 
5753 
5754 /*------------------------------------------------------------*/
5755 /*--- Metadata get/set functions, for client requests.     ---*/
5756 /*------------------------------------------------------------*/
5757 
5758 // Nb: this expands the V+A bits out into register-form V bits, even though
5759 // they're in memory.  This is for backward compatibility, and because it's
5760 // probably what the user wants.
5761 
5762 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
5763    error [no longer used], 3 == addressing error. */
5764 /* Nb: We used to issue various definedness/addressability errors from here,
5765    but we took them out because they ranged from not-very-helpful to
5766    downright annoying, and they complicated the error data structures. */
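/* A minimal client-side sketch of how this path is typically reached, via
   the VALGRIND_GET_VBITS / VALGRIND_SET_VBITS client requests declared in
   memcheck.h (the buffers here are purely illustrative):

      unsigned char buf[8], vbits[8];
      int r = VALGRIND_GET_VBITS(buf, vbits, 8);  // 1 == OK; vbits[i] == 0
                                                  // means buf[i] is defined
      ...
      r = VALGRIND_SET_VBITS(buf, vbits, 8);      // restore the saved V bits

   A result of 3 means some byte of buf (or of vbits) was unaddressable,
   matching the "3 == addressing error" convention above. */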
5767 static Int mc_get_or_set_vbits_for_client (
5768    Addr a,
5769    Addr vbits,
5770    SizeT szB,
5771    Bool setting, /* True <=> set vbits,  False <=> get vbits */
5772    Bool is_client_request /* True <=> real user request
5773                              False <=> internal call from gdbserver */
5774 )
5775 {
5776    SizeT i;
5777    Bool  ok;
5778    UChar vbits8;
5779 
5780    /* Check that arrays are addressable before doing any getting/setting.
5781       The vbits array is checked only for a real user request. */
5782    for (i = 0; i < szB; i++) {
5783       if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
5784           (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
5785          return 3;
5786       }
5787    }
5788 
5789    /* Do the copy */
5790    if (setting) {
5791       /* setting */
5792       for (i = 0; i < szB; i++) {
5793          ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
5794          tl_assert(ok);
5795       }
5796    } else {
5797       /* getting */
5798       for (i = 0; i < szB; i++) {
5799          ok = get_vbits8(a + i, &vbits8);
5800          tl_assert(ok);
5801          ((UChar*)vbits)[i] = vbits8;
5802       }
5803       if (is_client_request)
5804         // The bytes in vbits[] have now been set, so mark them as such.
5805         MC_(make_mem_defined)(vbits, szB);
5806    }
5807 
5808    return 1;
5809 }
5810 
5811 
5812 /*------------------------------------------------------------*/
5813 /*--- Detecting leaked (unreachable) malloc'd blocks.      ---*/
5814 /*------------------------------------------------------------*/
5815 
5816 /* For the memory leak detector, say whether an entire 64k chunk of
5817    address space is possibly in use, or not.  If in doubt return
5818    True.
5819 */
5820 Bool MC_(is_within_valid_secondary) ( Addr a )
5821 {
5822    SecMap* sm = maybe_get_secmap_for ( a );
5823    if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
5824       /* Definitely not in use. */
5825       return False;
5826    } else {
5827       return True;
5828    }
5829 }
5830 
5831 
5832 /* For the memory leak detector, say whether or not a given word
5833    address is to be regarded as valid. */
5834 Bool MC_(is_valid_aligned_word) ( Addr a )
5835 {
5836    tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
5837    tl_assert(VG_IS_WORD_ALIGNED(a));
5838    if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
5839       return False;
5840    if (sizeof(UWord) == 8) {
5841       if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
5842          return False;
5843    }
5844    if (UNLIKELY(MC_(in_ignored_range)(a)))
5845       return False;
5846    else
5847       return True;
5848 }
5849 
5850 
5851 /*------------------------------------------------------------*/
5852 /*--- Initialisation                                       ---*/
5853 /*------------------------------------------------------------*/
5854 
5855 static void init_shadow_memory ( void )
5856 {
5857    Int     i;
5858    SecMap* sm;
5859 
5860    tl_assert(V_BIT_UNDEFINED   == 1);
5861    tl_assert(V_BIT_DEFINED     == 0);
5862    tl_assert(V_BITS8_UNDEFINED == 0xFF);
5863    tl_assert(V_BITS8_DEFINED   == 0);
5864 
5865    /* Build the 3 distinguished secondaries */
5866    sm = &sm_distinguished[SM_DIST_NOACCESS];
5867    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
5868 
5869    sm = &sm_distinguished[SM_DIST_UNDEFINED];
5870    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
5871 
5872    sm = &sm_distinguished[SM_DIST_DEFINED];
5873    for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
5874 
5875    /* Set up the primary map. */
5876    /* These entries gradually get overwritten as the used address
5877       space expands. */
5878    for (i = 0; i < N_PRIMARY_MAP; i++)
5879       primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
5880 
5881    /* Auxiliary primary maps */
5882    init_auxmap_L1_L2();
5883 
5884    /* auxmap_size = auxmap_used = 0;
5885       no ... these are statically initialised */
5886 
5887    /* Secondary V bit table */
5888    secVBitTable = createSecVBitTable();
5889 }
5890 
5891 
5892 /*------------------------------------------------------------*/
5893 /*--- Sanity check machinery (permanently engaged)         ---*/
5894 /*------------------------------------------------------------*/
5895 
5896 static Bool mc_cheap_sanity_check ( void )
5897 {
5898    n_sanity_cheap++;
5899    PROF_EVENT(MCPE_CHEAP_SANITY_CHECK);
5900    /* Check for sane operating level */
5901    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5902       return False;
5903    /* nothing else useful we can rapidly check */
5904    return True;
5905 }
5906 
5907 static Bool mc_expensive_sanity_check ( void )
5908 {
5909    Int     i;
5910    Word    n_secmaps_found;
5911    SecMap* sm;
5912    const HChar*  errmsg;
5913    Bool    bad = False;
5914 
5915    if (0) VG_(printf)("expensive sanity check\n");
5916    if (0) return True;
5917 
5918    n_sanity_expensive++;
5919    PROF_EVENT(MCPE_EXPENSIVE_SANITY_CHECK);
5920 
5921    /* Check for sane operating level */
5922    if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
5923       return False;
5924 
5925    /* Check that the 3 distinguished SMs are still as they should be. */
5926 
5927    /* Check noaccess DSM. */
5928    sm = &sm_distinguished[SM_DIST_NOACCESS];
5929    for (i = 0; i < SM_CHUNKS; i++)
5930       if (sm->vabits8[i] != VA_BITS8_NOACCESS)
5931          bad = True;
5932 
5933    /* Check undefined DSM. */
5934    sm = &sm_distinguished[SM_DIST_UNDEFINED];
5935    for (i = 0; i < SM_CHUNKS; i++)
5936       if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
5937          bad = True;
5938 
5939    /* Check defined DSM. */
5940    sm = &sm_distinguished[SM_DIST_DEFINED];
5941    for (i = 0; i < SM_CHUNKS; i++)
5942       if (sm->vabits8[i] != VA_BITS8_DEFINED)
5943          bad = True;
5944 
5945    if (bad) {
5946       VG_(printf)("memcheck expensive sanity: "
5947                   "distinguished_secondaries have changed\n");
5948       return False;
5949    }
5950 
5951    /* If we're not checking for undefined value errors, the secondary V bit
5952     * table should be empty. */
5953    if (MC_(clo_mc_level) == 1) {
5954       if (0 != VG_(OSetGen_Size)(secVBitTable))
5955          return False;
5956    }
5957 
5958    /* check the auxiliary maps, very thoroughly */
5959    n_secmaps_found = 0;
5960    errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
5961    if (errmsg) {
5962       VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
5963       return False;
5964    }
5965 
5966    /* n_secmaps_found is now the number referred to by the auxiliary
5967       primary map.  Now add on the ones referred to by the main
5968       primary map. */
5969    for (i = 0; i < N_PRIMARY_MAP; i++) {
5970       if (primary_map[i] == NULL) {
5971          bad = True;
5972       } else {
5973          if (!is_distinguished_sm(primary_map[i]))
5974             n_secmaps_found++;
5975       }
5976    }
5977 
5978    /* check that the number of secmaps issued matches the number that
5979       are reachable (iow, no secmap leaks) */
5980    if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
5981       bad = True;
5982 
5983    if (bad) {
5984       VG_(printf)("memcheck expensive sanity: "
5985                   "apparent secmap leakage\n");
5986       return False;
5987    }
5988 
5989    if (bad) {
5990       VG_(printf)("memcheck expensive sanity: "
5991                   "auxmap covers wrong address space\n");
5992       return False;
5993    }
5994 
5995    /* there is only one pointer to each secmap (expensive) */
5996 
5997    return True;
5998 }
5999 
6000 /*------------------------------------------------------------*/
6001 /*--- Command line args                                    ---*/
6002 /*------------------------------------------------------------*/
6003 
6004 /* 31 Aug 2015: Vectorised code is now so widespread that
6005    --partial-loads-ok needs to be enabled by default on all platforms.
6006    Not doing so causes lots of false errors. */
6007 Bool          MC_(clo_partial_loads_ok)       = True;
6008 Long          MC_(clo_freelist_vol)           = 20*1000*1000LL;
6009 Long          MC_(clo_freelist_big_blocks)    =  1*1000*1000LL;
6010 LeakCheckMode MC_(clo_leak_check)             = LC_Summary;
6011 VgRes         MC_(clo_leak_resolution)        = Vg_HighRes;
6012 UInt          MC_(clo_show_leak_kinds)        = R2S(Possible) | R2S(Unreached);
6013 UInt          MC_(clo_error_for_leak_kinds)   = R2S(Possible) | R2S(Unreached);
6014 UInt          MC_(clo_leak_check_heuristics)  =   H2S(LchStdString)
6015                                                 | H2S( LchLength64)
6016                                                 | H2S( LchNewArray)
6017                                                 | H2S( LchMultipleInheritance);
6018 Bool          MC_(clo_xtree_leak)             = False;
6019 const HChar*  MC_(clo_xtree_leak_file) = "xtleak.kcg.%p";
6020 Bool          MC_(clo_workaround_gcc296_bugs) = False;
6021 Int           MC_(clo_malloc_fill)            = -1;
6022 Int           MC_(clo_free_fill)              = -1;
6023 KeepStacktraces MC_(clo_keep_stacktraces)     = KS_alloc_and_free;
6024 Int           MC_(clo_mc_level)               = 2;
6025 Bool          MC_(clo_show_mismatched_frees)  = True;
6026 Bool          MC_(clo_expensive_definedness_checks) = False;
6027 Bool          MC_(clo_ignore_range_below_sp)               = False;
6028 UInt          MC_(clo_ignore_range_below_sp__first_offset) = 0;
6029 UInt          MC_(clo_ignore_range_below_sp__last_offset)  = 0;
6030 
6031 static const HChar * MC_(parse_leak_heuristics_tokens) =
6032    "-,stdstring,length64,newarray,multipleinheritance";
6033 /* The first heuristic value (LchNone) has no keyword, as this is
6034    a fake heuristic used to collect the blocks found without any
6035    heuristic. */
6036 
6037 static Bool mc_process_cmd_line_options(const HChar* arg)
6038 {
6039    const HChar* tmp_str;
6040    Int   tmp_show;
6041 
6042    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
6043 
6044    /* Set MC_(clo_mc_level):
6045          1 = A bit tracking only
6046          2 = A and V bit tracking, but no V bit origins
6047          3 = A and V bit tracking, and V bit origins
6048 
6049       Do this by inspecting --undef-value-errors= and
6050       --track-origins=.  Reject the case --undef-value-errors=no
6051       --track-origins=yes as meaningless.
6052    */
6053    if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
6054       if (MC_(clo_mc_level) == 3) {
6055          goto bad_level;
6056       } else {
6057          MC_(clo_mc_level) = 1;
6058          return True;
6059       }
6060    }
6061    if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
6062       if (MC_(clo_mc_level) == 1)
6063          MC_(clo_mc_level) = 2;
6064       return True;
6065    }
6066    if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
6067       if (MC_(clo_mc_level) == 3)
6068          MC_(clo_mc_level) = 2;
6069       return True;
6070    }
6071    if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
6072       if (MC_(clo_mc_level) == 1) {
6073          goto bad_level;
6074       } else {
6075          MC_(clo_mc_level) = 3;
6076          return True;
6077       }
6078    }
6079 
6080         if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
6081    else if VG_USET_CLO(arg, "--errors-for-leak-kinds",
6082                        MC_(parse_leak_kinds_tokens),
6083                        MC_(clo_error_for_leak_kinds)) {}
6084    else if VG_USET_CLO(arg, "--show-leak-kinds",
6085                        MC_(parse_leak_kinds_tokens),
6086                        MC_(clo_show_leak_kinds)) {}
6087    else if VG_USET_CLO(arg, "--leak-check-heuristics",
6088                        MC_(parse_leak_heuristics_tokens),
6089                        MC_(clo_leak_check_heuristics)) {}
6090    else if (VG_BOOL_CLO(arg, "--show-reachable", tmp_show)) {
6091       if (tmp_show) {
6092          MC_(clo_show_leak_kinds) = MC_(all_Reachedness)();
6093       } else {
6094          MC_(clo_show_leak_kinds) &= ~R2S(Reachable);
6095       }
6096    }
6097    else if VG_BOOL_CLO(arg, "--show-possibly-lost", tmp_show) {
6098       if (tmp_show) {
6099          MC_(clo_show_leak_kinds) |= R2S(Possible);
6100       } else {
6101          MC_(clo_show_leak_kinds) &= ~R2S(Possible);
6102       }
6103    }
6104    else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
6105                                             MC_(clo_workaround_gcc296_bugs)) {}
6106 
6107    else if VG_BINT_CLO(arg, "--freelist-vol",  MC_(clo_freelist_vol),
6108                                                0, 10*1000*1000*1000LL) {}
6109 
6110    else if VG_BINT_CLO(arg, "--freelist-big-blocks",
6111                        MC_(clo_freelist_big_blocks),
6112                        0, 10*1000*1000*1000LL) {}
6113 
6114    else if VG_XACT_CLO(arg, "--leak-check=no",
6115                             MC_(clo_leak_check), LC_Off) {}
6116    else if VG_XACT_CLO(arg, "--leak-check=summary",
6117                             MC_(clo_leak_check), LC_Summary) {}
6118    else if VG_XACT_CLO(arg, "--leak-check=yes",
6119                             MC_(clo_leak_check), LC_Full) {}
6120    else if VG_XACT_CLO(arg, "--leak-check=full",
6121                             MC_(clo_leak_check), LC_Full) {}
6122 
6123    else if VG_XACT_CLO(arg, "--leak-resolution=low",
6124                             MC_(clo_leak_resolution), Vg_LowRes) {}
6125    else if VG_XACT_CLO(arg, "--leak-resolution=med",
6126                             MC_(clo_leak_resolution), Vg_MedRes) {}
6127    else if VG_XACT_CLO(arg, "--leak-resolution=high",
6128                             MC_(clo_leak_resolution), Vg_HighRes) {}
6129 
6130    else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
6131       Bool ok = parse_ignore_ranges(tmp_str);
6132       if (!ok) {
6133          VG_(message)(Vg_DebugMsg,
6134             "ERROR: --ignore-ranges: "
6135             "invalid syntax, or end <= start in range\n");
6136          return False;
6137       }
6138       if (gIgnoredAddressRanges) {
6139          UInt i;
6140          for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
6141             UWord val     = IAR_INVALID;
6142             UWord key_min = ~(UWord)0;
6143             UWord key_max = (UWord)0;
6144             VG_(indexRangeMap)( &key_min, &key_max, &val,
6145                                 gIgnoredAddressRanges, i );
6146             tl_assert(key_min <= key_max);
6147             UWord limit = 0x4000000; /* 64M - entirely arbitrary limit */
6148             if (key_max - key_min > limit && val == IAR_CommandLine) {
6149                VG_(message)(Vg_DebugMsg,
6150                   "ERROR: --ignore-ranges: suspiciously large range:\n");
6151                VG_(message)(Vg_DebugMsg,
6152                    "       0x%lx-0x%lx (size %lu)\n", key_min, key_max,
6153                    key_max - key_min + 1);
6154                return False;
6155             }
6156          }
6157       }
6158    }
6159 
6160    else if VG_STR_CLO(arg, "--ignore-range-below-sp", tmp_str) {
6161       /* This seems at first a bit weird, but: in order to imply
6162          a non-wrapped-around address range, the first offset needs to be
6163          larger than the second one.  For example
6164             --ignore-range-below-sp=8192,8189
6165          would cause accesses in the range [SP-8192, SP-8189] to be
6166          ignored. */
6167       UInt offs1 = 0, offs2 = 0;
6168       Bool ok = parse_UInt_pair(&tmp_str, &offs1, &offs2);
6169       // Ensure we used all the text after the '=' sign.
6170       if (ok && *tmp_str != 0) ok = False;
6171       if (!ok) {
6172          VG_(message)(Vg_DebugMsg,
6173                       "ERROR: --ignore-range-below-sp: invalid syntax. "
6174                       " Expected \"...=decimalnumber-decimalnumber\".\n");
6175          return False;
6176       }
6177       if (offs1 > 1000*1000 /*arbitrary*/ || offs2 > 1000*1000 /*ditto*/) {
6178          VG_(message)(Vg_DebugMsg,
6179                       "ERROR: --ignore-range-below-sp: suspiciously large "
6180                       "offset(s): %u and %u\n", offs1, offs2);
6181          return False;
6182       }
6183       if (offs1 <= offs2) {
6184          VG_(message)(Vg_DebugMsg,
6185                       "ERROR: --ignore-range-below-sp: invalid offsets "
6186                       "(the first must be larger): %u and %u\n", offs1, offs2);
6187          return False;
6188       }
6189       tl_assert(offs1 > offs2);
6190       if (offs1 - offs2 > 4096 /*arbitrary*/) {
6191          VG_(message)(Vg_DebugMsg,
6192                       "ERROR: --ignore-range-below-sp: suspiciously large "
6193                       "range: %u-%u (size %u)\n", offs1, offs2, offs1 - offs2);
6194          return False;
6195       }
6196       MC_(clo_ignore_range_below_sp) = True;
6197       MC_(clo_ignore_range_below_sp__first_offset) = offs1;
6198       MC_(clo_ignore_range_below_sp__last_offset)  = offs2;
6199       return True;
6200    }
6201 
6202    else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
6203    else if VG_BHEX_CLO(arg, "--free-fill",   MC_(clo_free_fill),   0x00,0xFF) {}
6204 
6205    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc",
6206                        MC_(clo_keep_stacktraces), KS_alloc) {}
6207    else if VG_XACT_CLO(arg, "--keep-stacktraces=free",
6208                        MC_(clo_keep_stacktraces), KS_free) {}
6209    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-and-free",
6210                        MC_(clo_keep_stacktraces), KS_alloc_and_free) {}
6211    else if VG_XACT_CLO(arg, "--keep-stacktraces=alloc-then-free",
6212                        MC_(clo_keep_stacktraces), KS_alloc_then_free) {}
6213    else if VG_XACT_CLO(arg, "--keep-stacktraces=none",
6214                        MC_(clo_keep_stacktraces), KS_none) {}
6215 
6216    else if VG_BOOL_CLO(arg, "--show-mismatched-frees",
6217                        MC_(clo_show_mismatched_frees)) {}
6218    else if VG_BOOL_CLO(arg, "--expensive-definedness-checks",
6219                        MC_(clo_expensive_definedness_checks)) {}
6220 
6221    else if VG_BOOL_CLO(arg, "--xtree-leak",
6222                        MC_(clo_xtree_leak)) {}
6223    else if VG_STR_CLO (arg, "--xtree-leak-file",
6224                        MC_(clo_xtree_leak_file)) {}
6225 
6226    else
6227       return VG_(replacement_malloc_process_cmd_line_option)(arg);
6228 
6229    return True;
6230 
6231 
6232   bad_level:
6233    VG_(fmsg_bad_option)(arg,
6234       "--track-origins=yes has no effect when --undef-value-errors=no.\n");
6235 }
6236 
6237 static void mc_print_usage(void)
6238 {
6239    VG_(printf)(
6240 "    --leak-check=no|summary|full     search for memory leaks at exit?  [summary]\n"
6241 "    --leak-resolution=low|med|high   differentiation of leak stack traces [high]\n"
6242 "    --show-leak-kinds=kind1,kind2,.. which leak kinds to show?\n"
6243 "                                            [definite,possible]\n"
6244 "    --errors-for-leak-kinds=kind1,kind2,..  which leak kinds are errors?\n"
6245 "                                            [definite,possible]\n"
6246 "        where kind is one of:\n"
6247 "          definite indirect possible reachable all none\n"
6248 "    --leak-check-heuristics=heur1,heur2,... which heuristics to use for\n"
6249 "        reducing leak search false positives [all]\n"
6250 "        where heur is one of:\n"
6251 "          stdstring length64 newarray multipleinheritance all none\n"
6252 "    --show-reachable=yes             same as --show-leak-kinds=all\n"
6253 "    --show-reachable=no --show-possibly-lost=yes\n"
6254 "                                     same as --show-leak-kinds=definite,possible\n"
6255 "    --show-reachable=no --show-possibly-lost=no\n"
6256 "                                     same as --show-leak-kinds=definite\n"
6257 "    --xtree-leak=no|yes              output leak result in xtree format? [no]\n"
6258 "    --xtree-leak-file=<file>         xtree leak report file [xtleak.kcg.%%p]\n"
6259 "    --undef-value-errors=no|yes      check for undefined value errors [yes]\n"
6260 "    --track-origins=no|yes           show origins of undefined values? [no]\n"
6261 "    --partial-loads-ok=no|yes        too hard to explain here; see manual [yes]\n"
6262 "    --expensive-definedness-checks=no|yes\n"
6263 "                                     Use extra-precise definedness tracking [no]\n"
6264 "    --freelist-vol=<number>          volume of freed blocks queue     [20000000]\n"
6265 "    --freelist-big-blocks=<number>   releases first the blocks with size >= [1000000]\n"
6266 "    --workaround-gcc296-bugs=no|yes  self explanatory [no].  Deprecated.\n"
6267 "                                     Use --ignore-range-below-sp instead.\n"
6268 "    --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS]   assume given addresses are OK\n"
6269 "    --ignore-range-below-sp=<number>-<number>  do not report errors for\n"
6270 "                                     accesses at the given offsets below SP\n"
6271 "    --malloc-fill=<hexnumber>        fill malloc'd areas with given value\n"
6272 "    --free-fill=<hexnumber>          fill free'd areas with given value\n"
6273 "    --keep-stacktraces=alloc|free|alloc-and-free|alloc-then-free|none\n"
6274 "        stack trace(s) to keep for malloc'd/free'd areas       [alloc-and-free]\n"
6275 "    --show-mismatched-frees=no|yes   show frees that don't match the allocator? [yes]\n"
6276    );
6277 }
6278 
6279 static void mc_print_debug_usage(void)
6280 {
6281    VG_(printf)(
6282 "    (none)\n"
6283    );
6284 }
6285 
6286 
6287 /*------------------------------------------------------------*/
6288 /*--- Client blocks                                        ---*/
6289 /*------------------------------------------------------------*/
6290 
6291 /* Client block management:
6292 
6293    This is managed as an expanding array of client block descriptors.
6294    Indices of live descriptors are issued to the client, so it can ask
6295    to free them later.  Therefore we cannot slide live entries down
6296    over dead ones.  Instead we must use free/inuse flags and scan for
6297    an empty slot at allocation time.  This in turn means allocation is
6298    relatively expensive, so we hope this does not happen too often.
6299 
6300    An unused block has start == size == 0
6301 */
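/* Client-side sketch of the lifecycle of such a descriptor (macro names
   from memcheck.h / valgrind.h; the variables are illustrative only):

      int id = VALGRIND_CREATE_BLOCK(addr, len, "my buffer");
      ...
      VALGRIND_DISCARD(id);

   CREATE_BLOCK returns the index handed out by alloc_client_block() below,
   and DISCARD marks that slot (start == size == 0) free for reuse. */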
6302 
6303 /* type CGenBlock is defined in mc_include.h */
6304 
6305 /* This subsystem is self-initialising. */
6306 static UWord      cgb_size = 0;
6307 static UWord      cgb_used = 0;
6308 static CGenBlock* cgbs     = NULL;
6309 
6310 /* Stats for this subsystem. */
6311 static ULong cgb_used_MAX = 0;   /* Max in use. */
6312 static ULong cgb_allocs   = 0;   /* Number of allocs. */
6313 static ULong cgb_discards = 0;   /* Number of discards. */
6314 static ULong cgb_search   = 0;   /* Number of searches. */
6315 
6316 
6317 /* Get access to the client block array. */
6318 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
6319                                  /*OUT*/UWord* nBlocks )
6320 {
6321    *blocks  = cgbs;
6322    *nBlocks = cgb_used;
6323 }
6324 
6325 
6326 static
6327 Int alloc_client_block ( void )
6328 {
6329    UWord      i, sz_new;
6330    CGenBlock* cgbs_new;
6331 
6332    cgb_allocs++;
6333 
6334    for (i = 0; i < cgb_used; i++) {
6335       cgb_search++;
6336       if (cgbs[i].start == 0 && cgbs[i].size == 0)
6337          return i;
6338    }
6339 
6340    /* Not found.  Try to allocate one at the end. */
6341    if (cgb_used < cgb_size) {
6342       cgb_used++;
6343       return cgb_used-1;
6344    }
6345 
6346    /* Ok, we have to allocate a new one. */
6347    tl_assert(cgb_used == cgb_size);
6348    sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
6349 
6350    cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
6351    for (i = 0; i < cgb_used; i++)
6352       cgbs_new[i] = cgbs[i];
6353 
6354    if (cgbs != NULL)
6355       VG_(free)( cgbs );
6356    cgbs = cgbs_new;
6357 
6358    cgb_size = sz_new;
6359    cgb_used++;
6360    if (cgb_used > cgb_used_MAX)
6361       cgb_used_MAX = cgb_used;
6362    return cgb_used-1;
6363 }
6364 
6365 
6366 static void show_client_block_stats ( void )
6367 {
6368    VG_(message)(Vg_DebugMsg,
6369       "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
6370       cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
6371    );
6372 }
6373 static void print_monitor_help ( void )
6374 {
6375    VG_(gdb_printf)
6376       (
6377 "\n"
6378 "memcheck monitor commands:\n"
6379 "  xb <addr> [<len>]\n"
6380 "        prints validity bits for <len> (or 1) bytes at <addr>\n"
6381 "            bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
6382 "        Then prints the bytes values below the corresponding validity bits\n"
6383 "        in a layout similar to the gdb command 'x /<len>xb <addr>'\n"
6384 "        Example: xb 0x8049c78 10\n"
6385 "  get_vbits <addr> [<len>]\n"
6386 "        Similar to xb, but only prints the validity bytes by group of 4.\n"
6387 "  make_memory [noaccess|undefined\n"
6388 "                     |defined|Definedifaddressable] <addr> [<len>]\n"
6389 "        mark <len> (or 1) bytes at <addr> with the given accessibility\n"
6390 "  check_memory [addressable|defined] <addr> [<len>]\n"
6391 "        check that <len> (or 1) bytes at <addr> have the given accessibility\n"
6392 "            and outputs a description of <addr>\n"
6393 "  leak_check [full*|summary|xtleak]\n"
6394 "                [kinds kind1,kind2,...|reachable|possibleleak*|definiteleak]\n"
6395 "                [heuristics heur1,heur2,...]\n"
6396 "                [increased*|changed|any]\n"
6397 "                [unlimited*|limited <max_loss_records_output>]\n"
6398 "            * = defaults\n"
6399 "         xtleak produces an xtree full leak result in xtleak.kcg.%%p.%%n\n"
6400 "       where kind is one of:\n"
6401 "         definite indirect possible reachable all none\n"
6402 "       where heur is one of:\n"
6403 "         stdstring length64 newarray multipleinheritance all none*\n"
6404 "       Examples: leak_check\n"
6405 "                 leak_check summary any\n"
6406 "                 leak_check full kinds indirect,possible\n"
6407 "                 leak_check full reachable any limited 100\n"
6408 "  block_list <loss_record_nr>|<loss_record_nr_from>..<loss_record_nr_to>\n"
6409 "                [unlimited*|limited <max_blocks>]\n"
6410 "                [heuristics heur1,heur2,...]\n"
6411 "        after a leak search, shows the list of blocks of <loss_record_nr>\n"
6412 "        (or of the range <loss_record_nr_from>..<loss_record_nr_to>).\n"
6413 "        With heuristics, only shows the blocks found via heur1,heur2,...\n"
6414 "            * = defaults\n"
6415 "  who_points_at <addr> [<len>]\n"
6416 "        shows places pointing inside <len> (default 1) bytes at <addr>\n"
6417 "        (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
6418 "         with len > 1, will also show \"interior pointers\")\n"
6419 "  xtmemory [<filename>]\n"
6420 "        dump xtree memory profile in <filename> (default xtmemory.kcg.%%p.%%n)\n"
6421 "\n");
6422 }
6423 
6424 /* Print szB bytes at address, with a format similar to the gdb command
6425    x /<szB>xb address.
6426    res[i] == 1 indicates the corresponding byte is addressable. */
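/* For instance (values purely illustrative), a call with szB == 10 starting
   at 0x8049c78 prints two tab-separated lines along the lines of:
      0x8049c78:  0x30  0x31  0x??  0x??  0x34  0x35  0x36  0x37
      0x8049c80:  0x38  0x39
   where 0x?? marks a byte whose res[i] != 1, i.e. an unaddressable byte. */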
6427 static void gdb_xb (Addr address, SizeT szB, Int res[])
6428 {
6429    UInt i;
6430 
6431    for (i = 0; i < szB; i++) {
6432       UInt bnr = i % 8;
6433       if (bnr == 0) {
6434          if (i != 0)
6435             VG_(printf) ("\n"); // Terminate previous line
6436          VG_(printf) ("%p:", (void*)(address+i));
6437       }
6438       if (res[i] == 1)
6439          VG_(printf) ("\t0x%02x", *(UChar*)(address+i));
6440       else
6441          VG_(printf) ("\t0x??");
6442    }
6443    VG_(printf) ("\n"); // Terminate previous line
6444 }
6445 
6446 
6447 /* Returns the address of the next non-space character,
6448    or the address of the string terminator. */
6449 static HChar* next_non_space (HChar *s)
6450 {
6451    while (*s && *s == ' ')
6452       s++;
6453    return s;
6454 }
6455 
6456 /* Parse an integer slice, i.e. a single integer or a range of integers.
6457    Syntax is:
6458        <integer>[..<integer> ]
6459    (spaces are allowed before and/or after ..).
6460    Return True if range correctly parsed, False otherwise. */
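/* For example (illustrative inputs): "5" yields from == to == 5, while
   "5..10", "5 .. 10" and "5 ..10" all yield from == 5 and to == 10;
   "10..5" is rejected because <from> must not be bigger than <to>. */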
6461 static Bool VG_(parse_slice) (HChar* s, HChar** saveptr,
6462                               UInt *from, UInt *to)
6463 {
6464    HChar* wl;
6465    HChar *endptr;
6466    endptr = NULL;
6467    wl = VG_(strtok_r) (s, " ", saveptr);
6468 
6469    /* slice must start with an integer. */
6470    if (wl == NULL) {
6471       VG_(gdb_printf) ("expecting integer or slice <from>..<to>\n");
6472       return False;
6473    }
6474    *from = VG_(strtoull10) (wl, &endptr);
6475    if (endptr == wl) {
6476       VG_(gdb_printf) ("invalid integer or slice <from>..<to>\n");
6477       return False;
6478    }
6479 
6480    if (*endptr == '\0' && *next_non_space(*saveptr) != '.') {
6481       /* wl token is an integer terminating the string
6482          or else next token does not start with .
6483          In both cases, the slice is a single integer. */
6484       *to = *from;
6485       return True;
6486    }
6487 
6488    if (*endptr == '\0') {
6489       // iii ..    => get the next token
6490       wl =  VG_(strtok_r) (NULL, " .", saveptr);
6491    } else {
6492       // It must be iii..
6493       if (*endptr != '.' || *(endptr+1) != '.') {
6494          VG_(gdb_printf) ("expecting slice <from>..<to>\n");
6495          return False;
6496       }
6497       if ( *(endptr+2) == ' ') {
6498          // It must be iii.. jjj  => get the next token
6499          wl =  VG_(strtok_r) (NULL, " .", saveptr);
6500       } else {
6501          // It must be iii..jjj
6502          wl = endptr+2;
6503       }
6504    }
6505 
6506    *to = VG_(strtoull10) (wl, &endptr);
6507    if (*endptr != '\0') {
6508       VG_(gdb_printf) ("missing/wrong 'to' of slice <from>..<to>\n");
6509       return False;
6510    }
6511 
6512    if (*from > *to) {
6513       VG_(gdb_printf) ("<from> cannot be bigger than <to> "
6514                        "in slice <from>..<to>\n");
6515       return False;
6516    }
6517 
6518    return True;
6519 }
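/* For instance, with the parser above: "5" yields from == to == 5, while
   "5..10" and "5 .. 10" both yield from == 5 and to == 10; "10..5" is
   rejected because <from> is bigger than <to>. */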
6520 
6521 /* return True if request recognised, False otherwise */
6522 static Bool handle_gdb_monitor_command (ThreadId tid, HChar *req)
6523 {
6524    HChar* wcmd;
6525    HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */
6526    HChar *ssaveptr;
6527 
6528    VG_(strcpy) (s, req);
6529 
6530    wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
6531    /* NB: if possible, avoid introducing a new command below which
6532       starts with the same first letter(s) as an already existing
6533       command. This ensures a shorter abbreviation for the user. */
6534    switch (VG_(keyword_id)
6535            ("help get_vbits leak_check make_memory check_memory "
6536             "block_list who_points_at xb xtmemory",
6537             wcmd, kwd_report_duplicated_matches)) {
6538    case -2: /* multiple matches */
6539       return True;
6540    case -1: /* not found */
6541       return False;
6542    case  0: /* help */
6543       print_monitor_help();
6544       return True;
6545    case  1: { /* get_vbits */
6546       Addr address;
6547       SizeT szB = 1;
6548       if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6549          UChar vbits;
6550          Int i;
6551          Int unaddressable = 0;
6552          for (i = 0; i < szB; i++) {
6553             Int res = mc_get_or_set_vbits_for_client
6554                (address+i, (Addr) &vbits, 1,
6555                 False, /* get them */
6556                 False  /* is client request */ );
6557             /* we are at the start of a new output line: terminate the previous one. */
6558             if ((i % 32) == 0 && i != 0)
6559                VG_(printf) ("\n");
6560             /* we are at the start of the next group of 4 bytes: print a separating space. */
6561             else if ((i % 4) == 0 && i != 0)
6562                VG_(printf) (" ");
6563             if (res == 1) {
6564                VG_(printf) ("%02x", vbits);
6565             } else {
6566                tl_assert(3 == res);
6567                unaddressable++;
6568                VG_(printf) ("__");
6569             }
6570          }
6571          VG_(printf) ("\n");
6572          if (unaddressable) {
6573             VG_(printf)
6574                ("Address %p len %lu has %d bytes unaddressable\n",
6575                 (void *)address, szB, unaddressable);
6576          }
6577       }
6578       return True;
6579    }
6580    case  2: { /* leak_check */
6581       Int err = 0;
6582       LeakCheckParams lcp;
6583       HChar* xt_filename = NULL;
6584       HChar* kw;
6585 
6586       lcp.mode               = LC_Full;
6587       lcp.show_leak_kinds    = R2S(Possible) | R2S(Unreached);
6588       lcp.errors_for_leak_kinds = 0; // no errors for interactive leak search.
6589       lcp.heuristics         = 0;
6590       lcp.deltamode          = LCD_Increased;
6591       lcp.max_loss_records_output = 999999999;
6592       lcp.requested_by_monitor_command = True;
6593       lcp.xt_filename = NULL;
6594 
6595       for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
6596            kw != NULL;
6597            kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6598          switch (VG_(keyword_id)
6599                  ("full summary xtleak "
6600                   "kinds reachable possibleleak definiteleak "
6601                   "heuristics "
6602                   "increased changed any "
6603                   "unlimited limited ",
6604                   kw, kwd_report_all)) {
6605          case -2: err++; break;
6606          case -1: err++; break;
6607          case  0: /* full */
6608             lcp.mode = LC_Full; break;
6609          case  1: /* summary */
6610             lcp.mode = LC_Summary; break;
6611          case  2: /* xtleak */
6612             lcp.mode = LC_Full;
6613             xt_filename
6614                = VG_(expand_file_name)("--xtleak-mc_main.c",
6615                                        "xtleak.kcg.%p.%n");
6616             lcp.xt_filename = xt_filename;
6617             break;
6618          case  3: { /* kinds */
6619             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6620             if (wcmd == NULL
6621                 || !VG_(parse_enum_set)(MC_(parse_leak_kinds_tokens),
6622                                         True/*allow_all*/,
6623                                         wcmd,
6624                                         &lcp.show_leak_kinds)) {
6625                VG_(gdb_printf) ("missing or malformed leak kinds set\n");
6626                err++;
6627             }
6628             break;
6629          }
6630          case  4: /* reachable */
6631             lcp.show_leak_kinds = MC_(all_Reachedness)();
6632             break;
6633          case  5: /* possibleleak */
6634             lcp.show_leak_kinds
6635                = R2S(Possible) | R2S(IndirectLeak) | R2S(Unreached);
6636             break;
6637          case  6: /* definiteleak */
6638             lcp.show_leak_kinds = R2S(Unreached);
6639             break;
6640          case  7: { /* heuristics */
6641             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6642             if (wcmd == NULL
6643                 || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6644                                         True,/*allow_all*/
6645                                         wcmd,
6646                                         &lcp.heuristics)) {
6647                VG_(gdb_printf) ("missing or malformed heuristics set\n");
6648                err++;
6649             }
6650             break;
6651          }
6652          case  8: /* increased */
6653             lcp.deltamode = LCD_Increased; break;
6654          case  9: /* changed */
6655             lcp.deltamode = LCD_Changed; break;
6656          case 10: /* any */
6657             lcp.deltamode = LCD_Any; break;
6658          case 11: /* unlimited */
6659             lcp.max_loss_records_output = 999999999; break;
6660          case 12: { /* limited */
6661             Int int_value;
6662             const HChar* endptr;
6663 
6664             wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6665             if (wcmd == NULL) {
6666                int_value = 0;
6667                endptr = "empty"; /* to report an error below */
6668             } else {
6669                HChar *the_end;
6670                int_value = VG_(strtoll10) (wcmd, &the_end);
6671                endptr = the_end;
6672             }
6673             if (*endptr != '\0')
6674                VG_(gdb_printf) ("missing or malformed integer value\n");
6675             else if (int_value > 0)
6676                lcp.max_loss_records_output = (UInt) int_value;
6677             else
6678                VG_(gdb_printf) ("max_loss_records_output must be >= 1,"
6679                                 " got %d\n", int_value);
6680             break;
6681          }
6682          default:
6683             tl_assert (0);
6684          }
6685       }
6686       if (!err)
6687          MC_(detect_memory_leaks)(tid, &lcp);
6688       if (xt_filename != NULL)
6689          VG_(free)(xt_filename);
6690       return True;
6691    }
6692 
6693    case  3: { /* make_memory */
6694       Addr address;
6695       SizeT szB = 1;
6696       Int kwdid = VG_(keyword_id)
6697          ("noaccess undefined defined Definedifaddressable",
6698           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6699       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6700          return True;
6701       switch (kwdid) {
6702       case -2: break;
6703       case -1: break;
6704       case  0: MC_(make_mem_noaccess) (address, szB); break;
6705       case  1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
6706                                                     MC_OKIND_USER ); break;
6707       case  2: MC_(make_mem_defined) ( address, szB ); break;
6708       case  3: make_mem_defined_if_addressable ( address, szB ); break;
6709       default: tl_assert(0);
6710       }
6711       return True;
6712    }
6713 
6714    case  4: { /* check_memory */
6715       Addr address;
6716       SizeT szB = 1;
6717       Addr bad_addr;
6718       UInt okind;
6719       const HChar* src;
6720       UInt otag;
6721       UInt ecu;
6722       ExeContext* origin_ec;
6723       MC_ReadResult res;
6724 
6725       Int kwdid = VG_(keyword_id)
6726          ("addressable defined",
6727           VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
6728       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6729          return True;
6730       switch (kwdid) {
6731       case -2: break;
6732       case -1: break;
6733       case  0: /* addressable */
6734          if (is_mem_addressable ( address, szB, &bad_addr ))
6735             VG_(printf) ("Address %p len %lu addressable\n",
6736                              (void *)address, szB);
6737          else
6738             VG_(printf)
6739                ("Address %p len %lu not addressable:\nbad address %p\n",
6740                 (void *)address, szB, (void *) bad_addr);
6741          MC_(pp_describe_addr) (address);
6742          break;
6743       case  1: /* defined */
6744          res = is_mem_defined ( address, szB, &bad_addr, &otag );
6745          if (MC_AddrErr == res)
6746             VG_(printf)
6747                ("Address %p len %lu not addressable:\nbad address %p\n",
6748                 (void *)address, szB, (void *) bad_addr);
6749          else if (MC_ValueErr == res) {
6750             okind = otag & 3;
6751             switch (okind) {
6752             case MC_OKIND_STACK:
6753                src = " was created by a stack allocation"; break;
6754             case MC_OKIND_HEAP:
6755                src = " was created by a heap allocation"; break;
6756             case MC_OKIND_USER:
6757                src = " was created by a client request"; break;
6758             case MC_OKIND_UNKNOWN:
6759                src = ""; break;
6760             default: tl_assert(0);
6761             }
6762             VG_(printf)
6763                ("Address %p len %lu not defined:\n"
6764                 "Uninitialised value at %p%s\n",
6765                 (void *)address, szB, (void *) bad_addr, src);
6766             ecu = otag & ~3;
6767             if (VG_(is_plausible_ECU)(ecu)) {
6768                origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
6769                VG_(pp_ExeContext)( origin_ec );
6770             }
6771          }
6772          else
6773             VG_(printf) ("Address %p len %lu defined\n",
6774                          (void *)address, szB);
6775          MC_(pp_describe_addr) (address);
6776          break;
6777       default: tl_assert(0);
6778       }
6779       return True;
6780    }
6781 
6782    case  5: { /* block_list */
6783       HChar* wl;
6784       HChar *the_end;
6785       UInt lr_nr_from = 0;
6786       UInt lr_nr_to = 0;
6787 
6788       if (VG_(parse_slice) (NULL, &ssaveptr, &lr_nr_from, &lr_nr_to)) {
6789          UInt limit_blocks = 999999999;
6790          Int int_value;
6791          UInt heuristics = 0;
6792 
6793          for (wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
6794               wl != NULL;
6795               wl = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
6796             switch (VG_(keyword_id) ("unlimited limited heuristics ",
6797                                      wl,  kwd_report_all)) {
6798             case -2: return True;
6799             case -1: return True;
6800             case  0: /* unlimited */
6801                limit_blocks = 999999999; break;
6802             case  1: /* limited */
6803                wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6804                if (wcmd == NULL) {
6805                   VG_(gdb_printf) ("missing integer value\n");
6806                   return True;
6807                }
6808                int_value = VG_(strtoll10) (wcmd, &the_end);
6809                if (*the_end != '\0') {
6810                   VG_(gdb_printf) ("malformed integer value\n");
6811                   return True;
6812                }
6813                if (int_value <= 0) {
6814                   VG_(gdb_printf) ("max_blocks must be >= 1,"
6815                                    " got %d\n", int_value);
6816                   return True;
6817                }
6818                limit_blocks = (UInt) int_value;
6819                break;
6820             case  2: /* heuristics */
6821                wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
6822                if (wcmd == NULL
6823                    || !VG_(parse_enum_set)(MC_(parse_leak_heuristics_tokens),
6824                                            True,/*allow_all*/
6825                                            wcmd,
6826                                            &heuristics)) {
6827                   VG_(gdb_printf) ("missing or malformed heuristics set\n");
6828                   return True;
6829                }
6830                break;
6831             default:
6832                tl_assert (0);
6833             }
6834          }
6835          /* subtract 1 from lr_nr_from/lr_nr_to, as what is shown to the user
6836             is 1 more than the index in lr_array. */
6837          if (lr_nr_from == 0 || ! MC_(print_block_list) (lr_nr_from-1,
6838                                                          lr_nr_to-1,
6839                                                          limit_blocks,
6840                                                          heuristics))
6841             VG_(gdb_printf) ("invalid loss record nr\n");
6842       }
6843       return True;
6844    }
6845 
6846    case  6: { /* who_points_at */
6847       Addr address;
6848       SizeT szB = 1;
6849 
6850       if (!VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr))
6851          return True;
6852       if (address == (Addr) 0) {
6853          VG_(gdb_printf) ("Cannot search who points at 0x0\n");
6854          return True;
6855       }
6856       MC_(who_points_at) (address, szB);
6857       return True;
6858    }
6859 
6860    case  7: { /* xb */
6861       Addr address;
6862       SizeT szB = 1;
6863       if (VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr)) {
6864          UChar vbits[8];
6865          Int res[8];
6866          Int i;
6867          Int unaddressable = 0;
6868          for (i = 0; i < szB; i++) {
6869             Int bnr = i % 8;
6870             res[bnr] = mc_get_or_set_vbits_for_client
6871                (address+i, (Addr) &vbits[bnr], 1,
6872                 False, /* get them */
6873                 False  /* is client request */ );
6874             /* We are about to print the first vabits of a new line.
6875                Terminate the previous line if needed: print a line with the
6876                address and the data. */
6877             if (bnr == 0) {
6878                if (i != 0) {
6879                   VG_(printf) ("\n");
6880                   gdb_xb (address + i - 8, 8, res);
6881                }
6882                VG_(printf) ("\t"); // To align VABITS with gdb_xb layout
6883             }
6884             if (res[bnr] == 1) {
6885                VG_(printf) ("\t  %02x", vbits[bnr]);
6886             } else {
6887                tl_assert(3 == res[bnr]);
6888                unaddressable++;
6889                VG_(printf) ("\t  __");
6890             }
6891          }
6892          VG_(printf) ("\n");
6893          if (szB % 8 == 0 && szB > 0)
6894             gdb_xb (address + szB - 8, 8, res);
6895          else
6896             gdb_xb (address + szB - szB % 8, szB % 8, res);
6897          if (unaddressable) {
6898             VG_(printf)
6899                ("Address %p len %lu has %d bytes unaddressable\n",
6900                 (void *)address, szB, unaddressable);
6901          }
6902       }
6903       return True;
6904    }
6905 
6906    case  8: { /* xtmemory */
6907       HChar* filename;
6908       filename = VG_(strtok_r) (NULL, " ", &ssaveptr);
6909       MC_(xtmemory_report)(filename, False);
6910       return True;
6911    }
6912 
6913    default:
6914       tl_assert(0);
6915       return False;
6916    }
6917 }
6918 
6919 /*------------------------------------------------------------*/
6920 /*--- Client requests                                      ---*/
6921 /*------------------------------------------------------------*/
6922 
6923 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
6924 {
6925    Int   i;
6926    Addr  bad_addr;
6927 
6928    if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
6929        && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
6930        && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
6931        && VG_USERREQ__FREELIKE_BLOCK   != arg[0]
6932        && VG_USERREQ__CREATE_MEMPOOL   != arg[0]
6933        && VG_USERREQ__DESTROY_MEMPOOL  != arg[0]
6934        && VG_USERREQ__MEMPOOL_ALLOC    != arg[0]
6935        && VG_USERREQ__MEMPOOL_FREE     != arg[0]
6936        && VG_USERREQ__MEMPOOL_TRIM     != arg[0]
6937        && VG_USERREQ__MOVE_MEMPOOL     != arg[0]
6938        && VG_USERREQ__MEMPOOL_CHANGE   != arg[0]
6939        && VG_USERREQ__MEMPOOL_EXISTS   != arg[0]
6940        && VG_USERREQ__GDB_MONITOR_COMMAND   != arg[0]
6941        && VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0]
6942        && VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE != arg[0])
6943       return False;
6944 
6945    switch (arg[0]) {
6946       case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE: {
6947          Bool ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
6948          if (!ok)
6949             MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
6950          *ret = ok ? (UWord)NULL : bad_addr;
6951          break;
6952       }
6953 
6954       case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
6955          Bool errorV    = False;
6956          Addr bad_addrV = 0;
6957          UInt otagV     = 0;
6958          Bool errorA    = False;
6959          Addr bad_addrA = 0;
6960          is_mem_defined_comprehensive(
6961             arg[1], arg[2],
6962             &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
6963          );
6964          if (errorV) {
6965             MC_(record_user_error) ( tid, bad_addrV,
6966                                      /*isAddrErr*/False, otagV );
6967          }
6968          if (errorA) {
6969             MC_(record_user_error) ( tid, bad_addrA,
6970                                      /*isAddrErr*/True, 0 );
6971          }
6972          /* Return the lower of the two erring addresses, if any. */
6973          *ret = 0;
6974          if (errorV && !errorA) {
6975             *ret = bad_addrV;
6976          }
6977          if (!errorV && errorA) {
6978             *ret = bad_addrA;
6979          }
6980          if (errorV && errorA) {
6981             *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
6982          }
6983          break;
6984       }
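      /* Client-side sketch (illustrative; buf and len are hypothetical):
         the two requests above back the memcheck.h macros
            VALGRIND_CHECK_MEM_IS_ADDRESSABLE(buf, len);
            VALGRIND_CHECK_MEM_IS_DEFINED(buf, len);
         which report an error and return 0 if the range is clean, otherwise
         the offending address (for the definedness check, the lower of the
         V-error and A-error addresses, as computed above). */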
6985 
6986       case VG_USERREQ__DO_LEAK_CHECK: {
6987          LeakCheckParams lcp;
6988 
6989          if (arg[1] == 0)
6990             lcp.mode = LC_Full;
6991          else if (arg[1] == 1)
6992             lcp.mode = LC_Summary;
6993          else {
6994             VG_(message)(Vg_UserMsg,
6995                          "Warning: unknown memcheck leak search mode\n");
6996             lcp.mode = LC_Full;
6997          }
6998 
6999          lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
7000          lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
7001          lcp.heuristics = MC_(clo_leak_check_heuristics);
7002 
7003          if (arg[2] == 0)
7004             lcp.deltamode = LCD_Any;
7005          else if (arg[2] == 1)
7006             lcp.deltamode = LCD_Increased;
7007          else if (arg[2] == 2)
7008             lcp.deltamode = LCD_Changed;
7009          else {
7010             VG_(message)
7011                (Vg_UserMsg,
7012                 "Warning: unknown memcheck leak search deltamode\n");
7013             lcp.deltamode = LCD_Any;
7014          }
7015          lcp.max_loss_records_output = 999999999;
7016          lcp.requested_by_monitor_command = False;
7017          lcp.xt_filename = NULL;
7018 
7019          MC_(detect_memory_leaks)(tid, &lcp);
7020          *ret = 0; /* return value is meaningless */
7021          break;
7022       }
7023 
7024       case VG_USERREQ__MAKE_MEM_NOACCESS:
7025          MC_(make_mem_noaccess) ( arg[1], arg[2] );
7026          *ret = -1;
7027          break;
7028 
7029       case VG_USERREQ__MAKE_MEM_UNDEFINED:
7030          make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
7031                                               MC_OKIND_USER );
7032          *ret = -1;
7033          break;
7034 
7035       case VG_USERREQ__MAKE_MEM_DEFINED:
7036          MC_(make_mem_defined) ( arg[1], arg[2] );
7037          *ret = -1;
7038          break;
7039 
7040       case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
7041          make_mem_defined_if_addressable ( arg[1], arg[2] );
7042          *ret = -1;
7043          break;
7044 
7045       case VG_USERREQ__CREATE_BLOCK: /* describe a block */
7046          if (arg[1] != 0 && arg[2] != 0) {
7047             i = alloc_client_block();
7048             /* VG_(printf)("allocated %d %p\n", i, cgbs); */
7049             cgbs[i].start = arg[1];
7050             cgbs[i].size  = arg[2];
7051             cgbs[i].desc  = VG_(strdup)("mc.mhcr.1", (HChar *)arg[3]);
7052             cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
7053             *ret = i;
7054          } else
7055             *ret = -1;
7056          break;
7057 
7058       case VG_USERREQ__DISCARD: /* discard */
7059          if (cgbs == NULL
7060              || arg[2] >= cgb_used ||
7061              (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
7062             *ret = 1;
7063          } else {
7064             tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
7065             cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
7066             VG_(free)(cgbs[arg[2]].desc);
7067             cgb_discards++;
7068             *ret = 0;
7069          }
7070          break;
7071 
7072       case VG_USERREQ__GET_VBITS:
7073          *ret = mc_get_or_set_vbits_for_client
7074                    ( arg[1], arg[2], arg[3],
7075                      False /* get them */,
7076                      True /* is client request */ );
7077          break;
7078 
7079       case VG_USERREQ__SET_VBITS:
7080          *ret = mc_get_or_set_vbits_for_client
7081                    ( arg[1], arg[2], arg[3],
7082                      True /* set them */,
7083                      True /* is client request */ );
7084          break;
7085 
7086       case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
7087          UWord** argp = (UWord**)arg;
7088          // MC_(bytes_leaked) et al were set by the last leak check (or zero
7089          // if no prior leak checks performed).
7090          *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
7091          *argp[2] = MC_(bytes_dubious);
7092          *argp[3] = MC_(bytes_reachable);
7093          *argp[4] = MC_(bytes_suppressed);
7094          // there is no argp[5]
7095          //*argp[5] = MC_(bytes_indirect);
7096          // XXX need to make *argp[1-4] defined;  currently done in the
7097          // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
7098          *ret = 0;
7099          return True;
7100       }
7101       case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
7102          UWord** argp = (UWord**)arg;
7103          // MC_(blocks_leaked) et al were set by the last leak check (or zero
7104          // if no prior leak checks performed).
7105          *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
7106          *argp[2] = MC_(blocks_dubious);
7107          *argp[3] = MC_(blocks_reachable);
7108          *argp[4] = MC_(blocks_suppressed);
7109          // there is no argp[5]
7110          //*argp[5] = MC_(blocks_indirect);
7111          // XXX need to make *argp[1-4] defined;  currently done in the
7112          // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
7113          *ret = 0;
7114          return True;
7115       }
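      /* Client-side sketch (illustrative): these two requests back the
         memcheck.h macros
            VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);
            VALGRIND_COUNT_LEAK_BLOCKS(leaked, dubious, reachable, suppressed);
         whose output arguments receive the totals from the most recent leak
         check (all zero if none has been run). */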
7116       case VG_USERREQ__MALLOCLIKE_BLOCK: {
7117          Addr p         = (Addr)arg[1];
7118          SizeT sizeB    =       arg[2];
7119          UInt rzB       =       arg[3];
7120          Bool is_zeroed = (Bool)arg[4];
7121 
7122          MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
7123                           MC_AllocCustom, MC_(malloc_list) );
7124          if (rzB > 0) {
7125             MC_(make_mem_noaccess) ( p - rzB, rzB);
7126             MC_(make_mem_noaccess) ( p + sizeB, rzB);
7127          }
7128          return True;
7129       }
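      /* Client-side sketch (illustrative; p, sizeB and rzB are hypothetical):
         a custom allocator typically pairs
            VALGRIND_MALLOCLIKE_BLOCK(p, sizeB, rzB, /*is_zeroed*/0);
         on allocation with
            VALGRIND_FREELIKE_BLOCK(p, rzB);
         on release, so that memcheck tracks such blocks (and the red zones
         made noaccess above) just like malloc/free'd ones. */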
7130       case VG_USERREQ__RESIZEINPLACE_BLOCK: {
7131          Addr p         = (Addr)arg[1];
7132          SizeT oldSizeB =       arg[2];
7133          SizeT newSizeB =       arg[3];
7134          UInt rzB       =       arg[4];
7135 
7136          MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
7137          return True;
7138       }
7139       case VG_USERREQ__FREELIKE_BLOCK: {
7140          Addr p         = (Addr)arg[1];
7141          UInt rzB       =       arg[2];
7142 
7143          MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
7144          return True;
7145       }
7146 
7147       case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
7148          HChar* s  = (HChar*)arg[1];
7149          Addr  dst = (Addr) arg[2];
7150          Addr  src = (Addr) arg[3];
7151          SizeT len = (SizeT)arg[4];
7152          MC_(record_overlap_error)(tid, s, src, dst, len);
7153          return True;
7154       }
7155 
7156       case VG_USERREQ__CREATE_MEMPOOL: {
7157          Addr pool      = (Addr)arg[1];
7158          UInt rzB       =       arg[2];
7159          Bool is_zeroed = (Bool)arg[3];
7160          UInt flags     =       arg[4];
7161 
7162          // The create_mempool function does not know these mempool flags,
7163          // pass as booleans.
7164          MC_(create_mempool) ( pool, rzB, is_zeroed,
7165                                (flags & VALGRIND_MEMPOOL_AUTO_FREE),
7166                                (flags & VALGRIND_MEMPOOL_METAPOOL) );
7167          return True;
7168       }
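      /* Client-side sketch (illustrative; pool, obj and size are hypothetical):
            VALGRIND_CREATE_MEMPOOL(pool, rzB, /*is_zeroed*/0);
            VALGRIND_MEMPOOL_ALLOC(pool, obj, size);
            VALGRIND_MEMPOOL_FREE(pool, obj);
            VALGRIND_DESTROY_MEMPOOL(pool);
         The auto-free/metapool behaviour handled above is requested by passing
         the corresponding flags in the create-mempool client request. */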
7169 
7170       case VG_USERREQ__DESTROY_MEMPOOL: {
7171          Addr pool      = (Addr)arg[1];
7172 
7173          MC_(destroy_mempool) ( pool );
7174          return True;
7175       }
7176 
7177       case VG_USERREQ__MEMPOOL_ALLOC: {
7178          Addr pool      = (Addr)arg[1];
7179          Addr addr      = (Addr)arg[2];
7180          UInt size      =       arg[3];
7181 
7182          MC_(mempool_alloc) ( tid, pool, addr, size );
7183          return True;
7184       }
7185 
7186       case VG_USERREQ__MEMPOOL_FREE: {
7187          Addr pool      = (Addr)arg[1];
7188          Addr addr      = (Addr)arg[2];
7189 
7190          MC_(mempool_free) ( pool, addr );
7191          return True;
7192       }
7193 
7194       case VG_USERREQ__MEMPOOL_TRIM: {
7195          Addr pool      = (Addr)arg[1];
7196          Addr addr      = (Addr)arg[2];
7197          UInt size      =       arg[3];
7198 
7199          MC_(mempool_trim) ( pool, addr, size );
7200          return True;
7201       }
7202 
7203       case VG_USERREQ__MOVE_MEMPOOL: {
7204          Addr poolA     = (Addr)arg[1];
7205          Addr poolB     = (Addr)arg[2];
7206 
7207          MC_(move_mempool) ( poolA, poolB );
7208          return True;
7209       }
7210 
7211       case VG_USERREQ__MEMPOOL_CHANGE: {
7212          Addr pool      = (Addr)arg[1];
7213          Addr addrA     = (Addr)arg[2];
7214          Addr addrB     = (Addr)arg[3];
7215          UInt size      =       arg[4];
7216 
7217          MC_(mempool_change) ( pool, addrA, addrB, size );
7218          return True;
7219       }
7220 
7221       case VG_USERREQ__MEMPOOL_EXISTS: {
7222          Addr pool      = (Addr)arg[1];
7223 
7224          *ret = (UWord) MC_(mempool_exists) ( pool );
7225          return True;
7226       }
7227 
7228       case VG_USERREQ__GDB_MONITOR_COMMAND: {
7229          Bool handled = handle_gdb_monitor_command (tid, (HChar*)arg[1]);
7230          if (handled)
7231             *ret = 1;
7232          else
7233             *ret = 0;
7234          return handled;
7235       }
7236 
7237       case VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE:
7238       case VG_USERREQ__ENABLE_ADDR_ERROR_REPORTING_IN_RANGE: {
7239          Bool addRange
7240             = arg[0] == VG_USERREQ__DISABLE_ADDR_ERROR_REPORTING_IN_RANGE;
7241          Bool ok
7242             = modify_ignore_ranges(addRange, arg[1], arg[2]);
7243          *ret = ok ? 1 : 0;
7244          return True;
7245       }
7246 
7247       default:
7248          VG_(message)(
7249             Vg_UserMsg,
7250             "Warning: unknown memcheck client request code %llx\n",
7251             (ULong)arg[0]
7252          );
7253          return False;
7254    }
7255    return True;
7256 }
7257 
7258 
7259 /*------------------------------------------------------------*/
7260 /*--- Crude profiling machinery.                           ---*/
7261 /*------------------------------------------------------------*/
7262 
7263 // We track a number of interesting events (using PROF_EVENT)
7264 // if MC_PROFILE_MEMORY is defined.
7265 
7266 #ifdef MC_PROFILE_MEMORY
7267 
7268 ULong  MC_(event_ctr)[MCPE_LAST];
7269 
7270 /* Event counter names. Use the name of the function that increments the
7271    event counter. Drop any MC_() and mc_ prefixes. */
7272 static const HChar* MC_(event_ctr_name)[MCPE_LAST] = {
7273    [MCPE_LOADVN_SLOW] = "LOADVn_slow",
7274    [MCPE_LOADVN_SLOW_LOOP] = "LOADVn_slow_loop",
7275    [MCPE_STOREVN_SLOW] = "STOREVn_slow",
7276    [MCPE_STOREVN_SLOW_LOOP] = "STOREVn_slow(loop)",
7277    [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED] = "make_aligned_word32_undefined",
7278    [MCPE_MAKE_ALIGNED_WORD32_UNDEFINED_SLOW] =
7279         "make_aligned_word32_undefined_slow",
7280    [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED] = "make_aligned_word64_undefined",
7281    [MCPE_MAKE_ALIGNED_WORD64_UNDEFINED_SLOW] =
7282         "make_aligned_word64_undefined_slow",
7283    [MCPE_MAKE_ALIGNED_WORD32_NOACCESS] = "make_aligned_word32_noaccess",
7284    [MCPE_MAKE_ALIGNED_WORD32_NOACCESS_SLOW] =
7285          "make_aligned_word32_noaccess_slow",
7286    [MCPE_MAKE_ALIGNED_WORD64_NOACCESS] = "make_aligned_word64_noaccess",
7287    [MCPE_MAKE_ALIGNED_WORD64_NOACCESS_SLOW] =
7288         "make_aligned_word64_noaccess_slow",
7289    [MCPE_MAKE_MEM_NOACCESS] = "make_mem_noaccess",
7290    [MCPE_MAKE_MEM_UNDEFINED] = "make_mem_undefined",
7291    [MCPE_MAKE_MEM_UNDEFINED_W_OTAG] = "make_mem_undefined_w_otag",
7292    [MCPE_MAKE_MEM_DEFINED] = "make_mem_defined",
7293    [MCPE_CHEAP_SANITY_CHECK] = "cheap_sanity_check",
7294    [MCPE_EXPENSIVE_SANITY_CHECK] = "expensive_sanity_check",
7295    [MCPE_COPY_ADDRESS_RANGE_STATE] = "copy_address_range_state",
7296    [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP1] = "copy_address_range_state(loop1)",
7297    [MCPE_COPY_ADDRESS_RANGE_STATE_LOOP2] = "copy_address_range_state(loop2)",
7298    [MCPE_CHECK_MEM_IS_NOACCESS] = "check_mem_is_noaccess",
7299    [MCPE_CHECK_MEM_IS_NOACCESS_LOOP] = "check_mem_is_noaccess(loop)",
7300    [MCPE_IS_MEM_ADDRESSABLE] = "is_mem_addressable",
7301    [MCPE_IS_MEM_ADDRESSABLE_LOOP] = "is_mem_addressable(loop)",
7302    [MCPE_IS_MEM_DEFINED] = "is_mem_defined",
7303    [MCPE_IS_MEM_DEFINED_LOOP] = "is_mem_defined(loop)",
7304    [MCPE_IS_MEM_DEFINED_COMPREHENSIVE] = "is_mem_defined_comprehensive",
7305    [MCPE_IS_MEM_DEFINED_COMPREHENSIVE_LOOP] =
7306         "is_mem_defined_comprehensive(loop)",
7307    [MCPE_IS_DEFINED_ASCIIZ] = "is_defined_asciiz",
7308    [MCPE_IS_DEFINED_ASCIIZ_LOOP] = "is_defined_asciiz(loop)",
7309    [MCPE_FIND_CHUNK_FOR_OLD] = "find_chunk_for_OLD",
7310    [MCPE_FIND_CHUNK_FOR_OLD_LOOP] = "find_chunk_for_OLD(loop)",
7311    [MCPE_SET_ADDRESS_RANGE_PERMS] = "set_address_range_perms",
7312    [MCPE_SET_ADDRESS_RANGE_PERMS_SINGLE_SECMAP] =
7313         "set_address_range_perms(single-secmap)",
7314    [MCPE_SET_ADDRESS_RANGE_PERMS_STARTOF_SECMAP] =
7315         "set_address_range_perms(startof-secmap)",
7316    [MCPE_SET_ADDRESS_RANGE_PERMS_MULTIPLE_SECMAPS] =
7317         "set_address_range_perms(multiple-secmaps)",
7318    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1] =
7319         "set_address_range_perms(dist-sm1)",
7320    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2] =
7321         "set_address_range_perms(dist-sm2)",
7322    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM1_QUICK] =
7323         "set_address_range_perms(dist-sm1-quick)",
7324    [MCPE_SET_ADDRESS_RANGE_PERMS_DIST_SM2_QUICK] =
7325         "set_address_range_perms(dist-sm2-quick)",
7326    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1A] = "set_address_range_perms(loop1a)",
7327    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1B] = "set_address_range_perms(loop1b)",
7328    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP1C] = "set_address_range_perms(loop1c)",
7329    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8A] = "set_address_range_perms(loop8a)",
7330    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP8B] = "set_address_range_perms(loop8b)",
7331    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K] = "set_address_range_perms(loop64K)",
7332    [MCPE_SET_ADDRESS_RANGE_PERMS_LOOP64K_FREE_DIST_SM] =
7333         "set_address_range_perms(loop64K-free-dist-sm)",
7334    [MCPE_LOADV_128_OR_256_SLOW_LOOP] = "LOADV_128_or_256_slow(loop)",
7335    [MCPE_LOADV_128_OR_256]       = "LOADV_128_or_256",
7336    [MCPE_LOADV_128_OR_256_SLOW1] = "LOADV_128_or_256-slow1",
7337    [MCPE_LOADV_128_OR_256_SLOW2] = "LOADV_128_or_256-slow2",
7338    [MCPE_LOADV64]        = "LOADV64",
7339    [MCPE_LOADV64_SLOW1]  = "LOADV64-slow1",
7340    [MCPE_LOADV64_SLOW2]  = "LOADV64-slow2",
7341    [MCPE_STOREV64]       = "STOREV64",
7342    [MCPE_STOREV64_SLOW1] = "STOREV64-slow1",
7343    [MCPE_STOREV64_SLOW2] = "STOREV64-slow2",
7344    [MCPE_STOREV64_SLOW3] = "STOREV64-slow3",
7345    [MCPE_STOREV64_SLOW4] = "STOREV64-slow4",
7346    [MCPE_LOADV32]        = "LOADV32",
7347    [MCPE_LOADV32_SLOW1]  = "LOADV32-slow1",
7348    [MCPE_LOADV32_SLOW2]  = "LOADV32-slow2",
7349    [MCPE_STOREV32]       = "STOREV32",
7350    [MCPE_STOREV32_SLOW1] = "STOREV32-slow1",
7351    [MCPE_STOREV32_SLOW2] = "STOREV32-slow2",
7352    [MCPE_STOREV32_SLOW3] = "STOREV32-slow3",
7353    [MCPE_STOREV32_SLOW4] = "STOREV32-slow4",
7354    [MCPE_LOADV16]        = "LOADV16",
7355    [MCPE_LOADV16_SLOW1]  = "LOADV16-slow1",
7356    [MCPE_LOADV16_SLOW2]  = "LOADV16-slow2",
7357    [MCPE_STOREV16]       = "STOREV16",
7358    [MCPE_STOREV16_SLOW1] = "STOREV16-slow1",
7359    [MCPE_STOREV16_SLOW2] = "STOREV16-slow2",
7360    [MCPE_STOREV16_SLOW3] = "STOREV16-slow3",
7361    [MCPE_STOREV16_SLOW4] = "STOREV16-slow4",
7362    [MCPE_LOADV8]         = "LOADV8",
7363    [MCPE_LOADV8_SLOW1]   = "LOADV8-slow1",
7364    [MCPE_LOADV8_SLOW2]   = "LOADV8-slow2",
7365    [MCPE_STOREV8]        = "STOREV8",
7366    [MCPE_STOREV8_SLOW1]  = "STOREV8-slow1",
7367    [MCPE_STOREV8_SLOW2]  = "STOREV8-slow2",
7368    [MCPE_STOREV8_SLOW3]  = "STOREV8-slow3",
7369    [MCPE_STOREV8_SLOW4]  = "STOREV8-slow4",
7370    [MCPE_NEW_MEM_STACK_4]   = "new_mem_stack_4",
7371    [MCPE_NEW_MEM_STACK_8]   = "new_mem_stack_8",
7372    [MCPE_NEW_MEM_STACK_12]  = "new_mem_stack_12",
7373    [MCPE_NEW_MEM_STACK_16]  = "new_mem_stack_16",
7374    [MCPE_NEW_MEM_STACK_32]  = "new_mem_stack_32",
7375    [MCPE_NEW_MEM_STACK_112] = "new_mem_stack_112",
7376    [MCPE_NEW_MEM_STACK_128] = "new_mem_stack_128",
7377    [MCPE_NEW_MEM_STACK_144] = "new_mem_stack_144",
7378    [MCPE_NEW_MEM_STACK_160] = "new_mem_stack_160",
7379    [MCPE_DIE_MEM_STACK_4]   = "die_mem_stack_4",
7380    [MCPE_DIE_MEM_STACK_8]   = "die_mem_stack_8",
7381    [MCPE_DIE_MEM_STACK_12]  = "die_mem_stack_12",
7382    [MCPE_DIE_MEM_STACK_16]  = "die_mem_stack_16",
7383    [MCPE_DIE_MEM_STACK_32]  = "die_mem_stack_32",
7384    [MCPE_DIE_MEM_STACK_112] = "die_mem_stack_112",
7385    [MCPE_DIE_MEM_STACK_128] = "die_mem_stack_128",
7386    [MCPE_DIE_MEM_STACK_144] = "die_mem_stack_144",
7387    [MCPE_DIE_MEM_STACK_160] = "die_mem_stack_160",
7388    [MCPE_NEW_MEM_STACK]     = "new_mem_stack",
7389    [MCPE_DIE_MEM_STACK]     = "die_mem_stack",
7390    [MCPE_MAKE_STACK_UNINIT_W_O]      = "MAKE_STACK_UNINIT_w_o",
7391    [MCPE_MAKE_STACK_UNINIT_NO_O]     = "MAKE_STACK_UNINIT_no_o",
7392    [MCPE_MAKE_STACK_UNINIT_128_NO_O] = "MAKE_STACK_UNINIT_128_no_o",
7393    [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_16]
7394                                      = "MAKE_STACK_UNINIT_128_no_o_aligned_16",
7395    [MCPE_MAKE_STACK_UNINIT_128_NO_O_ALIGNED_8]
7396                                      = "MAKE_STACK_UNINIT_128_no_o_aligned_8",
7397    [MCPE_MAKE_STACK_UNINIT_128_NO_O_SLOWCASE]
7398                                      = "MAKE_STACK_UNINIT_128_no_o_slowcase",
7399 };
7400 
7401 static void init_prof_mem ( void )
7402 {
7403    Int i, name_count = 0;
7404 
7405    for (i = 0; i < MCPE_LAST; i++) {
7406       MC_(event_ctr)[i] = 0;
7407       if (MC_(event_ctr_name)[i] != NULL)
7408          ++name_count;
7409    }
7410 
7411    /* Make sure every profiling event has a name */
7412    tl_assert(name_count == MCPE_LAST);
7413 }
7414 
7415 static void done_prof_mem ( void )
7416 {
7417    Int  i, n;
7418    Bool spaced = False;
7419    for (i = n = 0; i < MCPE_LAST; i++) {
7420       if (!spaced && (n % 10) == 0) {
7421          VG_(printf)("\n");
7422          spaced = True;
7423       }
7424       if (MC_(event_ctr)[i] > 0) {
7425          spaced = False;
7426          ++n;
7427          VG_(printf)( "prof mem event %3d: %11llu   %s\n",
7428                       i, MC_(event_ctr)[i],
7429                       MC_(event_ctr_name)[i]);
7430       }
7431    }
7432 }
7433 
7434 #else
7435 
7436 static void init_prof_mem ( void ) { }
7437 static void done_prof_mem ( void ) { }
7438 
7439 #endif
7440 
7441 
7442 /*------------------------------------------------------------*/
7443 /*--- Origin tracking stuff                                ---*/
7444 /*------------------------------------------------------------*/
7445 
7446 /*--------------------------------------------*/
7447 /*--- Origin tracking: load handlers       ---*/
7448 /*--------------------------------------------*/
7449 
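/* Representation note, summarising the code below: within an OCacheLine,
   w32[i] holds the origin tag for the i'th 32-bit word of the line, and bit k
   of descr[i] records whether byte k of that word actually carries the tag;
   descr[i] == 0 means no origin is known for that word.  merge_origins()
   combines two partial results by taking the larger tag, so a nonzero (known)
   origin always wins over zero ("no origin"). */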
7450 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
7451    return or1 > or2 ? or1 : or2;
7452 }
7453 
7454 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
7455    OCacheLine* line;
7456    UChar descr;
7457    UWord lineoff = oc_line_offset(a);
7458    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7459 
7460    if (OC_ENABLE_ASSERTIONS) {
7461       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7462    }
7463 
7464    line = find_OCacheLine( a );
7465 
7466    descr = line->descr[lineoff];
7467    if (OC_ENABLE_ASSERTIONS) {
7468       tl_assert(descr < 0x10);
7469    }
7470 
7471    if (LIKELY(0 == (descr & (1 << byteoff))))  {
7472       return 0;
7473    } else {
7474       return line->w32[lineoff];
7475    }
7476 }
7477 
7478 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
7479    OCacheLine* line;
7480    UChar descr;
7481    UWord lineoff, byteoff;
7482 
7483    if (UNLIKELY(a & 1)) {
7484       /* Handle misaligned case, slowly. */
7485       UInt oLo   = (UInt)MC_(helperc_b_load1)( a + 0 );
7486       UInt oHi   = (UInt)MC_(helperc_b_load1)( a + 1 );
7487       return merge_origins(oLo, oHi);
7488    }
7489 
7490    lineoff = oc_line_offset(a);
7491    byteoff = a & 3; /* 0 or 2 */
7492 
7493    if (OC_ENABLE_ASSERTIONS) {
7494       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7495    }
7496    line = find_OCacheLine( a );
7497 
7498    descr = line->descr[lineoff];
7499    if (OC_ENABLE_ASSERTIONS) {
7500       tl_assert(descr < 0x10);
7501    }
7502 
7503    if (LIKELY(0 == (descr & (3 << byteoff)))) {
7504       return 0;
7505    } else {
7506       return line->w32[lineoff];
7507    }
7508 }
7509 
7510 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
7511    OCacheLine* line;
7512    UChar descr;
7513    UWord lineoff;
7514 
7515    if (UNLIKELY(a & 3)) {
7516       /* Handle misaligned case, slowly. */
7517       UInt oLo   = (UInt)MC_(helperc_b_load2)( a + 0 );
7518       UInt oHi   = (UInt)MC_(helperc_b_load2)( a + 2 );
7519       return merge_origins(oLo, oHi);
7520    }
7521 
7522    lineoff = oc_line_offset(a);
7523    if (OC_ENABLE_ASSERTIONS) {
7524       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7525    }
7526 
7527    line = find_OCacheLine( a );
7528 
7529    descr = line->descr[lineoff];
7530    if (OC_ENABLE_ASSERTIONS) {
7531       tl_assert(descr < 0x10);
7532    }
7533 
7534    if (LIKELY(0 == descr)) {
7535       return 0;
7536    } else {
7537       return line->w32[lineoff];
7538    }
7539 }
7540 
7541 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
7542    OCacheLine* line;
7543    UChar descrLo, descrHi, descr;
7544    UWord lineoff;
7545 
7546    if (UNLIKELY(a & 7)) {
7547       /* Handle misaligned case, slowly. */
7548       UInt oLo   = (UInt)MC_(helperc_b_load4)( a + 0 );
7549       UInt oHi   = (UInt)MC_(helperc_b_load4)( a + 4 );
7550       return merge_origins(oLo, oHi);
7551    }
7552 
7553    lineoff = oc_line_offset(a);
7554    if (OC_ENABLE_ASSERTIONS) {
7555       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7556    }
7557 
7558    line = find_OCacheLine( a );
7559 
7560    descrLo = line->descr[lineoff + 0];
7561    descrHi = line->descr[lineoff + 1];
7562    descr   = descrLo | descrHi;
7563    if (OC_ENABLE_ASSERTIONS) {
7564       tl_assert(descr < 0x10);
7565    }
7566 
7567    if (LIKELY(0 == descr)) {
7568       return 0; /* both 32-bit chunks are defined */
7569    } else {
7570       UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
7571       UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
7572       return merge_origins(oLo, oHi);
7573    }
7574 }
7575 
7576 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
7577    UInt oLo   = (UInt)MC_(helperc_b_load8)( a + 0 );
7578    UInt oHi   = (UInt)MC_(helperc_b_load8)( a + 8 );
7579    UInt oBoth = merge_origins(oLo, oHi);
7580    return (UWord)oBoth;
7581 }
7582 
7583 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
7584    UInt oQ0   = (UInt)MC_(helperc_b_load8)( a + 0 );
7585    UInt oQ1   = (UInt)MC_(helperc_b_load8)( a + 8 );
7586    UInt oQ2   = (UInt)MC_(helperc_b_load8)( a + 16 );
7587    UInt oQ3   = (UInt)MC_(helperc_b_load8)( a + 24 );
7588    UInt oAll  = merge_origins(merge_origins(oQ0, oQ1),
7589                               merge_origins(oQ2, oQ3));
7590    return (UWord)oAll;
7591 }
7592 
7593 
7594 /*--------------------------------------------*/
7595 /*--- Origin tracking: store handlers      ---*/
7596 /*--------------------------------------------*/
7597 
7598 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
7599    OCacheLine* line;
7600    UWord lineoff = oc_line_offset(a);
7601    UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
7602 
7603    if (OC_ENABLE_ASSERTIONS) {
7604       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7605    }
7606 
7607    line = find_OCacheLine( a );
7608 
7609    if (d32 == 0) {
7610       line->descr[lineoff] &= ~(1 << byteoff);
7611    } else {
7612       line->descr[lineoff] |= (1 << byteoff);
7613       line->w32[lineoff] = d32;
7614    }
7615 }
7616 
7617 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
7618    OCacheLine* line;
7619    UWord lineoff, byteoff;
7620 
7621    if (UNLIKELY(a & 1)) {
7622       /* Handle misaligned case, slowly. */
7623       MC_(helperc_b_store1)( a + 0, d32 );
7624       MC_(helperc_b_store1)( a + 1, d32 );
7625       return;
7626    }
7627 
7628    lineoff = oc_line_offset(a);
7629    byteoff = a & 3; /* 0 or 2 */
7630 
7631    if (OC_ENABLE_ASSERTIONS) {
7632       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7633    }
7634 
7635    line = find_OCacheLine( a );
7636 
7637    if (d32 == 0) {
7638       line->descr[lineoff] &= ~(3 << byteoff);
7639    } else {
7640       line->descr[lineoff] |= (3 << byteoff);
7641       line->w32[lineoff] = d32;
7642    }
7643 }
7644 
7645 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
7646    OCacheLine* line;
7647    UWord lineoff;
7648 
7649    if (UNLIKELY(a & 3)) {
7650       /* Handle misaligned case, slowly. */
7651       MC_(helperc_b_store2)( a + 0, d32 );
7652       MC_(helperc_b_store2)( a + 2, d32 );
7653       return;
7654    }
7655 
7656    lineoff = oc_line_offset(a);
7657    if (OC_ENABLE_ASSERTIONS) {
7658       tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
7659    }
7660 
7661    line = find_OCacheLine( a );
7662 
7663    if (d32 == 0) {
7664       line->descr[lineoff] = 0;
7665    } else {
7666       line->descr[lineoff] = 0xF;
7667       line->w32[lineoff] = d32;
7668    }
7669 }
7670 
7671 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
7672    OCacheLine* line;
7673    UWord lineoff;
7674 
7675    if (UNLIKELY(a & 7)) {
7676       /* Handle misaligned case, slowly. */
7677       MC_(helperc_b_store4)( a + 0, d32 );
7678       MC_(helperc_b_store4)( a + 4, d32 );
7679       return;
7680    }
7681 
7682    lineoff = oc_line_offset(a);
7683    if (OC_ENABLE_ASSERTIONS) {
7684       tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
7685    }
7686 
7687    line = find_OCacheLine( a );
7688 
7689    if (d32 == 0) {
7690       line->descr[lineoff + 0] = 0;
7691       line->descr[lineoff + 1] = 0;
7692    } else {
7693       line->descr[lineoff + 0] = 0xF;
7694       line->descr[lineoff + 1] = 0xF;
7695       line->w32[lineoff + 0] = d32;
7696       line->w32[lineoff + 1] = d32;
7697    }
7698 }
7699 
7700 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
7701    MC_(helperc_b_store8)( a + 0, d32 );
7702    MC_(helperc_b_store8)( a + 8, d32 );
7703 }
7704 
7705 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
7706    MC_(helperc_b_store8)( a +  0, d32 );
7707    MC_(helperc_b_store8)( a +  8, d32 );
7708    MC_(helperc_b_store8)( a + 16, d32 );
7709    MC_(helperc_b_store8)( a + 24, d32 );
7710 }
7711 
7712 
7713 /*--------------------------------------------*/
7714 /*--- Origin tracking: sarp handlers       ---*/
7715 /*--------------------------------------------*/
7716 
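/* Both helpers below walk an arbitrary [a, a+len) range by peeling off
   leading 1- and 2-byte pieces until 4-byte alignment is reached, handling
   the bulk with aligned 4-byte stores, and finishing with trailing 2- and
   1-byte pieces. */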
7717 __attribute__((noinline))
7718 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
7719    if ((a & 1) && len >= 1) {
7720       MC_(helperc_b_store1)( a, otag );
7721       a++;
7722       len--;
7723    }
7724    if ((a & 2) && len >= 2) {
7725       MC_(helperc_b_store2)( a, otag );
7726       a += 2;
7727       len -= 2;
7728    }
7729    if (len >= 4)
7730       tl_assert(0 == (a & 3));
7731    while (len >= 4) {
7732       MC_(helperc_b_store4)( a, otag );
7733       a += 4;
7734       len -= 4;
7735    }
7736    if (len >= 2) {
7737       MC_(helperc_b_store2)( a, otag );
7738       a += 2;
7739       len -= 2;
7740    }
7741    if (len >= 1) {
7742       MC_(helperc_b_store1)( a, otag );
7743       //a++;
7744       len--;
7745    }
7746    tl_assert(len == 0);
7747 }
7748 
7749 __attribute__((noinline))
7750 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
7751    if ((a & 1) && len >= 1) {
7752       MC_(helperc_b_store1)( a, 0 );
7753       a++;
7754       len--;
7755    }
7756    if ((a & 2) && len >= 2) {
7757       MC_(helperc_b_store2)( a, 0 );
7758       a += 2;
7759       len -= 2;
7760    }
7761    if (len >= 4)
7762       tl_assert(0 == (a & 3));
7763    while (len >= 4) {
7764       MC_(helperc_b_store4)( a, 0 );
7765       a += 4;
7766       len -= 4;
7767    }
7768    if (len >= 2) {
7769       MC_(helperc_b_store2)( a, 0 );
7770       a += 2;
7771       len -= 2;
7772    }
7773    if (len >= 1) {
7774       MC_(helperc_b_store1)( a, 0 );
7775       //a++;
7776       len--;
7777    }
7778    tl_assert(len == 0);
7779 }
7780 
7781 
7782 /*------------------------------------------------------------*/
7783 /*--- Setup and finalisation                               ---*/
7784 /*------------------------------------------------------------*/
7785 
7786 static void mc_post_clo_init ( void )
7787 {
7788    /* If we've been asked to emit XML, mash around various other
7789       options so as to constrain the output somewhat. */
7790    if (VG_(clo_xml)) {
7791       /* Extract as much info as possible from the leak checker. */
7792       MC_(clo_leak_check) = LC_Full;
7793    }
7794 
7795    if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol)
7796        && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7797       VG_(message)(Vg_UserMsg,
7798                    "Warning: --freelist-big-blocks value %lld has no effect\n"
7799                    "as it is >= the --freelist-vol value %lld\n",
7800                    MC_(clo_freelist_big_blocks),
7801                    MC_(clo_freelist_vol));
7802    }
7803 
7804    if (MC_(clo_workaround_gcc296_bugs)
7805        && VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
7806       VG_(umsg)(
7807          "Warning: --workaround-gcc296-bugs=yes is deprecated.\n"
7808          "Warning: Instead use: --ignore-range-below-sp=1024-1\n"
7809          "\n"
7810       );
7811    }
7812 
7813    tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
7814 
7815    if (MC_(clo_mc_level) == 3) {
7816       /* We're doing origin tracking. */
7817 #     ifdef PERF_FAST_STACK
7818       VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
7819       VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
7820       VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
7821       VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
7822       VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
7823       VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
7824       VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
7825       VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
7826       VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
7827 #     endif
7828       VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU     );
7829       VG_(track_new_mem_stack_signal)    ( mc_new_mem_w_tid_make_ECU );
7830    } else {
7831       /* Not doing origin tracking */
7832 #     ifdef PERF_FAST_STACK
7833       VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
7834       VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
7835       VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
7836       VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
7837       VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
7838       VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
7839       VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
7840       VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
7841       VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
7842 #     endif
7843       VG_(track_new_mem_stack)     ( mc_new_mem_stack     );
7844       VG_(track_new_mem_stack_signal) ( mc_new_mem_w_tid_no_ECU );
7845    }
7846 
7847    // We assume that brk()/sbrk() does not initialise new memory.  Is this
7848    // accurate?  John Reiser says:
7849    //
7850    //   0) sbrk() can *decrease* process address space.  No zero fill is done
7851    //   for a decrease, not even the fragment on the high end of the last page
7852    //   that is beyond the new highest address.  For maximum safety and
7853    //   portability, then the bytes in the last page that reside above [the
7854    //   new] sbrk(0) should be considered to be uninitialized, but in practice
7855    //   it is exceedingly likely that they will retain their previous
7856    //   contents.
7857    //
7858    //   1) If an increase is large enough to require new whole pages, then
7859    //   those new whole pages (like all new pages) are zero-filled by the
7860    //   operating system.  So if sbrk(0) already is page aligned, then
7861    //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
7862    //
7863    //   2) Any increase that lies within an existing allocated page is not
7864    //   changed.  So if (x = sbrk(0)) is not page aligned, then
7865    //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
7866    //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
7867    //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
7868    //   of them come along for the ride because the operating system deals
7869    //   only in whole pages.  Again, for maximum safety and portability, then
7870    //   anything that lives above [the new] sbrk(0) should be considered
7871    //   uninitialized, but in practice will retain previous contents [zero in
7872    //   this case.]"
7873    //
7874    // In short:
7875    //
7876    //   A key property of sbrk/brk is that new whole pages that are supplied
7877    //   by the operating system *do* get initialized to zero.
7878    //
7879    // As for the portability of all this:
7880    //
7881    //   sbrk and brk are not POSIX.  However, any system that is a derivative
7882    //   of *nix has sbrk and brk because too much software (such as
7883    //   the Bourne shell) relies on the traditional memory map (.text,
7884    //   .data+.bss, stack) and the existence of sbrk/brk.
7885    //
7886    // So we should arguably observe all this.  However:
7887    // - The current inaccuracy has caused maybe one complaint in seven years(?)
7888    // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
7889    //   doubt most programmers know the above information.
7890    // So I'm not terribly unhappy with marking it as undefined. --njn.
7891    //
7892    // [More:  I think most of what John said only applies to sbrk().  It seems
7893    // that brk() always deals in whole pages.  And since this event deals
7894    // directly with brk(), not with sbrk(), perhaps it would be reasonable to
7895    // just mark all memory it allocates as defined.]
7896    //
7897 #  if !defined(VGO_solaris)
7898    if (MC_(clo_mc_level) == 3)
7899       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_make_ECU );
7900    else
7901       VG_(track_new_mem_brk)         ( mc_new_mem_w_tid_no_ECU );
7902 #  else
7903    // On Solaris, brk memory has to be marked as defined, otherwise we get
7904    // many false positives.
7905    VG_(track_new_mem_brk)         ( make_mem_defined_w_tid );
7906 #  endif
7907 
7908    /* This origin tracking cache is huge (~100M), so only initialise
7909       if we need it. */
7910    if (MC_(clo_mc_level) >= 3) {
7911       init_OCache();
7912       tl_assert(ocacheL1 != NULL);
7913       tl_assert(ocacheL2 != NULL);
7914    } else {
7915       tl_assert(ocacheL1 == NULL);
7916       tl_assert(ocacheL2 == NULL);
7917    }
7918 
7919    MC_(chunk_poolalloc) = VG_(newPA)
7920       (sizeof(MC_Chunk) + MC_(n_where_pointers)() * sizeof(ExeContext*),
7921        1000,
7922        VG_(malloc),
7923        "mc.cMC.1 (MC_Chunk pools)",
7924        VG_(free));
7925 
7926    /* Do not check definedness of guest state if --undef-value-errors=no */
7927    if (MC_(clo_mc_level) >= 2)
7928       VG_(track_pre_reg_read) ( mc_pre_reg_read );
7929 
7930    if (VG_(clo_xtree_memory) == Vg_XTMemory_Full) {
7931       if (MC_(clo_keep_stacktraces) == KS_none
7932           || MC_(clo_keep_stacktraces) == KS_free)
7933          VG_(fmsg_bad_option)("--keep-stacktraces",
7934                               "To use --xtree-memory=full, you must"
7935                               " keep at least the alloc stacktrace\n");
7936       // Activate full xtree memory profiling.
7937       VG_(XTMemory_Full_init)(VG_(XT_filter_1top_and_maybe_below_main));
7938    }
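   /* Usage note added for clarity: full xtree memory profiling is
      typically requested with something like
         valgrind --tool=memcheck --xtree-memory=full \
                  --keep-stacktraces=alloc-and-free ./prog
      i.e. --keep-stacktraces must be set to a mode that records at least
      the allocation stack trace. */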
7939 
7940 }
7941 
7942 static void print_SM_info(const HChar* type, Int n_SMs)
7943 {
7944    VG_(message)(Vg_DebugMsg,
7945       " memcheck: SMs: %s = %d (%luk, %luM)\n",
7946       type,
7947       n_SMs,
7948       n_SMs * sizeof(SecMap) / 1024UL,
7949       n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
7950 }
7951 
7952 static void mc_print_stats (void)
7953 {
7954    SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
7955 
7956    VG_(message)(Vg_DebugMsg, " memcheck: freelist: vol %lld length %lld\n",
7957                 VG_(free_queue_volume), VG_(free_queue_length));
7958    VG_(message)(Vg_DebugMsg,
7959       " memcheck: sanity checks: %d cheap, %d expensive\n",
7960       n_sanity_cheap, n_sanity_expensive );
7961    VG_(message)(Vg_DebugMsg,
7962       " memcheck: auxmaps: %llu auxmap entries (%lluk, %lluM) in use\n",
7963       n_auxmap_L2_nodes,
7964       n_auxmap_L2_nodes * 64,
7965       n_auxmap_L2_nodes / 16 );
7966    VG_(message)(Vg_DebugMsg,
7967       " memcheck: auxmaps_L1: %llu searches, %llu cmps, ratio %llu:10\n",
7968       n_auxmap_L1_searches, n_auxmap_L1_cmps,
7969       (10ULL * n_auxmap_L1_cmps)
7970          / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
7971    );
7972    VG_(message)(Vg_DebugMsg,
7973       " memcheck: auxmaps_L2: %llu searches, %llu nodes\n",
7974       n_auxmap_L2_searches, n_auxmap_L2_nodes
7975    );
7976 
7977    print_SM_info("n_issued     ", n_issued_SMs);
7978    print_SM_info("n_deissued   ", n_deissued_SMs);
7979    print_SM_info("max_noaccess ", max_noaccess_SMs);
7980    print_SM_info("max_undefined", max_undefined_SMs);
7981    print_SM_info("max_defined  ", max_defined_SMs);
7982    print_SM_info("max_non_DSM  ", max_non_DSM_SMs);
7983 
7984    // Three DSMs, plus the non-DSM ones
7985    max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
7986    // The 3*sizeof(Word) term is the AVL node metadata size.
7987    // The VG_ROUNDUP is because the OSet pool allocator must align
7988    // the elements to pointer size.
7989    // Note that the pool allocator has some additional small overhead
7990    // which is not counted in the below.
7991    // Hardwiring this logic sucks, but I don't see how else to do it.
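   // (Illustrative figures only: on a 64-bit target the 3*sizeof(Word)
   // metadata term is 24 bytes, and VG_ROUNDUP rounds sizeof(SecVBitNode)
   // up to the next multiple of 8.)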
7992    max_secVBit_szB = max_secVBit_nodes *
7993          (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
7994    max_shmem_szB   = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
7995 
7996    VG_(message)(Vg_DebugMsg,
7997       " memcheck: max sec V bit nodes:    %d (%luk, %luM)\n",
7998       max_secVBit_nodes, max_secVBit_szB / 1024,
7999                          max_secVBit_szB / (1024 * 1024));
8000    VG_(message)(Vg_DebugMsg,
8001       " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
8002       sec_vbits_new_nodes + sec_vbits_updates,
8003       sec_vbits_new_nodes, sec_vbits_updates );
8004    VG_(message)(Vg_DebugMsg,
8005       " memcheck: max shadow mem size:   %luk, %luM\n",
8006       max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
8007 
8008    if (MC_(clo_mc_level) >= 3) {
8009       VG_(message)(Vg_DebugMsg,
8010                    " ocacheL1: %'12lu refs   %'12lu misses (%'lu lossage)\n",
8011                    stats_ocacheL1_find,
8012                    stats_ocacheL1_misses,
8013                    stats_ocacheL1_lossage );
8014       VG_(message)(Vg_DebugMsg,
8015                    " ocacheL1: %'12lu at 0   %'12lu at 1\n",
8016                    stats_ocacheL1_find - stats_ocacheL1_misses
8017                       - stats_ocacheL1_found_at_1
8018                       - stats_ocacheL1_found_at_N,
8019                    stats_ocacheL1_found_at_1 );
8020       VG_(message)(Vg_DebugMsg,
8021                    " ocacheL1: %'12lu at 2+  %'12lu move-fwds\n",
8022                    stats_ocacheL1_found_at_N,
8023                    stats_ocacheL1_movefwds );
8024       VG_(message)(Vg_DebugMsg,
8025                    " ocacheL1: %'12lu sizeB  %'12d useful\n",
8026                    (SizeT)sizeof(OCache),
8027                    4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
8028       VG_(message)(Vg_DebugMsg,
8029                    " ocacheL2: %'12lu refs   %'12lu misses\n",
8030                    stats__ocacheL2_refs,
8031                    stats__ocacheL2_misses );
8032       VG_(message)(Vg_DebugMsg,
8033                    " ocacheL2:    %'9lu max nodes %'9lu curr nodes\n",
8034                    stats__ocacheL2_n_nodes_max,
8035                    stats__ocacheL2_n_nodes );
8036       VG_(message)(Vg_DebugMsg,
8037                    " niacache: %'12lu refs   %'12lu misses\n",
8038                    stats__nia_cache_queries, stats__nia_cache_misses);
8039    } else {
8040       tl_assert(ocacheL1 == NULL);
8041       tl_assert(ocacheL2 == NULL);
8042    }
8043 }
8044 
8045 
8046 static void mc_fini ( Int exitcode )
8047 {
8048    MC_(xtmemory_report) (VG_(clo_xtree_memory_file), True);
8049    MC_(print_malloc_stats)();
8050 
8051    if (MC_(clo_leak_check) != LC_Off) {
8052       LeakCheckParams lcp;
8053       HChar* xt_filename = NULL;
8054       lcp.mode = MC_(clo_leak_check);
8055       lcp.show_leak_kinds = MC_(clo_show_leak_kinds);
8056       lcp.heuristics = MC_(clo_leak_check_heuristics);
8057       lcp.errors_for_leak_kinds = MC_(clo_error_for_leak_kinds);
8058       lcp.deltamode = LCD_Any;
8059       lcp.max_loss_records_output = 999999999;
8060       lcp.requested_by_monitor_command = False;
8061       if (MC_(clo_xtree_leak)) {
8062          xt_filename = VG_(expand_file_name)("--xtree-leak-file",
8063                                              MC_(clo_xtree_leak_file));
8064          lcp.xt_filename = xt_filename;
8065          lcp.mode = LC_Full;
8066       }
8067       else
8068          lcp.xt_filename = NULL;
8069       MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
8070       if (MC_(clo_xtree_leak))
8071          VG_(free)(xt_filename);
8072    } else {
8073       if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8074          VG_(umsg)(
8075             "For a detailed leak analysis, rerun with: --leak-check=full\n"
8076             "\n"
8077          );
8078       }
8079    }
8080 
8081    if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
8082       VG_(message)(Vg_UserMsg,
8083                    "For counts of detected and suppressed errors, rerun with: -v\n");
8084    }
8085 
8086    if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
8087        && MC_(clo_mc_level) == 2) {
8088       VG_(message)(Vg_UserMsg,
8089                    "Use --track-origins=yes to see where "
8090                    "uninitialised values come from\n");
8091    }
8092 
8093    /* Print a warning if any client-request generated ignore-ranges
8094       still exist.  It would be reasonable to expect that a properly
8095       written program would remove any such ranges before exiting, and
8096       since they are a bit on the dangerous side, let's comment on any that remain.  By
8097       contrast ranges which are specified on the command line normally
8098       pertain to hardware mapped into the address space, and so we
8099       can't expect the client to have got rid of them. */
8100    if (gIgnoredAddressRanges) {
8101       UInt i, nBad = 0;
8102       for (i = 0; i < VG_(sizeRangeMap)(gIgnoredAddressRanges); i++) {
8103          UWord val     = IAR_INVALID;
8104          UWord key_min = ~(UWord)0;
8105          UWord key_max = (UWord)0;
8106          VG_(indexRangeMap)( &key_min, &key_max, &val,
8107                              gIgnoredAddressRanges, i );
8108          if (val != IAR_ClientReq)
8109            continue;
8110          /* Print the offending range.  Also, if it is the first,
8111             print a banner before it. */
8112          nBad++;
8113          if (nBad == 1) {
8114             VG_(umsg)(
8115               "WARNING: exiting program has the following client-requested\n"
8116               "WARNING: address error disablement range(s) still in force,\n"
8117               "WARNING: "
8118                  "possibly as a result of some mistake in the use of the\n"
8119               "WARNING: "
8120                  "VALGRIND_{DISABLE,ENABLE}_ERROR_REPORTING_IN_RANGE macros.\n"
8121             );
8122          }
8123          VG_(umsg)("   [%u]  0x%016lx-0x%016lx  %s\n",
8124                    i, key_min, key_max, showIARKind(val));
8125       }
8126    }
8127 
8128    done_prof_mem();
8129 
8130    if (VG_(clo_stats))
8131       mc_print_stats();
8132 
8133    if (0) {
8134       VG_(message)(Vg_DebugMsg,
8135         "------ Valgrind's client block stats follow ---------------\n" );
8136       show_client_block_stats();
8137    }
8138 }
8139 
8140 /* Mark the given addr/len unaddressable for the watchpoint implementation.
8141    The PointKind will be handled at access time. */
8142 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
8143                                                   Addr addr, SizeT len)
8144 {
8145    /* GDBTD this is somewhat fishy.  We should perhaps instead save the
8146       previous accessibility and definedness in gdbserver, so that they
8147       can be restored properly.  Currently we assume that the user only
8148       watches things which are properly addressable and defined. */
8149    if (insert)
8150       MC_(make_mem_noaccess) (addr, len);
8151    else
8152       MC_(make_mem_defined)  (addr, len);
8153    return True;
8154 }
8155 
8156 static void mc_pre_clo_init(void)
8157 {
8158    VG_(details_name)            ("Memcheck");
8159    VG_(details_version)         (NULL);
8160    VG_(details_description)     ("a memory error detector");
8161    VG_(details_copyright_author)(
8162       "Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.");
8163    VG_(details_bug_reports_to)  (VG_BUGS_TO);
8164    VG_(details_avg_translation_sizeB) ( 640 );
8165 
8166    VG_(basic_tool_funcs)          (mc_post_clo_init,
8167                                    MC_(instrument),
8168                                    mc_fini);
8169 
8170    VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );
8171 
8172 
8173    VG_(needs_core_errors)         ();
8174    VG_(needs_tool_errors)         (MC_(eq_Error),
8175                                    MC_(before_pp_Error),
8176                                    MC_(pp_Error),
8177                                    True,/*show TIDs for errors*/
8178                                    MC_(update_Error_extra),
8179                                    MC_(is_recognised_suppression),
8180                                    MC_(read_extra_suppression_info),
8181                                    MC_(error_matches_suppression),
8182                                    MC_(get_error_name),
8183                                    MC_(get_extra_suppression_info),
8184                                    MC_(print_extra_suppression_use),
8185                                    MC_(update_extra_suppression_use));
8186    VG_(needs_libc_freeres)        ();
8187    VG_(needs_cxx_freeres)         ();
8188    VG_(needs_command_line_options)(mc_process_cmd_line_options,
8189                                    mc_print_usage,
8190                                    mc_print_debug_usage);
8191    VG_(needs_client_requests)     (mc_handle_client_request);
8192    VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
8193                                    mc_expensive_sanity_check);
8194    VG_(needs_print_stats)         (mc_print_stats);
8195    VG_(needs_info_location)       (MC_(pp_describe_addr));
8196    VG_(needs_malloc_replacement)  (MC_(malloc),
8197                                    MC_(__builtin_new),
8198                                    MC_(__builtin_vec_new),
8199                                    MC_(memalign),
8200                                    MC_(calloc),
8201                                    MC_(free),
8202                                    MC_(__builtin_delete),
8203                                    MC_(__builtin_vec_delete),
8204                                    MC_(realloc),
8205                                    MC_(malloc_usable_size),
8206                                    MC_MALLOC_DEFAULT_REDZONE_SZB );
8207    MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();
8208 
8209    VG_(needs_xml_output)          ();
8210 
8211    VG_(track_new_mem_startup)     ( mc_new_mem_startup );
8212 
8213    // Handling of mmap and mprotect isn't simple (well, it is simple,
8214    // but the justification isn't.)  See comments above, just prior to
8215    // mc_new_mem_mmap.
8216    VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
8217    VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
8218 
8219    VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );
8220 
8221    VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
8222    VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
8223    VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );
8224 
8225    /* Defer the specification of the new_mem_stack functions to the
8226       post_clo_init function, since we need to first parse the command
8227       line before deciding which set to use. */
8228 
8229 #  ifdef PERF_FAST_STACK
8230    VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
8231    VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
8232    VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
8233    VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
8234    VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
8235    VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
8236    VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
8237    VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
8238    VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
8239 #  endif
8240    VG_(track_die_mem_stack)       ( mc_die_mem_stack     );
8241 
8242    VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );
8243 
8244    VG_(track_pre_mem_read)        ( check_mem_is_defined );
8245    VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
8246    VG_(track_pre_mem_write)       ( check_mem_is_addressable );
8247    VG_(track_post_mem_write)      ( mc_post_mem_write );
8248 
8249    VG_(track_post_reg_write)                  ( mc_post_reg_write );
8250    VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
8251 
8252    if (MC_(clo_mc_level) >= 2) {
8253       VG_(track_copy_mem_to_reg)  ( mc_copy_mem_to_reg );
8254       VG_(track_copy_reg_to_mem)  ( mc_copy_reg_to_mem );
8255    }
8256 
8257    VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );
8258 
8259    init_shadow_memory();
8260    // MC_(chunk_poolalloc) must be allocated in post_clo_init
8261    tl_assert(MC_(chunk_poolalloc) == NULL);
8262    MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
8263    MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
8264    init_prof_mem();
8265 
8266    tl_assert( mc_expensive_sanity_check() );
8267 
8268    // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
8269    tl_assert(sizeof(UWord) == sizeof(Addr));
8270    // Call me paranoid.  I don't care.
8271    tl_assert(sizeof(void*) == sizeof(Addr));
8272 
8273    // BYTES_PER_SEC_VBIT_NODE must be a power of two.
8274    tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
8275 
8276    /* This is small.  Always initialise it. */
8277    init_nia_to_ecu_cache();
8278 
8279    /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
8280       if we need to, since the command line args haven't been
8281       processed yet.  Hence defer it to mc_post_clo_init. */
8282    tl_assert(ocacheL1 == NULL);
8283    tl_assert(ocacheL2 == NULL);
8284 
8285    /* Check some important stuff.  See extensive comments above
8286       re UNALIGNED_OR_HIGH for background. */
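   /* Informally (added note; see the UNALIGNED_OR_HIGH comments for the
      precise story): the asserted MASK(sz) values have a bit set for every
      address bit that is either above MAX_PRIMARY_ADDRESS or below the
      sz-byte alignment boundary, so (a & MASK(sz)) == 0 exactly when 'a'
      is sz-aligned and falls within the primary map. */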
8287 #  if VG_WORDSIZE == 4
8288    tl_assert(sizeof(void*) == 4);
8289    tl_assert(sizeof(Addr)  == 4);
8290    tl_assert(sizeof(UWord) == 4);
8291    tl_assert(sizeof(Word)  == 4);
8292    tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
8293    tl_assert(MASK(1) == 0UL);
8294    tl_assert(MASK(2) == 1UL);
8295    tl_assert(MASK(4) == 3UL);
8296    tl_assert(MASK(8) == 7UL);
8297 #  else
8298    tl_assert(VG_WORDSIZE == 8);
8299    tl_assert(sizeof(void*) == 8);
8300    tl_assert(sizeof(Addr)  == 8);
8301    tl_assert(sizeof(UWord) == 8);
8302    tl_assert(sizeof(Word)  == 8);
8303    tl_assert(MAX_PRIMARY_ADDRESS == 0x1FFFFFFFFFULL);
8304    tl_assert(MASK(1) == 0xFFFFFFE000000000ULL);
8305    tl_assert(MASK(2) == 0xFFFFFFE000000001ULL);
8306    tl_assert(MASK(4) == 0xFFFFFFE000000003ULL);
8307    tl_assert(MASK(8) == 0xFFFFFFE000000007ULL);
8308 #  endif
8309 
8310    /* Check some assertions to do with the instrumentation machinery. */
8311    MC_(do_instrumentation_startup_checks)();
8312 }
8313 
8314 STATIC_ASSERT(sizeof(UWord) == sizeof(SizeT));
8315 
8316 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
8317 
8318 /*--------------------------------------------------------------------*/
8319 /*--- end                                                mc_main.c ---*/
8320 /*--------------------------------------------------------------------*/
8321