1
2 /*--------------------------------------------------------------------*/
3 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/
4 /*--- accessibility (A) and validity (V) status of each byte. ---*/
5 /*--- mc_main.c ---*/
6 /*--------------------------------------------------------------------*/
7
8 /*
9 This file is part of MemCheck, a heavyweight Valgrind tool for
10 detecting memory errors.
11
12 Copyright (C) 2000-2010 Julian Seward
13 jseward@acm.org
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31 */
32
33 #include "pub_tool_basics.h"
34 #include "pub_tool_aspacemgr.h"
35 #include "pub_tool_hashtable.h" // For mc_include.h
36 #include "pub_tool_libcbase.h"
37 #include "pub_tool_libcassert.h"
38 #include "pub_tool_libcprint.h"
39 #include "pub_tool_machine.h"
40 #include "pub_tool_mallocfree.h"
41 #include "pub_tool_options.h"
42 #include "pub_tool_oset.h"
43 #include "pub_tool_replacemalloc.h"
44 #include "pub_tool_tooliface.h"
45 #include "pub_tool_threadstate.h"
46
47 #include "mc_include.h"
48 #include "memcheck.h" /* for client requests */
49
50
51 /* Set to 1 to do a little more sanity checking */
52 #define VG_DEBUG_MEMORY 0
53
54 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
55
56 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
57 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
58
59
60 /*------------------------------------------------------------*/
61 /*--- Fast-case knobs ---*/
62 /*------------------------------------------------------------*/
63
64 // Comment these out to disable the fast cases (don't just set them to zero).
65
66 #define PERF_FAST_LOADV 1
67 #define PERF_FAST_STOREV 1
68
69 #define PERF_FAST_SARP 1
70
71 #define PERF_FAST_STACK 1
72 #define PERF_FAST_STACK2 1
73
74 /* Change this to 1 to enable assertions on origin tracking cache fast
75 paths */
76 #define OC_ENABLE_ASSERTIONS 0
77
78
79 /*------------------------------------------------------------*/
80 /*--- Comments on the origin tracking implementation ---*/
81 /*------------------------------------------------------------*/
82
83 /* See detailed comment entitled
84 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
85 which is contained further on in this file. */
86
87
88 /*------------------------------------------------------------*/
89 /*--- V bits and A bits ---*/
90 /*------------------------------------------------------------*/
91
92 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
93 thinks the corresponding value bit is defined. And every memory byte
94 has an A bit, which tracks whether Memcheck thinks the program can access
95 it safely (ie. it's mapped, and has at least one of the RWX permission bits
96 set). So every N-bit register is shadowed with N V bits, and every memory
97 byte is shadowed with 8 V bits and one A bit.
98
99 In the implementation, we use two forms of compression (compressed V bits
100 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
101 for memory.
102
103 Memcheck also tracks extra information about each heap block that is
104 allocated, for detecting memory leaks and other purposes.
105 */
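/* Illustrative example (added commentary, not part of the original
   comment): how these states typically evolve for heap memory in a
   client program running under Memcheck.

      char* p = malloc(4);   // p[0..3]: A bit set, all V bits undefined
      p[0] = 42;             // p[0]: still addressable, V bits now defined
      if (p[1] == 0) { }     // error: conditional jump on undefined value
      free(p);               // p[0..3]: A bit cleared (noaccess)
      p[0] = 1;              // error: invalid write to unaddressable byte
*/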
106
107 /*------------------------------------------------------------*/
108 /*--- Basic A/V bitmap representation. ---*/
109 /*------------------------------------------------------------*/
110
111 /* All reads and writes are checked against a memory map (a.k.a. shadow
112 memory), which records the state of all memory in the process.
113
114 On 32-bit machines the memory map is organised as follows.
115 The top 16 bits of an address are used to index into a top-level
116 map table, containing 65536 entries. Each entry is a pointer to a
117 second-level map, which records the accessibility and validity
118 permissions for the 65536 bytes indexed by the lower 16 bits of the
119 address. Each byte is represented by two bits (details are below). So
120 each second-level map contains 16384 bytes. This two-level arrangement
121 conveniently divides the 4G address space into 64k lumps, each of size 64k
122 bytes.
123
124 All entries in the primary (top-level) map must point to a valid
125 secondary (second-level) map. Since many of the 64kB chunks will
126 have the same status for every bit -- ie. noaccess (for unused
127 address space) or entirely addressable and defined (for code segments) --
128 there are three distinguished secondary maps, which indicate 'noaccess',
129 'undefined' and 'defined'. For these uniform 64kB chunks, the primary
130 map entry points to the relevant distinguished map. In practice,
131 typically more than half of the addressable memory is represented with
132 the 'undefined' or 'defined' distinguished secondary map, so it gives a
133 good saving. It also lets us set the V+A bits of large address regions
134 quickly in set_address_range_perms().
135
136 On 64-bit machines it's more complicated. If we followed the same basic
137 scheme we'd have a four-level table which would require too many memory
138 accesses. So instead the top-level map table has 2^22 entries (indexed
139 using bits 16..37 of the address); this covers the bottom 256GB. Any
140 accesses above 256GB are handled with a slow, sparse auxiliary table.
141 Valgrind's address space manager tries very hard to keep things below
142 this 256GB barrier so that performance doesn't suffer too much.
143
144 Note that this file has a lot of different functions for reading and
145 writing shadow memory. Only a couple are strictly necessary (eg.
146 get_vabits2 and set_vabits2), most are just specialised for specific
147 common cases to improve performance.
148
149 Aside: the V+A bits are less precise than they could be -- we have no way
150 of marking memory as read-only. It would be great if we could add an
151 extra state VA_BITSn_READONLY. But then we'd have 5 different states,
152 which requires 2.3 bits to hold, and there's no way to do that elegantly
153 -- we'd have to double up to 4 bits of metadata per byte, which doesn't
154 seem worth it.
155 */
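/* Illustrative sketch (not part of the original source): the 32-bit
   two-level lookup described above, recomputed by hand for one address.
   The real code uses primary_map[], SM_OFF() and friends, which are
   defined further down; this helper exists only as a worked example and
   is not compiled. */
#if 0
static void example_two_level_lookup_32bit ( void )
{
   Addr  a          = 0xB7F12345;
   UWord primary_ix = a >> 16;        /* index into the 65536-entry primary map  */
   UWord sec_byte   = a & 0xffff;     /* byte within the 64kB secondary chunk    */
   UWord vabits8_ix = sec_byte >> 2;  /* which vabits8 entry (4 bytes per entry) */
   UWord shift      = (a & 3) << 1;   /* which 2-bit field within that entry     */
   tl_assert(primary_ix == 0xB7F1);
   tl_assert(sec_byte   == 0x2345);
   tl_assert(vabits8_ix == 0x8D1);
   tl_assert(shift      == 2);
}
#endif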
156
157 /* --------------- Basic configuration --------------- */
158
159 /* Only change this. N_PRIMARY_MAP *must* be a power of 2. */
160
161 #if VG_WORDSIZE == 4
162
163 /* cover the entire address space */
164 # define N_PRIMARY_BITS 16
165
166 #else
167
168 /* Just handle the first 256G fast and the rest via auxiliary
169 primaries. If you change this, Memcheck will assert at startup.
170 See the definition of UNALIGNED_OR_HIGH for extensive comments. */
171 # define N_PRIMARY_BITS 22
172
173 #endif
174
175
176 /* Do not change this. */
177 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS)
178
179 /* Do not change this. */
180 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
181
182
183 /* --------------- Secondary maps --------------- */
184
185 // Each byte of memory conceptually has an A bit, which indicates its
186 // addressability, and 8 V bits, which indicate its definedness.
187 //
188 // But because very few bytes are partially defined, we can use a nice
189 // compression scheme to reduce the size of shadow memory. Each byte of
190 // memory has 2 bits which indicate its state (ie. V+A bits):
191 //
192 // 00: noaccess (unaddressable but treated as fully defined)
193 // 01: undefined (addressable and fully undefined)
194 // 10: defined (addressable and fully defined)
195 // 11: partdefined (addressable and partially defined)
196 //
197 // In the "partdefined" case, we use a secondary table to store the V bits.
198 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
199 // bits.
200 //
201 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
202 // four bytes (32 bits) of memory are in each chunk. Hence the name
203 // "vabits8". This lets us get the V+A bits for four bytes at a time
204 // easily (without having to do any shifting and/or masking), and that is a
205 // very common operation. (Note that although each vabits8 chunk
206 // is 8 bits in size, it represents 32 bits of memory.)
207 //
208 // The representation is "inverse" little-endian... each 4 bytes of
209 // memory is represented by a 1 byte value, where:
210 //
211 // - the status of byte (a+0) is held in bits [1..0]
212 // - the status of byte (a+1) is held in bits [3..2]
213 // - the status of byte (a+2) is held in bits [5..4]
214 // - the status of byte (a+3) is held in bits [7..6]
215 //
216 // It's "inverse" because endianness normally describes a mapping from
217 // value bits to memory addresses; in this case the mapping is inverted.
218 // Ie. instead of particular value bits being held in certain addresses, in
219 // this case certain addresses are represented by particular value bits.
220 // See insert_vabits2_into_vabits8() for an example.
221 //
222 // But note that we don't compress the V bits stored in registers; they
223 // need to be explicit to make the shadow operations possible. Therefore
224 // when moving values between registers and memory we need to convert
225 // between the expanded in-register format and the compressed in-memory
226 // format. This isn't so difficult, it just requires careful attention in a
227 // few places.
228
229 // These represent eight bits of memory.
230 #define VA_BITS2_NOACCESS 0x0 // 00b
231 #define VA_BITS2_UNDEFINED 0x1 // 01b
232 #define VA_BITS2_DEFINED 0x2 // 10b
233 #define VA_BITS2_PARTDEFINED 0x3 // 11b
234
235 // These represent 16 bits of memory.
236 #define VA_BITS4_NOACCESS 0x0 // 00_00b
237 #define VA_BITS4_UNDEFINED 0x5 // 01_01b
238 #define VA_BITS4_DEFINED 0xa // 10_10b
239
240 // These represent 32 bits of memory.
241 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b
242 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b
243 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b
244
245 // These represent 64 bits of memory.
246 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2
247 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
248 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
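// Illustrative worked example (not part of the original source): packing
// the states of four consecutive bytes into one vabits8 chunk, using the
// "inverse" little-endian layout described above.  Not compiled; it only
// shows the arithmetic that insert_vabits2_into_vabits8() performs.
#if 0
static UChar example_pack_vabits8 ( void )
{
   UChar vabits8 = 0;
   vabits8 |= VA_BITS2_DEFINED   << 0;   // byte a+0 -> bits [1..0]
   vabits8 |= VA_BITS2_UNDEFINED << 2;   // byte a+1 -> bits [3..2]
   vabits8 |= VA_BITS2_DEFINED   << 4;   // byte a+2 -> bits [5..4]
   vabits8 |= VA_BITS2_NOACCESS  << 6;   // byte a+3 -> bits [7..6]
   return vabits8;                       // == 0x26 == 00_10_01_10b
}
#endif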
249
250
251 #define SM_CHUNKS 16384
252 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
253 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3)
254
255 // Paranoia: it's critical for performance that the requested inlining
256 // occurs. So try extra hard.
257 #define INLINE inline __attribute__((always_inline))
258
259 static INLINE Addr start_of_this_sm ( Addr a ) {
260 return (a & (~SM_MASK));
261 }
262 static INLINE Bool is_start_of_sm ( Addr a ) {
263 return (start_of_this_sm(a) == a);
264 }
265
266 typedef
267 struct {
268 UChar vabits8[SM_CHUNKS];
269 }
270 SecMap;
271
272 // 3 distinguished secondary maps, one for no-access, one for
273 // accessible but undefined, and one for accessible and defined.
274 // Distinguished secondaries may never be modified.
275 #define SM_DIST_NOACCESS 0
276 #define SM_DIST_UNDEFINED 1
277 #define SM_DIST_DEFINED 2
278
279 static SecMap sm_distinguished[3];
280
281 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
282 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
283 }
284
285 // Forward declaration
286 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
287
288 /* dist_sm points to one of our three distinguished secondaries. Make
289 a copy of it so that we can write to it.
290 */
291 static SecMap* copy_for_writing ( SecMap* dist_sm )
292 {
293 SecMap* new_sm;
294 tl_assert(dist_sm == &sm_distinguished[0]
295 || dist_sm == &sm_distinguished[1]
296 || dist_sm == &sm_distinguished[2]);
297
298 new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
299 if (new_sm == NULL)
300 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
301 sizeof(SecMap) );
302 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
303 update_SM_counts(dist_sm, new_sm);
304 return new_sm;
305 }
306
307 /* --------------- Stats --------------- */
308
309 static Int n_issued_SMs = 0;
310 static Int n_deissued_SMs = 0;
311 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs
312 static Int n_undefined_SMs = 0;
313 static Int n_defined_SMs = 0;
314 static Int n_non_DSM_SMs = 0;
315 static Int max_noaccess_SMs = 0;
316 static Int max_undefined_SMs = 0;
317 static Int max_defined_SMs = 0;
318 static Int max_non_DSM_SMs = 0;
319
320 /* # searches initiated in auxmap_L1, and # base cmps required */
321 static ULong n_auxmap_L1_searches = 0;
322 static ULong n_auxmap_L1_cmps = 0;
323 /* # of searches that missed in auxmap_L1 and therefore had to
324 be handed to auxmap_L2. And the number of nodes inserted. */
325 static ULong n_auxmap_L2_searches = 0;
326 static ULong n_auxmap_L2_nodes = 0;
327
328 static Int n_sanity_cheap = 0;
329 static Int n_sanity_expensive = 0;
330
331 static Int n_secVBit_nodes = 0;
332 static Int max_secVBit_nodes = 0;
333
334 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
335 {
336 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
337 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
338 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --;
339 else { n_non_DSM_SMs --;
340 n_deissued_SMs ++; }
341
342 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
343 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
344 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++;
345 else { n_non_DSM_SMs ++;
346 n_issued_SMs ++; }
347
348 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs;
349 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
350 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs;
351 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs;
352 }
353
354 /* --------------- Primary maps --------------- */
355
356 /* The main primary map. This covers some initial part of the address
357 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is
358 handled using the auxiliary primary map.
359 */
360 static SecMap* primary_map[N_PRIMARY_MAP];
361
362
363 /* An entry in the auxiliary primary map. base must be a 64k-aligned
364 value, and sm points at the relevant secondary map. As with the
365 main primary map, the secondary may be either a real secondary, or
366 one of the three distinguished secondaries. DO NOT CHANGE THIS
367 LAYOUT: the first word has to be the key for OSet fast lookups.
368 */
369 typedef
370 struct {
371 Addr base;
372 SecMap* sm;
373 }
374 AuxMapEnt;
375
376 /* Tunable parameter: How big is the L1 queue? */
377 #define N_AUXMAP_L1 24
378
379 /* Tunable parameter: How far along the L1 queue to insert
380 entries resulting from L2 lookups? */
381 #define AUXMAP_L1_INSERT_IX 12
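/* Note (added commentary, not in the original source): together these give
   a simple self-organising scheme.  A hit in auxmap_L1 swaps the entry one
   place towards the front (see maybe_find_in_auxmap below), while an entry
   fetched from auxmap_L2 is inserted in the middle of the queue, so it has
   to prove itself useful before it can displace the hottest entries. */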
382
383 static struct {
384 Addr base;
385 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
386 }
387 auxmap_L1[N_AUXMAP_L1];
388
389 static OSet* auxmap_L2 = NULL;
390
391 static void init_auxmap_L1_L2 ( void )
392 {
393 Int i;
394 for (i = 0; i < N_AUXMAP_L1; i++) {
395 auxmap_L1[i].base = 0;
396 auxmap_L1[i].ent = NULL;
397 }
398
399 tl_assert(0 == offsetof(AuxMapEnt,base));
400 tl_assert(sizeof(Addr) == sizeof(void*));
401 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base),
402 /*fastCmp*/ NULL,
403 VG_(malloc), "mc.iaLL.1", VG_(free) );
404 }
405
406 /* Check representation invariants; if OK return NULL; else a
407 descriptive bit of text. Also return the number of
408 non-distinguished secondary maps referred to from the auxiliary
409 primary maps. */
410
411 static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
412 {
413 Word i, j;
414 /* On a 32-bit platform, the L2 and L1 tables should
415 both remain empty forever.
416
417 On a 64-bit platform:
418 In the L2 table:
419 all .base & 0xFFFF == 0
420 all .base > MAX_PRIMARY_ADDRESS
421 In the L1 table:
422 all .base & 0xFFFF == 0
423 all (.base > MAX_PRIMARY_ADDRESS
424 .base & 0xFFFF == 0
425 and .ent points to an AuxMapEnt with the same .base)
426 or
427 (.base == 0 and .ent == NULL)
428 */
429 *n_secmaps_found = 0;
430 if (sizeof(void*) == 4) {
431 /* 32-bit platform */
432 if (VG_(OSetGen_Size)(auxmap_L2) != 0)
433 return "32-bit: auxmap_L2 is non-empty";
434 for (i = 0; i < N_AUXMAP_L1; i++)
435 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
436 return "32-bit: auxmap_L1 is non-empty";
437 } else {
438 /* 64-bit platform */
439 UWord elems_seen = 0;
440 AuxMapEnt *elem, *res;
441 AuxMapEnt key;
442 /* L2 table */
443 VG_(OSetGen_ResetIter)(auxmap_L2);
444 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
445 elems_seen++;
446 if (0 != (elem->base & (Addr)0xFFFF))
447 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
448 if (elem->base <= MAX_PRIMARY_ADDRESS)
449 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
450 if (elem->sm == NULL)
451 return "64-bit: .sm in _L2 is NULL";
452 if (!is_distinguished_sm(elem->sm))
453 (*n_secmaps_found)++;
454 }
455 if (elems_seen != n_auxmap_L2_nodes)
456 return "64-bit: disagreement on number of elems in _L2";
457 /* Check L1-L2 correspondence */
458 for (i = 0; i < N_AUXMAP_L1; i++) {
459 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
460 continue;
461 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
462 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
463 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
464 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
465 if (auxmap_L1[i].ent == NULL)
466 return "64-bit: .ent is NULL in auxmap_L1";
467 if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
468 return "64-bit: _L1 and _L2 bases are inconsistent";
469 /* Look it up in auxmap_L2. */
470 key.base = auxmap_L1[i].base;
471 key.sm = 0;
472 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
473 if (res == NULL)
474 return "64-bit: _L1 .base not found in _L2";
475 if (res != auxmap_L1[i].ent)
476 return "64-bit: _L1 .ent disagrees with _L2 entry";
477 }
478 /* Check L1 contains no duplicates */
479 for (i = 0; i < N_AUXMAP_L1; i++) {
480 if (auxmap_L1[i].base == 0)
481 continue;
482 for (j = i+1; j < N_AUXMAP_L1; j++) {
483 if (auxmap_L1[j].base == 0)
484 continue;
485 if (auxmap_L1[j].base == auxmap_L1[i].base)
486 return "64-bit: duplicate _L1 .base entries";
487 }
488 }
489 }
490 return NULL; /* ok */
491 }
492
493 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
494 {
495 Word i;
496 tl_assert(ent);
497 tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
498 for (i = N_AUXMAP_L1-1; i > rank; i--)
499 auxmap_L1[i] = auxmap_L1[i-1];
500 auxmap_L1[rank].base = ent->base;
501 auxmap_L1[rank].ent = ent;
502 }
503
504 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
505 {
506 AuxMapEnt key;
507 AuxMapEnt* res;
508 Word i;
509
510 tl_assert(a > MAX_PRIMARY_ADDRESS);
511 a &= ~(Addr)0xFFFF;
512
513 /* First search the front-cache, which is a self-organising
514 list containing the most popular entries. */
515
516 if (LIKELY(auxmap_L1[0].base == a))
517 return auxmap_L1[0].ent;
518 if (LIKELY(auxmap_L1[1].base == a)) {
519 Addr t_base = auxmap_L1[0].base;
520 AuxMapEnt* t_ent = auxmap_L1[0].ent;
521 auxmap_L1[0].base = auxmap_L1[1].base;
522 auxmap_L1[0].ent = auxmap_L1[1].ent;
523 auxmap_L1[1].base = t_base;
524 auxmap_L1[1].ent = t_ent;
525 return auxmap_L1[0].ent;
526 }
527
528 n_auxmap_L1_searches++;
529
530 for (i = 0; i < N_AUXMAP_L1; i++) {
531 if (auxmap_L1[i].base == a) {
532 break;
533 }
534 }
535 tl_assert(i >= 0 && i <= N_AUXMAP_L1);
536
537 n_auxmap_L1_cmps += (ULong)(i+1);
538
539 if (i < N_AUXMAP_L1) {
540 if (i > 0) {
541 Addr t_base = auxmap_L1[i-1].base;
542 AuxMapEnt* t_ent = auxmap_L1[i-1].ent;
543 auxmap_L1[i-1].base = auxmap_L1[i-0].base;
544 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent;
545 auxmap_L1[i-0].base = t_base;
546 auxmap_L1[i-0].ent = t_ent;
547 i--;
548 }
549 return auxmap_L1[i].ent;
550 }
551
552 n_auxmap_L2_searches++;
553
554 /* First see if we already have it. */
555 key.base = a;
556 key.sm = 0;
557
558 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
559 if (res)
560 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
561 return res;
562 }
563
564 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
565 {
566 AuxMapEnt *nyu, *res;
567
568 /* First see if we already have it. */
569 res = maybe_find_in_auxmap( a );
570 if (LIKELY(res))
571 return res;
572
573 /* Ok, there's no entry in the secondary map, so we'll have
574 to allocate one. */
575 a &= ~(Addr)0xFFFF;
576
577 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
578 tl_assert(nyu);
579 nyu->base = a;
580 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS];
581 VG_(OSetGen_Insert)( auxmap_L2, nyu );
582 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
583 n_auxmap_L2_nodes++;
584 return nyu;
585 }
586
587 /* --------------- SecMap fundamentals --------------- */
588
589 // In all these, 'low' means it's definitely in the main primary map,
590 // 'high' means it's definitely in the auxiliary table.
591
592 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
593 {
594 UWord pm_off = a >> 16;
595 # if VG_DEBUG_MEMORY >= 1
596 tl_assert(pm_off < N_PRIMARY_MAP);
597 # endif
598 return &primary_map[ pm_off ];
599 }
600
601 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
602 {
603 AuxMapEnt* am = find_or_alloc_in_auxmap(a);
604 return &am->sm;
605 }
606
607 static SecMap** get_secmap_ptr ( Addr a )
608 {
609 return ( a <= MAX_PRIMARY_ADDRESS
610 ? get_secmap_low_ptr(a)
611 : get_secmap_high_ptr(a));
612 }
613
614 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
615 {
616 return *get_secmap_low_ptr(a);
617 }
618
619 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
620 {
621 return *get_secmap_high_ptr(a);
622 }
623
624 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
625 {
626 SecMap** p = get_secmap_low_ptr(a);
627 if (UNLIKELY(is_distinguished_sm(*p)))
628 *p = copy_for_writing(*p);
629 return *p;
630 }
631
632 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
633 {
634 SecMap** p = get_secmap_high_ptr(a);
635 if (UNLIKELY(is_distinguished_sm(*p)))
636 *p = copy_for_writing(*p);
637 return *p;
638 }
639
640 /* Produce the secmap for 'a', either from the primary map or by
641 ensuring there is an entry for it in the aux primary map. The
642 secmap may be a distinguished one as the caller will only want to
643 be able to read it.
644 */
645 static INLINE SecMap* get_secmap_for_reading ( Addr a )
646 {
647 return ( a <= MAX_PRIMARY_ADDRESS
648 ? get_secmap_for_reading_low (a)
649 : get_secmap_for_reading_high(a) );
650 }
651
652 /* Produce the secmap for 'a', either from the primary map or by
653 ensuring there is an entry for it in the aux primary map. The
654 secmap may not be a distinguished one, since the caller will want
655 to be able to write it. If it is a distinguished secondary, make a
656 writable copy of it, install it, and return the copy instead. (COW
657 semantics).
658 */
659 static SecMap* get_secmap_for_writing ( Addr a )
660 {
661 return ( a <= MAX_PRIMARY_ADDRESS
662 ? get_secmap_for_writing_low (a)
663 : get_secmap_for_writing_high(a) );
664 }
665
666 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't
667 allocate one if one doesn't already exist. This is used by the
668 leak checker.
669 */
670 static SecMap* maybe_get_secmap_for ( Addr a )
671 {
672 if (a <= MAX_PRIMARY_ADDRESS) {
673 return get_secmap_for_reading_low(a);
674 } else {
675 AuxMapEnt* am = maybe_find_in_auxmap(a);
676 return am ? am->sm : NULL;
677 }
678 }
679
680 /* --------------- Fundamental functions --------------- */
681
682 static INLINE
683 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
684 {
685 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
686 *vabits8 &= ~(0x3 << shift); // mask out the two old bits
687 *vabits8 |= (vabits2 << shift); // mask in the two new bits
688 }
689
690 static INLINE
691 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
692 {
693 UInt shift;
694 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
695 shift = (a & 2) << 1; // shift by 0 or 4
696 *vabits8 &= ~(0xf << shift); // mask out the four old bits
697 *vabits8 |= (vabits4 << shift); // mask in the four new bits
698 }
699
700 static INLINE
701 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
702 {
703 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
704 vabits8 >>= shift; // shift the two bits to the bottom
705 return 0x3 & vabits8; // mask out the rest
706 }
707
708 static INLINE
709 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
710 {
711 UInt shift;
712 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
713 shift = (a & 2) << 1; // shift by 0 or 4
714 vabits8 >>= shift; // shift the four bits to the bottom
715 return 0xf & vabits8; // mask out the rest
716 }
717
718 // Note that these four are only used in slow cases. The fast cases do
719 // clever things like combine the auxmap check (in
720 // get_secmap_{read,writ}able) with alignment checks.
721
722 // *** WARNING! ***
723 // Any time this function is called, if it is possible that vabits2
724 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
725 // sec-V-bits table must also be set!
726 static INLINE
727 void set_vabits2 ( Addr a, UChar vabits2 )
728 {
729 SecMap* sm = get_secmap_for_writing(a);
730 UWord sm_off = SM_OFF(a);
731 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
732 }
733
734 static INLINE
735 UChar get_vabits2 ( Addr a )
736 {
737 SecMap* sm = get_secmap_for_reading(a);
738 UWord sm_off = SM_OFF(a);
739 UChar vabits8 = sm->vabits8[sm_off];
740 return extract_vabits2_from_vabits8(a, vabits8);
741 }
742
743 // *** WARNING! ***
744 // Any time this function is called, if it is possible that any of the
745 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
746 // corresponding entry(s) in the sec-V-bits table must also be set!
747 static INLINE
748 UChar get_vabits8_for_aligned_word32 ( Addr a )
749 {
750 SecMap* sm = get_secmap_for_reading(a);
751 UWord sm_off = SM_OFF(a);
752 UChar vabits8 = sm->vabits8[sm_off];
753 return vabits8;
754 }
755
756 static INLINE
757 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
758 {
759 SecMap* sm = get_secmap_for_writing(a);
760 UWord sm_off = SM_OFF(a);
761 sm->vabits8[sm_off] = vabits8;
762 }
763
764
765 // Forward declarations
766 static UWord get_sec_vbits8(Addr a);
767 static void set_sec_vbits8(Addr a, UWord vbits8);
768
769 // Returns False if there was an addressability error.
770 static INLINE
771 Bool set_vbits8 ( Addr a, UChar vbits8 )
772 {
773 Bool ok = True;
774 UChar vabits2 = get_vabits2(a);
775 if ( VA_BITS2_NOACCESS != vabits2 ) {
776 // Addressable. Convert in-register format to in-memory format.
777 // Also remove any existing sec V bit entry for the byte if no
778 // longer necessary.
779 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; }
780 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
781 else { vabits2 = VA_BITS2_PARTDEFINED;
782 set_sec_vbits8(a, vbits8); }
783 set_vabits2(a, vabits2);
784
785 } else {
786 // Unaddressable! Do nothing -- when writing to unaddressable
787 // memory it acts as a black hole, and the V bits can never be seen
788 // again. So we don't have to write them at all.
789 ok = False;
790 }
791 return ok;
792 }
793
794 // Returns False if there was an addressability error. In that case, we put
795 // all defined bits into vbits8.
796 static INLINE
797 Bool get_vbits8 ( Addr a, UChar* vbits8 )
798 {
799 Bool ok = True;
800 UChar vabits2 = get_vabits2(a);
801
802 // Convert the in-memory format to in-register format.
803 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; }
804 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
805 else if ( VA_BITS2_NOACCESS == vabits2 ) {
806 *vbits8 = V_BITS8_DEFINED; // Make V bits defined!
807 ok = False;
808 } else {
809 tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
810 *vbits8 = get_sec_vbits8(a);
811 }
812 return ok;
813 }
814
815
816 /* --------------- Secondary V bit table ------------ */
817
818 // This table holds the full V bit pattern for partially-defined bytes
819 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
820 // memory.
821 //
822 // Note: the nodes in this table can become stale. Eg. if you write a PDB,
823 // then overwrite the same address with a fully defined byte, the sec-V-bit
824 // node will not necessarily be removed. This is because checking for
825 // whether removal is necessary would slow down the fast paths.
826 //
827 // To avoid the stale nodes building up too much, we periodically (once the
828 // table reaches a certain size) garbage collect (GC) the table by
829 // traversing it and evicting any "sufficiently stale" nodes, ie. nodes that
830 // are stale and haven't been touched for a certain number of collections.
831 // If more than a certain proportion of nodes survived, we increase the
832 // table size so that GCs occur less often.
833 //
834 // (So this a bit different to a traditional GC, where you definitely want
835 // to remove any dead nodes. It's more like we have a resizable cache and
836 // we're trying to find the right balance how many elements to evict and how
837 // big to make the cache.)
838 //
839 // This policy is designed to avoid bad table bloat in the worst case where
840 // a program creates huge numbers of stale PDBs -- we would get this bloat
841 // if we had no GC -- while handling well the case where a node becomes
842 // stale but shortly afterwards is rewritten with a PDB and so becomes
843 // non-stale again (which happens quite often, eg. in perf/bz2). If we just
844 // remove all stale nodes as soon as possible, we just end up re-adding a
845 // lot of them in later again. The "sufficiently stale" approach avoids
846 // this. (If a program has many live PDBs, performance will just suck,
847 // there's no way around that.)
848
849 static OSet* secVBitTable;
850
851 // Stats
852 static ULong sec_vbits_new_nodes = 0;
853 static ULong sec_vbits_updates = 0;
854
855 // This must be a power of two; this is checked in mc_pre_clo_init().
856 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover
857 // a larger address range) they take more space but we can get multiple
858 // partially-defined bytes in one if they are close to each other, reducing
859 // the number of total nodes. In practice sometimes they are clustered (eg.
860 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
861 // row), but often not. So we choose something intermediate.
862 #define BYTES_PER_SEC_VBIT_NODE 16
863
864 // We make the table bigger if more than this many nodes survive a GC.
865 #define MAX_SURVIVOR_PROPORTION 0.5
866
867 // Each time we make the table bigger, we increase it by this much.
868 #define TABLE_GROWTH_FACTOR 2
869
870 // This defines "sufficiently stale" -- any node that hasn't been touched in
871 // this many GCs will be removed.
872 #define MAX_STALE_AGE 2
873
874 // We GC the table when it gets this many nodes in it, ie. it's effectively
875 // the table size. It can change.
876 static Int secVBitLimit = 1024;
877
878 // The number of GCs done, used to age sec-V-bit nodes for eviction.
879 // Because it's unsigned, wrapping doesn't matter -- the right answer will
880 // come out anyway.
881 static UInt GCs_done = 0;
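/* Illustrative sketch (not part of the original source): why the unsigned
   wrap-around mentioned above is harmless for the age computation done in
   gcSecVBitTable().  Not compiled. */
#if 0
static void example_unsigned_age ( void )
{
   UInt GCs_done_ex     = 1;            /* counter has wrapped: ..., 0xFFFFFFFF, 0, 1 */
   UInt last_touched_ex = 0xFFFFFFFFu;  /* node touched just before the wrap          */
   UInt age             = GCs_done_ex - last_touched_ex;
   tl_assert(age == 2);                 /* correct age, despite GCs_done_ex being smaller */
}
#endif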
882
883 typedef
884 struct {
885 Addr a;
886 UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
887 UInt last_touched;
888 }
889 SecVBitNode;
890
891 static OSet* createSecVBitTable(void)
892 {
893 return VG_(OSetGen_Create)( offsetof(SecVBitNode, a),
894 NULL, // use fast comparisons
895 VG_(malloc), "mc.cSVT.1 (sec VBit table)",
896 VG_(free) );
897 }
898
899 static void gcSecVBitTable(void)
900 {
901 OSet* secVBitTable2;
902 SecVBitNode* n;
903 Int i, n_nodes = 0, n_survivors = 0;
904
905 GCs_done++;
906
907 // Create the new table.
908 secVBitTable2 = createSecVBitTable();
909
910 // Traverse the table, moving fresh nodes into the new table.
911 VG_(OSetGen_ResetIter)(secVBitTable);
912 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
913 Bool keep = False;
914 if ( (GCs_done - n->last_touched) <= MAX_STALE_AGE ) {
915 // Keep node if it's been touched recently enough (regardless of
916 // freshness/staleness).
917 keep = True;
918 } else {
919 // Keep node if any of its bytes are non-stale. Using
920 // get_vabits2() for the lookup is not very efficient, but I don't
921 // think it matters.
922 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
923 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
924 keep = True; // Found a non-stale byte, so keep
925 break;
926 }
927 }
928 }
929
930 if ( keep ) {
931 // Insert a copy of the node into the new table.
932 SecVBitNode* n2 =
933 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
934 *n2 = *n;
935 VG_(OSetGen_Insert)(secVBitTable2, n2);
936 }
937 }
938
939 // Get the before and after sizes.
940 n_nodes = VG_(OSetGen_Size)(secVBitTable);
941 n_survivors = VG_(OSetGen_Size)(secVBitTable2);
942
943 // Destroy the old table, and put the new one in its place.
944 VG_(OSetGen_Destroy)(secVBitTable);
945 secVBitTable = secVBitTable2;
946
947 if (VG_(clo_verbosity) > 1) {
948 Char percbuf[6];
949 VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf);
950 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n",
951 n_nodes, n_survivors, percbuf);
952 }
953
954 // Increase table size if necessary.
955 if (n_survivors > (secVBitLimit * MAX_SURVIVOR_PROPORTION)) {
956 secVBitLimit *= TABLE_GROWTH_FACTOR;
957 if (VG_(clo_verbosity) > 1)
958 VG_(message)(Vg_DebugMsg, "memcheck GC: increase table size to %d\n",
959 secVBitLimit);
960 }
961 }
962
963 static UWord get_sec_vbits8(Addr a)
964 {
965 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
966 Int amod = a % BYTES_PER_SEC_VBIT_NODE;
967 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
968 UChar vbits8;
969 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
970 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
971 // make it to the secondary V bits table.
972 vbits8 = n->vbits8[amod];
973 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
974 return vbits8;
975 }
976
977 static void set_sec_vbits8(Addr a, UWord vbits8)
978 {
979 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
980 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE;
981 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
982 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
983 // make it to the secondary V bits table.
984 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
985 if (n) {
986 n->vbits8[amod] = vbits8; // update
987 n->last_touched = GCs_done;
988 sec_vbits_updates++;
989 } else {
990 // New node: assign the specific byte, make the rest invalid (they
991 // should never be read as-is, but be cautious).
992 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
993 n->a = aAligned;
994 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
995 n->vbits8[i] = V_BITS8_UNDEFINED;
996 }
997 n->vbits8[amod] = vbits8;
998 n->last_touched = GCs_done;
999
1000 // Do a table GC if necessary. Nb: do this before inserting the new
1001 // node, to avoid erroneously GC'ing the new node.
1002 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1003 gcSecVBitTable();
1004 }
1005
1006 // Insert the new node.
1007 VG_(OSetGen_Insert)(secVBitTable, n);
1008 sec_vbits_new_nodes++;
1009
1010 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1011 if (n_secVBit_nodes > max_secVBit_nodes)
1012 max_secVBit_nodes = n_secVBit_nodes;
1013 }
1014 }
1015
1016 /* --------------- Endianness helpers --------------- */
1017
1018 /* Returns the offset in memory of the byteno-th least significant byte
1019 in a wordszB-sized word, given the specified endianness. */
1020 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1021 UWord byteno ) {
1022 return bigendian ? (wordszB-1-byteno) : byteno;
1023 }
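/* Illustrative worked example (not part of the original source): offsets
   produced by byte_offset_w() for a 4-byte word; byteno counts up from the
   least significant byte of the value.  Not compiled. */
#if 0
static void example_byte_offset_w ( void )
{
   /* Little-endian: the least significant byte lives at the lowest address. */
   tl_assert(byte_offset_w(4, False/*littleendian*/, 0) == 0);
   tl_assert(byte_offset_w(4, False/*littleendian*/, 3) == 3);
   /* Big-endian: the least significant byte lives at the highest address. */
   tl_assert(byte_offset_w(4, True/*bigendian*/, 0) == 3);
   tl_assert(byte_offset_w(4, True/*bigendian*/, 3) == 0);
}
#endif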
1024
1025
1026 /* --------------- Ignored address ranges --------------- */
1027
1028 #define M_IGNORE_RANGES 4
1029
1030 typedef
1031 struct {
1032 Int used;
1033 Addr start[M_IGNORE_RANGES];
1034 Addr end[M_IGNORE_RANGES];
1035 }
1036 IgnoreRanges;
1037
1038 static IgnoreRanges ignoreRanges;
1039
1040 INLINE Bool MC_(in_ignored_range) ( Addr a )
1041 {
1042 Int i;
1043 if (LIKELY(ignoreRanges.used == 0))
1044 return False;
1045 for (i = 0; i < ignoreRanges.used; i++) {
1046 if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i])
1047 return True;
1048 }
1049 return False;
1050 }
1051
1052
1053 /* Parse a 32- or 64-bit hex number, including leading 0x, from string
1054 starting at *ppc, putting result in *result, and return True. Or
1055 fail, in which case *ppc and *result are undefined, and return
1056 False. */
1057
1058 static Bool isHex ( UChar c )
1059 {
1060 return ((c >= '0' && c <= '9') ||
1061 (c >= 'a' && c <= 'f') ||
1062 (c >= 'A' && c <= 'F'));
1063 }
1064
1065 static UInt fromHex ( UChar c )
1066 {
1067 if (c >= '0' && c <= '9')
1068 return (UInt)c - (UInt)'0';
1069 if (c >= 'a' && c <= 'f')
1070 return 10 + (UInt)c - (UInt)'a';
1071 if (c >= 'A' && c <= 'F')
1072 return 10 + (UInt)c - (UInt)'A';
1073 /*NOTREACHED*/
1074 tl_assert(0);
1075 return 0;
1076 }
1077
1078 static Bool parse_Addr ( UChar** ppc, Addr* result )
1079 {
1080 Int used, limit = 2 * sizeof(Addr);
1081 if (**ppc != '0')
1082 return False;
1083 (*ppc)++;
1084 if (**ppc != 'x')
1085 return False;
1086 (*ppc)++;
1087 *result = 0;
1088 used = 0;
1089 while (isHex(**ppc)) {
1090 UInt d = fromHex(**ppc);
1091 tl_assert(d < 16);
1092 *result = ((*result) << 4) | d;
1093 (*ppc)++;
1094 used++;
1095 if (used > limit) return False;
1096 }
1097 if (used == 0)
1098 return False;
1099 return True;
1100 }
1101
1102 /* Parse two such numbers separated by a dash, or fail. */
1103
1104 static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 )
1105 {
1106 Bool ok = parse_Addr(ppc, result1);
1107 if (!ok)
1108 return False;
1109 if (**ppc != '-')
1110 return False;
1111 (*ppc)++;
1112 ok = parse_Addr(ppc, result2);
1113 if (!ok)
1114 return False;
1115 return True;
1116 }
1117
1118 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1119 fail. */
1120
1121 static Bool parse_ignore_ranges ( UChar* str0 )
1122 {
1123 Addr start, end;
1124 Bool ok;
1125 UChar* str = str0;
1126 UChar** ppc = &str;
1127 ignoreRanges.used = 0;
1128 while (1) {
1129 ok = parse_range(ppc, &start, &end);
1130 if (!ok)
1131 return False;
1132 if (ignoreRanges.used >= M_IGNORE_RANGES)
1133 return False;
1134 ignoreRanges.start[ignoreRanges.used] = start;
1135 ignoreRanges.end[ignoreRanges.used] = end;
1136 ignoreRanges.used++;
1137 if (**ppc == 0)
1138 return True;
1139 if (**ppc != ',')
1140 return False;
1141 (*ppc)++;
1142 }
1143 /*NOTREACHED*/
1144 return False;
1145 }
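/* Illustrative usage sketch (not part of the original source): the string
   fed to parse_ignore_ranges() is the value of Memcheck's --ignore-ranges
   option, eg. "0x50000000-0x50010000,0x70000000-0x70001000".  Not
   compiled; the addresses here are made up for the example. */
#if 0
static void example_parse_ignore_ranges ( void )
{
   Bool ok = parse_ignore_ranges( (UChar*)"0x50000000-0x50010000" );
   tl_assert(ok);
   tl_assert(ignoreRanges.used     == 1);
   tl_assert(ignoreRanges.start[0] == 0x50000000);
   tl_assert(ignoreRanges.end[0]   == 0x50010000);
}
#endif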
1146
1147
1148 /* --------------- Load/store slow cases. --------------- */
1149
1150 static
1151 #ifndef PERF_FAST_LOADV
1152 INLINE
1153 #endif
1154 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1155 {
1156 /* Make up a 64-bit result V word, which contains the loaded data for
1157 valid addresses and Defined for invalid addresses. Iterate over
1158 the bytes in the word, from the most significant down to the
1159 least. */
1160 ULong vbits64 = V_BITS64_UNDEFINED;
1161 SizeT szB = nBits / 8;
1162 SSizeT i; // Must be signed.
1163 SizeT n_addrs_bad = 0;
1164 Addr ai;
1165 Bool partial_load_exemption_applies;
1166 UChar vbits8;
1167 Bool ok;
1168
1169 PROF_EVENT(30, "mc_LOADVn_slow");
1170
1171 /* ------------ BEGIN semi-fast cases ------------ */
1172 /* These deal quickly-ish with the common auxiliary primary map
1173 cases on 64-bit platforms. They are merely a speedup hack and can be
1174 omitted without loss of correctness/functionality. Note that in
1175 both cases the "sizeof(void*) == 8" causes these cases to be
1176 folded out by compilers on 32-bit platforms. These are derived
1177 from LOADV64 and LOADV32.
1178 */
1179 if (LIKELY(sizeof(void*) == 8
1180 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1181 SecMap* sm = get_secmap_for_reading(a);
1182 UWord sm_off16 = SM_OFF_16(a);
1183 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1184 if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1185 return V_BITS64_DEFINED;
1186 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1187 return V_BITS64_UNDEFINED;
1188 /* else fall into the slow case */
1189 }
1190 if (LIKELY(sizeof(void*) == 8
1191 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1192 SecMap* sm = get_secmap_for_reading(a);
1193 UWord sm_off = SM_OFF(a);
1194 UWord vabits8 = sm->vabits8[sm_off];
1195 if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1196 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1197 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1198 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1199 /* else fall into slow case */
1200 }
1201 /* ------------ END semi-fast cases ------------ */
1202
1203 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1204
1205 for (i = szB-1; i >= 0; i--) {
1206 PROF_EVENT(31, "mc_LOADVn_slow(loop)");
1207 ai = a + byte_offset_w(szB, bigendian, i);
1208 ok = get_vbits8(ai, &vbits8);
1209 if (!ok) n_addrs_bad++;
1210 vbits64 <<= 8;
1211 vbits64 |= vbits8;
1212 }
1213
1214 /* This is a hack which avoids producing errors for code which
1215 insists in stepping along byte strings in aligned word-sized
1216 chunks, and there is a partially defined word at the end. (eg,
1217 optimised strlen). Such code is basically broken at least WRT
1218 semantics of ANSI C, but sometimes users don't have the option
1219 to fix it, and so this option is provided. Note it is now
1220 defaulted to not-engaged.
1221
1222 A load from a partially-addressible place is allowed if:
1223 - the command-line flag is set
1224 - it's a word-sized, word-aligned load
1225 - at least one of the addresses in the word *is* valid
1226 */
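/* Illustrative example (added commentary, not in the original source):
   with --partial-loads-ok=yes, an optimised strlen() doing an aligned
   word-sized load off the end of a 5-byte heap block touches some
   unaddressable bytes.  The exemption below then suppresses the address
   error; get_vbits8() has already returned "defined" V bits for the
   inaccessible bytes, so only the bytes actually inside the block keep
   their real definedness. */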
1227 partial_load_exemption_applies
1228 = MC_(clo_partial_loads_ok) && szB == VG_WORDSIZE
1229 && VG_IS_WORD_ALIGNED(a)
1230 && n_addrs_bad < VG_WORDSIZE;
1231
1232 if (n_addrs_bad > 0 && !partial_load_exemption_applies)
1233 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1234
1235 return vbits64;
1236 }
1237
1238
1239 static
1240 #ifndef PERF_FAST_STOREV
1241 INLINE
1242 #endif
1243 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1244 {
1245 SizeT szB = nBits / 8;
1246 SizeT i, n_addrs_bad = 0;
1247 UChar vbits8;
1248 Addr ai;
1249 Bool ok;
1250
1251 PROF_EVENT(35, "mc_STOREVn_slow");
1252
1253 /* ------------ BEGIN semi-fast cases ------------ */
1254 /* These deal quickly-ish with the common auxiliary primary map
1255 cases on 64-bit platforms. They are merely a speedup hack and can be
1256 omitted without loss of correctness/functionality. Note that in
1257 both cases the "sizeof(void*) == 8" causes these cases to be
1258 folded out by compilers on 32-bit platforms. These are derived
1259 from STOREV64 and STOREV32.
1260 */
1261 if (LIKELY(sizeof(void*) == 8
1262 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1263 SecMap* sm = get_secmap_for_reading(a);
1264 UWord sm_off16 = SM_OFF_16(a);
1265 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1266 if (LIKELY( !is_distinguished_sm(sm) &&
1267 (VA_BITS16_DEFINED == vabits16 ||
1268 VA_BITS16_UNDEFINED == vabits16) )) {
1269 /* Handle common case quickly: a is suitably aligned, */
1270 /* is mapped, and is addressible. */
1271 // Convert full V-bits in register to compact 2-bit form.
1272 if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1273 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
1274 return;
1275 } else if (V_BITS64_UNDEFINED == vbytes) {
1276 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
1277 return;
1278 }
1279 /* else fall into the slow case */
1280 }
1281 /* else fall into the slow case */
1282 }
1283 if (LIKELY(sizeof(void*) == 8
1284 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1285 SecMap* sm = get_secmap_for_reading(a);
1286 UWord sm_off = SM_OFF(a);
1287 UWord vabits8 = sm->vabits8[sm_off];
1288 if (LIKELY( !is_distinguished_sm(sm) &&
1289 (VA_BITS8_DEFINED == vabits8 ||
1290 VA_BITS8_UNDEFINED == vabits8) )) {
1291 /* Handle common case quickly: a is suitably aligned, */
1292 /* is mapped, and is addressible. */
1293 // Convert full V-bits in register to compact 2-bit form.
1294 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1295 sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1296 return;
1297 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1298 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1299 return;
1300 }
1301 /* else fall into the slow case */
1302 }
1303 /* else fall into the slow case */
1304 }
1305 /* ------------ END semi-fast cases ------------ */
1306
1307 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1308
1309 /* Dump vbytes in memory, iterating from least to most significant
1310 byte. At the same time establish addressibility of the location. */
1311 for (i = 0; i < szB; i++) {
1312 PROF_EVENT(36, "mc_STOREVn_slow(loop)");
1313 ai = a + byte_offset_w(szB, bigendian, i);
1314 vbits8 = vbytes & 0xff;
1315 ok = set_vbits8(ai, vbits8);
1316 if (!ok) n_addrs_bad++;
1317 vbytes >>= 8;
1318 }
1319
1320 /* If an address error has happened, report it. */
1321 if (n_addrs_bad > 0)
1322 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1323 }
1324
1325
1326 /*------------------------------------------------------------*/
1327 /*--- Setting permissions over address ranges. ---*/
1328 /*------------------------------------------------------------*/
1329
1330 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1331 UWord dsm_num )
1332 {
1333 UWord sm_off, sm_off16;
1334 UWord vabits2 = vabits16 & 0x3;
1335 SizeT lenA, lenB, len_to_next_secmap;
1336 Addr aNext;
1337 SecMap* sm;
1338 SecMap** sm_ptr;
1339 SecMap* example_dsm;
1340
1341 PROF_EVENT(150, "set_address_range_perms");
1342
1343 /* Check the V+A bits make sense. */
1344 tl_assert(VA_BITS16_NOACCESS == vabits16 ||
1345 VA_BITS16_UNDEFINED == vabits16 ||
1346 VA_BITS16_DEFINED == vabits16);
1347
1348 // This code should never write PDBs; ensure this. (See comment above
1349 // set_vabits2().)
1350 tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1351
1352 if (lenT == 0)
1353 return;
1354
1355 if (lenT > 256 * 1024 * 1024) {
1356 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1357 Char* s = "unknown???";
1358 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1359 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1360 if (vabits16 == VA_BITS16_DEFINED ) s = "defined";
1361 VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1362 "large range [0x%lx, 0x%lx) (%s)\n",
1363 a, a + lenT, s);
1364 }
1365 }
1366
1367 #ifndef PERF_FAST_SARP
1368 /*------------------ debug-only case ------------------ */
1369 {
1370 // Endianness doesn't matter here because all bytes are being set to
1371 // the same value.
1372 // Nb: We don't have to worry about updating the sec-V-bits table
1373 // after these set_vabits2() calls because this code never writes
1374 // VA_BITS2_PARTDEFINED values.
1375 SizeT i;
1376 for (i = 0; i < lenT; i++) {
1377 set_vabits2(a + i, vabits2);
1378 }
1379 return;
1380 }
1381 #endif
1382
1383 /*------------------ standard handling ------------------ */
1384
1385 /* Get the distinguished secondary that we might want
1386 to use (part of the space-compression scheme). */
1387 example_dsm = &sm_distinguished[dsm_num];
1388
1389 // We have to handle ranges covering various combinations of partial and
1390 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1391 // Cases marked with a '*' are common.
1392 //
1393 // TYPE PARTS USED
1394 // ---- ----------
1395 // * one partial sec-map (p) 1
1396 // - one whole sec-map (P) 2
1397 //
1398 // * two partial sec-maps (pp) 1,3
1399 // - one partial, one whole sec-map (pP) 1,2
1400 // - one whole, one partial sec-map (Pp) 2,3
1401 // - two whole sec-maps (PP) 2,2
1402 //
1403 // * one partial, one whole, one partial (pPp) 1,2,3
1404 // - one partial, two whole (pPP) 1,2,2
1405 // - two whole, one partial (PPp) 2,2,3
1406 // - three whole (PPP) 2,2,2
1407 //
1408 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1409 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1410 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1411 // - N whole (PP...PP) 2,2...2,2
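//
// Illustrative example (added commentary, not in the original source):
// with a == 0x18000 and lenT == 0x24000 (sec-maps cover 64kB == 0x10000
// bytes), Part 1 sets the last 0x8000 bytes of the first sec-map (lenA),
// Part 2 swaps in a distinguished sec-map for [0x20000, 0x30000), and
// Part 3 sets the first 0xC000 bytes of the final sec-map -- the common
// pPp case marked above.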
1412
1413 // Break up total length (lenT) into two parts: length in the first
1414 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
1415 aNext = start_of_this_sm(a) + SM_SIZE;
1416 len_to_next_secmap = aNext - a;
1417 if ( lenT <= len_to_next_secmap ) {
1418 // Range entirely within one sec-map. Covers almost all cases.
1419 PROF_EVENT(151, "set_address_range_perms-single-secmap");
1420 lenA = lenT;
1421 lenB = 0;
1422 } else if (is_start_of_sm(a)) {
1423 // Range spans at least one whole sec-map, and starts at the beginning
1424 // of a sec-map; skip to Part 2.
1425 PROF_EVENT(152, "set_address_range_perms-startof-secmap");
1426 lenA = 0;
1427 lenB = lenT;
1428 goto part2;
1429 } else {
1430 // Range spans two or more sec-maps, first one is partial.
1431 PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
1432 lenA = len_to_next_secmap;
1433 lenB = lenT - lenA;
1434 }
1435
1436 //------------------------------------------------------------------------
1437 // Part 1: Deal with the first sec_map. Most of the time the range will be
1438 // entirely within a sec_map and this part alone will suffice. Also,
1439 // doing it this way lets us avoid repeatedly testing for the crossing of
1440 // a sec-map boundary within these loops.
1441 //------------------------------------------------------------------------
1442
1443 // If it's distinguished, make it undistinguished if necessary.
1444 sm_ptr = get_secmap_ptr(a);
1445 if (is_distinguished_sm(*sm_ptr)) {
1446 if (*sm_ptr == example_dsm) {
1447 // Sec-map already has the V+A bits that we want, so skip.
1448 PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
1449 a = aNext;
1450 lenA = 0;
1451 } else {
1452 PROF_EVENT(155, "set_address_range_perms-dist-sm1");
1453 *sm_ptr = copy_for_writing(*sm_ptr);
1454 }
1455 }
1456 sm = *sm_ptr;
1457
1458 // 1 byte steps
1459 while (True) {
1460 if (VG_IS_8_ALIGNED(a)) break;
1461 if (lenA < 1) break;
1462 PROF_EVENT(156, "set_address_range_perms-loop1a");
1463 sm_off = SM_OFF(a);
1464 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1465 a += 1;
1466 lenA -= 1;
1467 }
1468 // 8-aligned, 8 byte steps
1469 while (True) {
1470 if (lenA < 8) break;
1471 PROF_EVENT(157, "set_address_range_perms-loop8a");
1472 sm_off16 = SM_OFF_16(a);
1473 ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1474 a += 8;
1475 lenA -= 8;
1476 }
1477 // 1 byte steps
1478 while (True) {
1479 if (lenA < 1) break;
1480 PROF_EVENT(158, "set_address_range_perms-loop1b");
1481 sm_off = SM_OFF(a);
1482 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1483 a += 1;
1484 lenA -= 1;
1485 }
1486
1487 // We've finished the first sec-map. Is that it?
1488 if (lenB == 0)
1489 return;
1490
1491 //------------------------------------------------------------------------
1492 // Part 2: Fast-set entire sec-maps at a time.
1493 //------------------------------------------------------------------------
1494 part2:
1495 // 64KB-aligned, 64KB steps.
1496 // Nb: we can reach here with lenB < SM_SIZE
1497 tl_assert(0 == lenA);
1498 while (True) {
1499 if (lenB < SM_SIZE) break;
1500 tl_assert(is_start_of_sm(a));
1501 PROF_EVENT(159, "set_address_range_perms-loop64K");
1502 sm_ptr = get_secmap_ptr(a);
1503 if (!is_distinguished_sm(*sm_ptr)) {
1504 PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
1505 // Free the non-distinguished sec-map that we're replacing. This
1506 // case happens moderately often, enough to be worthwhile.
1507 VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1508 }
1509 update_SM_counts(*sm_ptr, example_dsm);
1510 // Make the sec-map entry point to the example DSM
1511 *sm_ptr = example_dsm;
1512 lenB -= SM_SIZE;
1513 a += SM_SIZE;
1514 }
1515
1516 // We've finished the whole sec-maps. Is that it?
1517 if (lenB == 0)
1518 return;
1519
1520 //------------------------------------------------------------------------
1521 // Part 3: Finish off the final partial sec-map, if necessary.
1522 //------------------------------------------------------------------------
1523
1524 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1525
1526 // If it's distinguished, make it undistinguished if necessary.
1527 sm_ptr = get_secmap_ptr(a);
1528 if (is_distinguished_sm(*sm_ptr)) {
1529 if (*sm_ptr == example_dsm) {
1530 // Sec-map already has the V+A bits that we want, so stop.
1531 PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
1532 return;
1533 } else {
1534 PROF_EVENT(162, "set_address_range_perms-dist-sm2");
1535 *sm_ptr = copy_for_writing(*sm_ptr);
1536 }
1537 }
1538 sm = *sm_ptr;
1539
1540 // 8-aligned, 8 byte steps
1541 while (True) {
1542 if (lenB < 8) break;
1543 PROF_EVENT(163, "set_address_range_perms-loop8b");
1544 sm_off16 = SM_OFF_16(a);
1545 ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1546 a += 8;
1547 lenB -= 8;
1548 }
1549 // 1 byte steps
1550 while (True) {
1551 if (lenB < 1) return;
1552 PROF_EVENT(164, "set_address_range_perms-loop1c");
1553 sm_off = SM_OFF(a);
1554 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1555 a += 1;
1556 lenB -= 1;
1557 }
1558 }
1559
1560
1561 /* --- Set permissions for arbitrary address ranges --- */
1562
1563 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1564 {
1565 PROF_EVENT(40, "MC_(make_mem_noaccess)");
1566 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1567 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1568 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1569 ocache_sarp_Clear_Origins ( a, len );
1570 }
1571
1572 static void make_mem_undefined ( Addr a, SizeT len )
1573 {
1574 PROF_EVENT(41, "make_mem_undefined");
1575 DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1576 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1577 }
1578
1579 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1580 {
1581 PROF_EVENT(41, "MC_(make_mem_undefined)");
1582 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1583 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1584 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1585 ocache_sarp_Set_Origins ( a, len, otag );
1586 }
1587
1588 static
1589 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1590 ThreadId tid, UInt okind )
1591 {
1592 UInt ecu;
1593 ExeContext* here;
1594 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1595 if it is invalid. So no need to do it here. */
1596 tl_assert(okind <= 3);
1597 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1598 tl_assert(here);
1599 ecu = VG_(get_ECU_from_ExeContext)(here);
1600 tl_assert(VG_(is_plausible_ECU)(ecu));
1601 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1602 }
1603
1604 static
1605 void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) {
1606 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1607 }
1608
1609
1610 void MC_(make_mem_defined) ( Addr a, SizeT len )
1611 {
1612 PROF_EVENT(42, "MC_(make_mem_defined)");
1613 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1614 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1615 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1616 ocache_sarp_Clear_Origins ( a, len );
1617 }
1618
1619 /* For each byte in [a,a+len), if the byte is addressable, make it be
1620    defined, but if it isn't addressable, leave it alone.  In other
1621    words, a version of MC_(make_mem_defined) that doesn't mess with
1622    addressability.  Low-performance implementation. */
1623 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1624 {
1625 SizeT i;
1626 UChar vabits2;
1627 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1628 for (i = 0; i < len; i++) {
1629 vabits2 = get_vabits2( a+i );
1630 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1631 set_vabits2(a+i, VA_BITS2_DEFINED);
1632 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1633 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1634 }
1635 }
1636 }
1637 }
1638
1639 /* Similarly (needed for mprotect handling ..) */
1640 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1641 {
1642 SizeT i;
1643 UChar vabits2;
1644 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1645 for (i = 0; i < len; i++) {
1646 vabits2 = get_vabits2( a+i );
1647 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1648 set_vabits2(a+i, VA_BITS2_DEFINED);
1649 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1650 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1651 }
1652 }
1653 }
1654 }
1655
1656 /* --- Block-copy permissions (needed for implementing realloc() and
1657 sys_mremap). --- */
1658
1659 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1660 {
1661 SizeT i, j;
1662 UChar vabits2, vabits8;
1663 Bool aligned, nooverlap;
1664
1665 DEBUG("MC_(copy_address_range_state)\n");
1666 PROF_EVENT(50, "MC_(copy_address_range_state)");
1667
1668 if (len == 0 || src == dst)
1669 return;
1670
1671 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1672 nooverlap = src+len <= dst || dst+len <= src;
1673
1674 if (nooverlap && aligned) {
1675
1676 /* Vectorised fast case, when no overlap and suitably aligned */
1677 /* vector loop */
1678 i = 0;
1679 while (len >= 4) {
1680 vabits8 = get_vabits8_for_aligned_word32( src+i );
1681 set_vabits8_for_aligned_word32( dst+i, vabits8 );
1682 if (LIKELY(VA_BITS8_DEFINED == vabits8
1683 || VA_BITS8_UNDEFINED == vabits8
1684 || VA_BITS8_NOACCESS == vabits8)) {
1685 /* do nothing */
1686 } else {
1687 /* have to copy secondary map info */
1688 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1689 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1690 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1691 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1692 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1693 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1694 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1695 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1696 }
1697 i += 4;
1698 len -= 4;
1699 }
1700 /* fixup loop */
1701 while (len >= 1) {
1702 vabits2 = get_vabits2( src+i );
1703 set_vabits2( dst+i, vabits2 );
1704 if (VA_BITS2_PARTDEFINED == vabits2) {
1705 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1706 }
1707 i++;
1708 len--;
1709 }
1710
1711 } else {
1712
1713 /* We have to do things the slow way */
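      /* Direction matters because the ranges may overlap (exactly as
         for memmove): with src < dst we copy from the high end down so
         that not-yet-copied source metadata is never overwritten, and
         with src > dst we copy upwards for the same reason. */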
1714 if (src < dst) {
1715 for (i = 0, j = len-1; i < len; i++, j--) {
1716 PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
1717 vabits2 = get_vabits2( src+j );
1718 set_vabits2( dst+j, vabits2 );
1719 if (VA_BITS2_PARTDEFINED == vabits2) {
1720 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1721 }
1722 }
1723 }
1724
1725 if (src > dst) {
1726 for (i = 0; i < len; i++) {
1727 PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
1728 vabits2 = get_vabits2( src+i );
1729 set_vabits2( dst+i, vabits2 );
1730 if (VA_BITS2_PARTDEFINED == vabits2) {
1731 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1732 }
1733 }
1734 }
1735 }
1736
1737 }
1738
1739
1740 /*------------------------------------------------------------*/
1741 /*--- Origin tracking stuff - cache basics ---*/
1742 /*------------------------------------------------------------*/
1743
1744 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1745 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1746
1747 Note that this implementation draws inspiration from the "origin
1748 tracking by value piggybacking" scheme described in "Tracking Bad
1749 Apples: Reporting the Origin of Null and Undefined Value Errors"
1750 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1751 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1752 implemented completely differently.
1753
1754 Origin tags and ECUs -- about the shadow values
1755 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1756
1757 This implementation tracks the defining point of all uninitialised
1758 values using so called "origin tags", which are 32-bit integers,
1759 rather than using the values themselves to encode the origins. The
1760    latter, so-called "value piggybacking", is what the OOPSLA07 paper
1761 describes.
1762
1763 Origin tags, as tracked by the machinery below, are 32-bit unsigned
1764 ints (UInts), regardless of the machine's word size. Each tag
1765 comprises an upper 30-bit ECU field and a lower 2-bit
1766 'kind' field. The ECU field is a number given out by m_execontext
1767 and has a 1-1 mapping with ExeContext*s. An ECU can be used
1768 directly as an origin tag (otag), but in fact we want to put
1769    additional information in the 'kind' field to indicate roughly where the
1770 tag came from. This helps print more understandable error messages
1771 for the user -- it has no other purpose. In summary:
1772
1773 * Both ECUs and origin tags are represented as 32-bit words
1774
1775 * m_execontext and the core-tool interface deal purely in ECUs.
1776 They have no knowledge of origin tags - that is a purely
1777 Memcheck-internal matter.
1778
1779 * all valid ECUs have the lowest 2 bits zero and at least
1780 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
1781
1782 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
1783 constants defined in mc_include.h.
1784
1785 * to convert an otag back to an ECU, AND it with ~3
1786
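   As a minimal sketch of the two conversions (using MC_OKIND_STACK as
   a representative kind constant):

      UInt otag = ecu | MC_OKIND_STACK;   // ECU -> otag
      UInt ecu2 = otag & ~3u;             // otag -> ECU
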
1787 One important fact is that no valid otag is zero. A zero otag is
1788 used by the implementation to indicate "no origin", which could
1789 mean that either the value is defined, or it is undefined but the
1790 implementation somehow managed to lose the origin.
1791
1792 The ECU used for memory created by malloc etc is derived from the
1793 stack trace at the time the malloc etc happens. This means the
1794 mechanism can show the exact allocation point for heap-created
1795 uninitialised values.
1796
1797 In contrast, it is simply too expensive to create a complete
1798 backtrace for each stack allocation. Therefore we merely use a
1799 depth-1 backtrace for stack allocations, which can be done once at
1800 translation time, rather than N times at run time. The result of
1801 this is that, for stack created uninitialised values, Memcheck can
1802 only show the allocating function, and not what called it.
1803 Furthermore, compilers tend to move the stack pointer just once at
1804 the start of the function, to allocate all locals, and so in fact
1805 the stack origin almost always simply points to the opening brace
1806 of the function. Net result is, for stack origins, the mechanism
1807 can tell you in which function the undefined value was created, but
1808 that's all. Users will need to carefully check all locals in the
1809 specified function.
1810
1811 Shadowing registers and memory
1812 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1813
1814 Memory is shadowed using a two level cache structure (ocacheL1 and
1815 ocacheL2). Memory references are first directed to ocacheL1. This
1816 is a traditional 2-way set associative cache with 32-byte lines and
1817 approximate LRU replacement within each set.
1818
1819 A naive implementation would require storing one 32 bit otag for
1820 each byte of memory covered, a 4:1 space overhead. Instead, there
1821 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
1822 that shows which of the 4 bytes have that shadow value and which
1823 have a shadow value of zero (indicating no origin). Hence a lot of
1824 space is saved, but the cost is that only one different origin per
1825 4 bytes of address space can be represented. This is a source of
1826 imprecision, but how much of a problem it really is remains to be
1827 seen.
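
   In outline, reading the origin for a single byte out of a line's
   w32[]/descr[] pair (the OCacheLine fields defined further below)
   goes something like:

      wordoff = oc_line_offset(a);
      byteoff = a & 3;
      otag    = (line->descr[wordoff] & (1 << byteoff))
                   ? line->w32[wordoff] : 0;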
1828
1829 A cache line that contains all zeroes ("no origins") contains no
1830 useful information, and can be ejected from the L1 cache "for
1831 free", in the sense that a read miss on the L1 causes a line of
1832 zeroes to be installed. However, ejecting a line containing
1833 nonzeroes risks losing origin information permanently. In order to
1834 prevent such lossage, ejected nonzero lines are placed in a
1835 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
1836 lines. This can grow arbitrarily large, and so should ensure that
1837 Memcheck runs out of memory in preference to losing useful origin
1838 info due to cache size limitations.
1839
1840 Shadowing registers is a bit tricky, because the shadow values are
1841 32 bits, regardless of the size of the register. That gives a
1842 problem for registers smaller than 32 bits. The solution is to
1843 find spaces in the guest state that are unused, and use those to
1844 shadow guest state fragments smaller than 32 bits. For example, on
1845 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
1846 shadow are allocated for the register's otag, then there are still
1847 12 bytes left over which could be used to shadow 3 other values.
1848
1849 This implies there is some non-obvious mapping from guest state
1850 (start,length) pairs to the relevant shadow offset (for the origin
1851 tags). And it is unfortunately guest-architecture specific. The
1852 mapping is contained in mc_machine.c, which is quite lengthy but
1853 straightforward.
1854
1855 Instrumenting the IR
1856 ~~~~~~~~~~~~~~~~~~~~
1857
1858 Instrumentation is largely straightforward, and done by the
1859 functions schemeE and schemeS in mc_translate.c. These generate
1860 code for handling the origin tags of expressions (E) and statements
1861 (S) respectively. The rather strange names are a reference to the
1862 "compilation schemes" shown in Simon Peyton Jones' book "The
1863 Implementation of Functional Programming Languages" (Prentice Hall,
1864 1987, see
1865 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
1866
1867 schemeS merely arranges to move shadow values around the guest
1868 state to track the incoming IR. schemeE is largely trivial too.
1869 The only significant point is how to compute the otag corresponding
1870 to binary (or ternary, quaternary, etc) operator applications. The
1871 rule is simple: just take whichever value is larger (32-bit
1872 unsigned max). Constants get the special value zero. Hence this
1873 rule always propagates a nonzero (known) otag in preference to a
1874 zero (unknown, or more likely, value-is-defined) tag, as we want.
1875 If two different undefined values are inputs to a binary operator
1876 application, then which is propagated is arbitrary, but that
1877 doesn't matter, since the program is erroneous in using either of
1878 the values, and so there's no point in attempting to propagate
1879 both.
1880
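   A sketch of the resulting propagation rule for a binary application
   op(x,y), where a constant operand contributes a zero tag:

      UInt otag_result = otag_x > otag_y ? otag_x : otag_y;  // Max32U
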
1881 Since constants are abstracted to (otag) zero, much of the
1882 instrumentation code can be folded out without difficulty by the
1883 generic post-instrumentation IR cleanup pass, using these rules:
1884    Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y
1885    are constants is evaluated at JIT time, together with the resulting
1886    dead code removal.  In practice this causes surprisingly few Max32Us to
1887 survive through to backend code generation.
1888
1889 Integration with the V-bits machinery
1890 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1891
1892 This is again largely straightforward. Mostly the otag and V bits
1893 stuff are independent. The only point of interaction is when the V
1894 bits instrumenter creates a call to a helper function to report an
1895 uninitialised value error -- in that case it must first use schemeE
1896 to get hold of the origin tag expression for the value, and pass
1897 that to the helper too.
1898
1899 There is the usual stuff to do with setting address range
1900 permissions. When memory is painted undefined, we must also know
1901 the origin tag to paint with, which involves some tedious plumbing,
1902 particularly to do with the fast case stack handlers. When memory
1903 is painted defined or noaccess then the origin tags must be forced
1904 to zero.
1905
1906 One of the goals of the implementation was to ensure that the
1907 non-origin tracking mode isn't slowed down at all. To do this,
1908 various functions to do with memory permissions setting (again,
1909 mostly pertaining to the stack) are duplicated for the with- and
1910 without-otag case.
1911
1912 Dealing with stack redzones, and the NIA cache
1913 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1914
1915 This is one of the few non-obvious parts of the implementation.
1916
1917 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
1918 reserved area below the stack pointer, that can be used as scratch
1919 space by compiler generated code for functions. In the Memcheck
1920 sources this is referred to as the "stack redzone". The important
1921 thing here is that such redzones are considered volatile across
1922 function calls and returns. So Memcheck takes care to mark them as
1923 undefined for each call and return, on the afflicted platforms.
1924 Past experience shows this is essential in order to get reliable
1925 messages about uninitialised values that come from the stack.
1926
1927 So the question is, when we paint a redzone undefined, what origin
1928 tag should we use for it? Consider a function f() calling g(). If
1929 we paint the redzone using an otag derived from the ExeContext of
1930 the CALL/BL instruction in f, then any errors in g causing it to
1931 use uninitialised values that happen to lie in the redzone, will be
1932 reported as having their origin in f. Which is highly confusing.
1933
1934 The same applies for returns: if, on a return, we paint the redzone
1935    using an origin tag derived from the ExeContext of the RET/BLR
1936 instruction in g, then any later errors in f causing it to use
1937 uninitialised values in the redzone, will be reported as having
1938 their origin in g. Which is just as confusing.
1939
1940 To do it right, in both cases we need to use an origin tag which
1941 pertains to the instruction which dynamically follows the CALL/BL
1942 or RET/BLR. In short, one derived from the NIA - the "next
1943 instruction address".
1944
1945 To make this work, Memcheck's redzone-painting helper,
1946 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
1947 NIA. It converts the NIA to a 1-element ExeContext, and uses that
1948 ExeContext's ECU as the basis for the otag used to paint the
1949 redzone. The expensive part of this is converting an NIA into an
1950 ECU, since this happens once for every call and every return. So
1951 we use a simple 511-line, 2-way set associative cache
1952 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
1953 the cost out.
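
   (Purely as an illustrative sketch -- the real nia_to_ecu_cache
   declaration lives further down this file and may differ in detail --
   such a cache amounts to something like:

      typedef struct { Addr nia0; UInt ecu0; Addr nia1; UInt ecu1; } NECacheEnt;
      static NECacheEnt nia_to_ecu_cache[511];

   indexed by nia % 511, with way 0 probed first and promoted on a hit.)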
1954
1955 Further background comments
1956 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1957
1958 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
1959 > it really just the address of the relevant ExeContext?
1960
1961 Well, it's not the address, but a value which has a 1-1 mapping
1962 with ExeContexts, and is guaranteed not to be zero, since zero
1963 denotes (to memcheck) "unknown origin or defined value". So these
1964 UInts are just numbers starting at 4 and incrementing by 4; each
1965 ExeContext is given a number when it is created. (*** NOTE this
1966 confuses otags and ECUs; see comments above ***).
1967
1968 Making these otags 32-bit regardless of the machine's word size
1969 makes the 64-bit implementation easier (next para). And it doesn't
1970 really limit us in any way, since for the tags to overflow would
1971 require that the program somehow caused 2^30-1 different
1972 ExeContexts to be created, in which case it is probably in deep
1973 trouble. Not to mention V will have soaked up many tens of
1974 gigabytes of memory merely to store them all.
1975
1976 So having 64-bit origins doesn't really buy you anything, and has
1977 the following downsides:
1978
1979 Suppose that instead, an otag is a UWord. This would mean that, on
1980 a 64-bit target,
1981
1982 1. It becomes hard to shadow any element of guest state which is
1983 smaller than 8 bytes. To do so means you'd need to find some
1984 8-byte-sized hole in the guest state which you don't want to
1985 shadow, and use that instead to hold the otag. On ppc64, the
1986 condition code register(s) are split into 20 UChar sized pieces,
1987 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
1988 and so that would entail finding 160 bytes somewhere else in the
1989 guest state.
1990
1991 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
1992 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
1993 same) and so I had to look for 4 untracked otag-sized areas in
1994 the guest state to make that possible.
1995
1996 The same problem exists of course when origin tags are only 32
1997 bits, but it's less extreme.
1998
1999 2. (More compelling) it doubles the size of the origin shadow
2000 memory. Given that the shadow memory is organised as a fixed
2001 size cache, and that accuracy of tracking is limited by origins
2002 falling out the cache due to space conflicts, this isn't good.
2003
2004 > Another question: is the origin tracking perfect, or are there
2005 > cases where it fails to determine an origin?
2006
2007 It is imperfect for at least for the following reasons, and
2008 probably more:
2009
2010 * Insufficient capacity in the origin cache. When a line is
2011 evicted from the cache it is gone forever, and so subsequent
2012 queries for the line produce zero, indicating no origin
2013 information. Interestingly, a line containing all zeroes can be
2014 evicted "free" from the cache, since it contains no useful
2015 information, so there is scope perhaps for some cleverer cache
2016 management schemes. (*** NOTE, with the introduction of the
2017 second level origin tag cache, ocacheL2, this is no longer a
2018 problem. ***)
2019
2020 * The origin cache only stores one otag per 32-bits of address
2021 space, plus 4 bits indicating which of the 4 bytes has that tag
2022 and which are considered defined. The result is that if two
2023 undefined bytes in the same word are stored in memory, the first
2024 stored byte's origin will be lost and replaced by the origin for
2025 the second byte.
2026
2027 * Nonzero origin tags for defined values. Consider a binary
2028 operator application op(x,y). Suppose y is undefined (and so has
2029 a valid nonzero origin tag), and x is defined, but erroneously
2030 has a nonzero origin tag (defined values should have tag zero).
2031 If the erroneous tag has a numeric value greater than y's tag,
2032 then the rule for propagating origin tags though binary
2033 operations, which is simply to take the unsigned max of the two
2034 tags, will erroneously propagate x's tag rather than y's.
2035
2036 * Some obscure uses of x86/amd64 byte registers can cause lossage
2037 or confusion of origins. %AH .. %DH are treated as different
2038 from, and unrelated to, their parent registers, %EAX .. %EDX.
2039    So some weird sequences like
2040
2041 movb undefined-value, %AH
2042 movb defined-value, %AL
2043 .. use %AX or %EAX ..
2044
2045 will cause the origin attributed to %AH to be ignored, since %AL,
2046 %AX, %EAX are treated as the same register, and %AH as a
2047 completely separate one.
2048
2049 But having said all that, it actually seems to work fairly well in
2050 practice.
2051 */
2052
2053 static UWord stats_ocacheL1_find = 0;
2054 static UWord stats_ocacheL1_found_at_1 = 0;
2055 static UWord stats_ocacheL1_found_at_N = 0;
2056 static UWord stats_ocacheL1_misses = 0;
2057 static UWord stats_ocacheL1_lossage = 0;
2058 static UWord stats_ocacheL1_movefwds = 0;
2059
2060 static UWord stats__ocacheL2_refs = 0;
2061 static UWord stats__ocacheL2_misses = 0;
2062 static UWord stats__ocacheL2_n_nodes_max = 0;
2063
2064 /* Cache of 32-bit values, one every 32 bits of address space */
2065
2066 #define OC_BITS_PER_LINE 5
2067 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2068
2069 static INLINE UWord oc_line_offset ( Addr a ) {
2070 return (a >> 2) & (OC_W32S_PER_LINE - 1);
2071 }
2072 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2073 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2074 }
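
/* Worked example, assuming the settings above: with OC_BITS_PER_LINE == 5
   a line covers 32 bytes, i.e. OC_W32S_PER_LINE == 8 32-bit words.  For
   a == 0x1234567B, oc_line_offset(a) == ((a >> 2) & 7) == 6, and the
   line's tag is a & ~31 == 0x12345660, which is_valid_oc_tag accepts. */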
2075
2076 #define OC_LINES_PER_SET 2
2077
2078 #define OC_N_SET_BITS 20
2079 #define OC_N_SETS (1 << OC_N_SET_BITS)
2080
2081 /* These settings give:
2082 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful
2083 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful
2084 */
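
/* Derivation of the numbers above (assuming no struct padding, which the
   OCacheLine layout below permits): sizeof(OCacheLine) is 8+32+8 = 48
   bytes on a 64-bit host (4+32+8 = 44 on 32-bit), so the L1 occupies
   OC_N_SETS * OC_LINES_PER_SET * 48 = 2^20 * 2 * 48 = 100,663,296 bytes
   (2^20 * 2 * 44 = 92,274,688 on 32-bit), of which the w32[] payload --
   the "useful" part -- is 2^20 * 2 * 32 = 67,108,864 bytes. */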
2085
2086 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2087
2088
2089 typedef
2090 struct {
2091 Addr tag;
2092 UInt w32[OC_W32S_PER_LINE];
2093 UChar descr[OC_W32S_PER_LINE];
2094 }
2095 OCacheLine;
2096
2097 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2098 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2099 and 'z' if all the represented tags are zero. */
2100 static UChar classify_OCacheLine ( OCacheLine* line )
2101 {
2102 UWord i;
2103 if (line->tag == 1/*invalid*/)
2104 return 'e'; /* EMPTY */
2105 tl_assert(is_valid_oc_tag(line->tag));
2106 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2107 tl_assert(0 == ((~0xF) & line->descr[i]));
2108 if (line->w32[i] > 0 && line->descr[i] > 0)
2109 return 'n'; /* NONZERO - contains useful info */
2110 }
2111 return 'z'; /* ZERO - no useful info */
2112 }
2113
2114 typedef
2115 struct {
2116 OCacheLine line[OC_LINES_PER_SET];
2117 }
2118 OCacheSet;
2119
2120 typedef
2121 struct {
2122 OCacheSet set[OC_N_SETS];
2123 }
2124 OCache;
2125
2126 static OCache* ocacheL1 = NULL;
2127 static UWord ocacheL1_event_ctr = 0;
2128
2129 static void init_ocacheL2 ( void ); /* fwds */
2130 static void init_OCache ( void )
2131 {
2132 UWord line, set;
2133 tl_assert(MC_(clo_mc_level) >= 3);
2134 tl_assert(ocacheL1 == NULL);
2135 ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2136 if (ocacheL1 == NULL) {
2137 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2138 sizeof(OCache) );
2139 }
2140 tl_assert(ocacheL1 != NULL);
2141 for (set = 0; set < OC_N_SETS; set++) {
2142 for (line = 0; line < OC_LINES_PER_SET; line++) {
2143 ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2144 }
2145 }
2146 init_ocacheL2();
2147 }
2148
2149 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2150 {
2151 OCacheLine tmp;
2152 stats_ocacheL1_movefwds++;
2153 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2154 tmp = set->line[lineno-1];
2155 set->line[lineno-1] = set->line[lineno];
2156 set->line[lineno] = tmp;
2157 }
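
/* Note: swapping the touched line one slot towards slot 0 gives the
   approximate LRU behaviour described in the overview comment above:
   recently used lines migrate to the front of the set, which is the
   only slot probed by find_OCacheLine's fast path. */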
2158
2159 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2160 UWord i;
2161 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2162 line->w32[i] = 0; /* NO ORIGIN */
2163 line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2164 }
2165 line->tag = tag;
2166 }
2167
2168 //////////////////////////////////////////////////////////////
2169 //// OCache backing store
2170
2171 static OSet* ocacheL2 = NULL;
2172
2173 static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) {
2174 return VG_(malloc)(cc, szB);
2175 }
2176 static void ocacheL2_free ( void* v ) {
2177 VG_(free)( v );
2178 }
2179
2180 /* Stats: # nodes currently in tree */
2181 static UWord stats__ocacheL2_n_nodes = 0;
2182
2183 static void init_ocacheL2 ( void )
2184 {
2185 tl_assert(!ocacheL2);
2186 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2187 tl_assert(0 == offsetof(OCacheLine,tag));
2188 ocacheL2
2189 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2190 NULL, /* fast cmp */
2191 ocacheL2_malloc, "mc.ioL2", ocacheL2_free );
2192 tl_assert(ocacheL2);
2193 stats__ocacheL2_n_nodes = 0;
2194 }
2195
2196 /* Find line with the given tag in the tree, or NULL if not found. */
2197 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2198 {
2199 OCacheLine* line;
2200 tl_assert(is_valid_oc_tag(tag));
2201 stats__ocacheL2_refs++;
2202 line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2203 return line;
2204 }
2205
2206 /* Delete the line with the given tag from the tree, if it is present, and
2207 free up the associated memory. */
2208 static void ocacheL2_del_tag ( Addr tag )
2209 {
2210 OCacheLine* line;
2211 tl_assert(is_valid_oc_tag(tag));
2212 stats__ocacheL2_refs++;
2213 line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2214 if (line) {
2215 VG_(OSetGen_FreeNode)(ocacheL2, line);
2216 tl_assert(stats__ocacheL2_n_nodes > 0);
2217 stats__ocacheL2_n_nodes--;
2218 }
2219 }
2220
2221 /* Add a copy of the given line to the tree. It must not already be
2222 present. */
2223 static void ocacheL2_add_line ( OCacheLine* line )
2224 {
2225 OCacheLine* copy;
2226 tl_assert(is_valid_oc_tag(line->tag));
2227 copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2228 tl_assert(copy);
2229 *copy = *line;
2230 stats__ocacheL2_refs++;
2231 VG_(OSetGen_Insert)( ocacheL2, copy );
2232 stats__ocacheL2_n_nodes++;
2233 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2234 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2235 }
2236
2237 ////
2238 //////////////////////////////////////////////////////////////
2239
2240 __attribute__((noinline))
2241 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2242 {
2243 OCacheLine *victim, *inL2;
2244 UChar c;
2245 UWord line;
2246 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2247 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2248 UWord tag = a & tagmask;
2249 tl_assert(setno >= 0 && setno < OC_N_SETS);
2250
2251 /* we already tried line == 0; skip therefore. */
2252 for (line = 1; line < OC_LINES_PER_SET; line++) {
2253 if (ocacheL1->set[setno].line[line].tag == tag) {
2254 if (line == 1) {
2255 stats_ocacheL1_found_at_1++;
2256 } else {
2257 stats_ocacheL1_found_at_N++;
2258 }
2259 if (UNLIKELY(0 == (ocacheL1_event_ctr++
2260 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2261 moveLineForwards( &ocacheL1->set[setno], line );
2262 line--;
2263 }
2264 return &ocacheL1->set[setno].line[line];
2265 }
2266 }
2267
2268 /* A miss. Use the last slot. Implicitly this means we're
2269 ejecting the line in the last slot. */
2270 stats_ocacheL1_misses++;
2271 tl_assert(line == OC_LINES_PER_SET);
2272 line--;
2273 tl_assert(line > 0);
2274
2275 /* First, move the to-be-ejected line to the L2 cache. */
2276 victim = &ocacheL1->set[setno].line[line];
2277 c = classify_OCacheLine(victim);
2278 switch (c) {
2279 case 'e':
2280 /* the line is empty (has invalid tag); ignore it. */
2281 break;
2282 case 'z':
2283 /* line contains zeroes. We must ensure the backing store is
2284 updated accordingly, either by copying the line there
2285 verbatim, or by ensuring it isn't present there. We
2286            choose the latter on the basis that it reduces the size of
2287 the backing store. */
2288 ocacheL2_del_tag( victim->tag );
2289 break;
2290 case 'n':
2291 /* line contains at least one real, useful origin. Copy it
2292 to the backing store. */
2293 stats_ocacheL1_lossage++;
2294 inL2 = ocacheL2_find_tag( victim->tag );
2295 if (inL2) {
2296 *inL2 = *victim;
2297 } else {
2298 ocacheL2_add_line( victim );
2299 }
2300 break;
2301 default:
2302 tl_assert(0);
2303 }
2304
2305 /* Now we must reload the L1 cache from the backing tree, if
2306 possible. */
2307 tl_assert(tag != victim->tag); /* stay sane */
2308 inL2 = ocacheL2_find_tag( tag );
2309 if (inL2) {
2310 /* We're in luck. It's in the L2. */
2311 ocacheL1->set[setno].line[line] = *inL2;
2312 } else {
2313 /* Missed at both levels of the cache hierarchy. We have to
2314 declare it as full of zeroes (unknown origins). */
2315 stats__ocacheL2_misses++;
2316 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2317 }
2318
2319 /* Move it one forwards */
2320 moveLineForwards( &ocacheL1->set[setno], line );
2321 line--;
2322
2323 return &ocacheL1->set[setno].line[line];
2324 }
2325
2326 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2327 {
2328 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2329 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2330 UWord tag = a & tagmask;
2331
2332 stats_ocacheL1_find++;
2333
2334 if (OC_ENABLE_ASSERTIONS) {
2335 tl_assert(setno >= 0 && setno < OC_N_SETS);
2336 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2337 }
2338
2339 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2340 return &ocacheL1->set[setno].line[0];
2341 }
2342
2343 return find_OCacheLine_SLOW( a );
2344 }
2345
2346 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2347 {
2348 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2349 //// Set the origins for a+0 .. a+7
2350 { OCacheLine* line;
2351 UWord lineoff = oc_line_offset(a);
2352 if (OC_ENABLE_ASSERTIONS) {
2353 tl_assert(lineoff >= 0
2354 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2355 }
2356 line = find_OCacheLine( a );
2357 line->descr[lineoff+0] = 0xF;
2358 line->descr[lineoff+1] = 0xF;
2359 line->w32[lineoff+0] = otag;
2360 line->w32[lineoff+1] = otag;
2361 }
2362 //// END inlined, specialised version of MC_(helperc_b_store8)
2363 }
2364
2365
2366 /*------------------------------------------------------------*/
2367 /*--- Aligned fast case permission setters, ---*/
2368 /*--- for dealing with stacks ---*/
2369 /*------------------------------------------------------------*/
2370
2371 /*--------------------- 32-bit ---------------------*/
2372
2373 /* Nb: by "aligned" here we mean 4-byte aligned */
2374
2375 static INLINE void make_aligned_word32_undefined ( Addr a )
2376 {
2377 PROF_EVENT(300, "make_aligned_word32_undefined");
2378
2379 #ifndef PERF_FAST_STACK2
2380 make_mem_undefined(a, 4);
2381 #else
2382 {
2383 UWord sm_off;
2384 SecMap* sm;
2385
2386 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2387 PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
2388 make_mem_undefined(a, 4);
2389 return;
2390 }
2391
2392 sm = get_secmap_for_writing_low(a);
2393 sm_off = SM_OFF(a);
2394 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2395 }
2396 #endif
2397 }
2398
2399 static INLINE
2400 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2401 {
2402 make_aligned_word32_undefined(a);
2403 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2404 //// Set the origins for a+0 .. a+3
2405 { OCacheLine* line;
2406 UWord lineoff = oc_line_offset(a);
2407 if (OC_ENABLE_ASSERTIONS) {
2408 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2409 }
2410 line = find_OCacheLine( a );
2411 line->descr[lineoff] = 0xF;
2412 line->w32[lineoff] = otag;
2413 }
2414 //// END inlined, specialised version of MC_(helperc_b_store4)
2415 }
2416
2417 static INLINE
2418 void make_aligned_word32_noaccess ( Addr a )
2419 {
2420 PROF_EVENT(310, "make_aligned_word32_noaccess");
2421
2422 #ifndef PERF_FAST_STACK2
2423 MC_(make_mem_noaccess)(a, 4);
2424 #else
2425 {
2426 UWord sm_off;
2427 SecMap* sm;
2428
2429 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2430 PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
2431 MC_(make_mem_noaccess)(a, 4);
2432 return;
2433 }
2434
2435 sm = get_secmap_for_writing_low(a);
2436 sm_off = SM_OFF(a);
2437 sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2438
2439 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2440       //// Clear the origins for a+0 .. a+3.
2441 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2442 OCacheLine* line;
2443 UWord lineoff = oc_line_offset(a);
2444 if (OC_ENABLE_ASSERTIONS) {
2445 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2446 }
2447 line = find_OCacheLine( a );
2448 line->descr[lineoff] = 0;
2449 }
2450 //// END inlined, specialised version of MC_(helperc_b_store4)
2451 }
2452 #endif
2453 }
2454
2455 /*--------------------- 64-bit ---------------------*/
2456
2457 /* Nb: by "aligned" here we mean 8-byte aligned */
2458
2459 static INLINE void make_aligned_word64_undefined ( Addr a )
2460 {
2461 PROF_EVENT(320, "make_aligned_word64_undefined");
2462
2463 #ifndef PERF_FAST_STACK2
2464 make_mem_undefined(a, 8);
2465 #else
2466 {
2467 UWord sm_off16;
2468 SecMap* sm;
2469
2470 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2471 PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
2472 make_mem_undefined(a, 8);
2473 return;
2474 }
2475
2476 sm = get_secmap_for_writing_low(a);
2477 sm_off16 = SM_OFF_16(a);
2478 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2479 }
2480 #endif
2481 }
2482
2483 static INLINE
2484 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2485 {
2486 make_aligned_word64_undefined(a);
2487 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2488 //// Set the origins for a+0 .. a+7
2489 { OCacheLine* line;
2490 UWord lineoff = oc_line_offset(a);
2491 tl_assert(lineoff >= 0
2492 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2493 line = find_OCacheLine( a );
2494 line->descr[lineoff+0] = 0xF;
2495 line->descr[lineoff+1] = 0xF;
2496 line->w32[lineoff+0] = otag;
2497 line->w32[lineoff+1] = otag;
2498 }
2499 //// END inlined, specialised version of MC_(helperc_b_store8)
2500 }
2501
2502 static INLINE
2503 void make_aligned_word64_noaccess ( Addr a )
2504 {
2505 PROF_EVENT(330, "make_aligned_word64_noaccess");
2506
2507 #ifndef PERF_FAST_STACK2
2508 MC_(make_mem_noaccess)(a, 8);
2509 #else
2510 {
2511 UWord sm_off16;
2512 SecMap* sm;
2513
2514 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2515 PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
2516 MC_(make_mem_noaccess)(a, 8);
2517 return;
2518 }
2519
2520 sm = get_secmap_for_writing_low(a);
2521 sm_off16 = SM_OFF_16(a);
2522 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2523
2524 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2525 //// Clear the origins for a+0 .. a+7.
2526 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2527 OCacheLine* line;
2528 UWord lineoff = oc_line_offset(a);
2529 tl_assert(lineoff >= 0
2530 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2531 line = find_OCacheLine( a );
2532 line->descr[lineoff+0] = 0;
2533 line->descr[lineoff+1] = 0;
2534 }
2535 //// END inlined, specialised version of MC_(helperc_b_store8)
2536 }
2537 #endif
2538 }
2539
2540
2541 /*------------------------------------------------------------*/
2542 /*--- Stack pointer adjustment ---*/
2543 /*------------------------------------------------------------*/
2544
2545 #ifdef PERF_FAST_STACK
2546 # define MAYBE_USED
2547 #else
2548 # define MAYBE_USED __attribute__((unused))
2549 #endif
2550
2551 /*--------------- adjustment by 4 bytes ---------------*/
2552
2553 MAYBE_USED
2554 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2555 {
2556 UInt otag = ecu | MC_OKIND_STACK;
2557 PROF_EVENT(110, "new_mem_stack_4");
2558 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2559 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2560 } else {
2561 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2562 }
2563 }
2564
2565 MAYBE_USED
2566 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2567 {
2568 PROF_EVENT(110, "new_mem_stack_4");
2569 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2570 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2571 } else {
2572 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2573 }
2574 }
2575
2576 MAYBE_USED
2577 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2578 {
2579 PROF_EVENT(120, "die_mem_stack_4");
2580 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2581 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2582 } else {
2583 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2584 }
2585 }
2586
2587 /*--------------- adjustment by 8 bytes ---------------*/
2588
2589 MAYBE_USED
2590 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2591 {
2592 UInt otag = ecu | MC_OKIND_STACK;
2593 PROF_EVENT(111, "new_mem_stack_8");
2594 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2595 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2596 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2597 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2598 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2599 } else {
2600 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2601 }
2602 }
2603
2604 MAYBE_USED
2605 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2606 {
2607 PROF_EVENT(111, "new_mem_stack_8");
2608 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2609 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2610 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2611 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2612 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2613 } else {
2614 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2615 }
2616 }
2617
2618 MAYBE_USED
2619 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2620 {
2621 PROF_EVENT(121, "die_mem_stack_8");
2622 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2623 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2624 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2625 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2626 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2627 } else {
2628 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2629 }
2630 }
2631
2632 /*--------------- adjustment by 12 bytes ---------------*/
2633
2634 MAYBE_USED
2635 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2636 {
2637 UInt otag = ecu | MC_OKIND_STACK;
2638 PROF_EVENT(112, "new_mem_stack_12");
2639 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2640 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2641 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2642 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2643 /* from previous test we don't have 8-alignment at offset +0,
2644 hence must have 8 alignment at offsets +4/-4. Hence safe to
2645         do 4 at +0 and then 8 at +4. */
2646 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2647 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2648 } else {
2649 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2650 }
2651 }
2652
2653 MAYBE_USED
2654 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2655 {
2656 PROF_EVENT(112, "new_mem_stack_12");
2657 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2658 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2659 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2660 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2661 /* from previous test we don't have 8-alignment at offset +0,
2662 hence must have 8 alignment at offsets +4/-4. Hence safe to
2663         do 4 at +0 and then 8 at +4. */
2664 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2665 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2666 } else {
2667 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2668 }
2669 }
2670
2671 MAYBE_USED
2672 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2673 {
2674 PROF_EVENT(122, "die_mem_stack_12");
2675 /* Note the -12 in the test */
2676 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2677 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2678 -4. */
2679 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2680 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2681 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2682 /* We have 4-alignment at +0, but we don't have 8-alignment at
2683 -12. So we must have 8-alignment at -8. Hence do 4 at -12
2684 and then 8 at -8. */
2685 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2686 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2687 } else {
2688 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2689 }
2690 }
2691
2692 /*--------------- adjustment by 16 bytes ---------------*/
2693
2694 MAYBE_USED
2695 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2696 {
2697 UInt otag = ecu | MC_OKIND_STACK;
2698 PROF_EVENT(113, "new_mem_stack_16");
2699 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2700 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2701 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2702 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2703 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2704 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2705 Hence do 4 at +0, 8 at +4, 4 at +12. */
2706 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2707 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2708 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2709 } else {
2710 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2711 }
2712 }
2713
2714 MAYBE_USED
2715 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2716 {
2717 PROF_EVENT(113, "new_mem_stack_16");
2718 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2719 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2720 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2721 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2722 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2723 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2724 Hence do 4 at +0, 8 at +4, 4 at +12. */
2725 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2726 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2727 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2728 } else {
2729 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2730 }
2731 }
2732
2733 MAYBE_USED
2734 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2735 {
2736 PROF_EVENT(123, "die_mem_stack_16");
2737 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2738 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2739 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2740 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2741 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2742 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
2743 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2744 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2745 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2746 } else {
2747 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2748 }
2749 }
2750
2751 /*--------------- adjustment by 32 bytes ---------------*/
2752
2753 MAYBE_USED
2754 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
2755 {
2756 UInt otag = ecu | MC_OKIND_STACK;
2757 PROF_EVENT(114, "new_mem_stack_32");
2758 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2759 /* Straightforward */
2760 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2761 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2762 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2763 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2764 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2765 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
2766 +0,+28. */
2767 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2768 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2769 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2770 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
2771 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
2772 } else {
2773 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
2774 }
2775 }
2776
2777 MAYBE_USED
2778 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
2779 {
2780 PROF_EVENT(114, "new_mem_stack_32");
2781 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2782 /* Straightforward */
2783 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2784 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2785 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2786 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2787 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2788 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
2789 +0,+28. */
2790 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2791 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2792 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2793 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
2794 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
2795 } else {
2796 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
2797 }
2798 }
2799
2800 MAYBE_USED
2801 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
2802 {
2803 PROF_EVENT(124, "die_mem_stack_32");
2804 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2805 /* Straightforward */
2806 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2807 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2808 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2809 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2810 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2811 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
2812 4 at -32,-4. */
2813 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2814 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
2815 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
2816 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2817 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2818 } else {
2819 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
2820 }
2821 }
2822
2823 /*--------------- adjustment by 112 bytes ---------------*/
2824
2825 MAYBE_USED
2826 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
2827 {
2828 UInt otag = ecu | MC_OKIND_STACK;
2829 PROF_EVENT(115, "new_mem_stack_112");
2830 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2831 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2832 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2833 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2834 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2835 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2836 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2837 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2838 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2839 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2840 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2841 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2842 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2843 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
2844 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
2845 } else {
2846 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
2847 }
2848 }
2849
2850 MAYBE_USED
2851 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
2852 {
2853 PROF_EVENT(115, "new_mem_stack_112");
2854 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2855 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2856 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2857 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2858 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2859 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
2860 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
2861 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
2862 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
2863 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
2864 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
2865 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
2866 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
2867 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
2868 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
2869 } else {
2870 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
2871 }
2872 }
2873
2874 MAYBE_USED
2875 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
2876 {
2877 PROF_EVENT(125, "die_mem_stack_112");
2878 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2879 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
2880 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
2881 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
2882 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
2883 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
2884 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
2885 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
2886 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
2887 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
2888 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
2889 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2890 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2891 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2892 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2893 } else {
2894 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
2895 }
2896 }
2897
2898 /*--------------- adjustment by 128 bytes ---------------*/
2899
2900 MAYBE_USED
2901 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
2902 {
2903 UInt otag = ecu | MC_OKIND_STACK;
2904 PROF_EVENT(116, "new_mem_stack_128");
2905 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2906 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2907 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2908 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2909 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2910 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2911 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2912 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2913 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2914 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2915 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2916 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2917 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2918 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
2919 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
2920 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
2921 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
2922 } else {
2923 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
2924 }
2925 }
2926
2927 MAYBE_USED
2928 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
2929 {
2930 PROF_EVENT(116, "new_mem_stack_128");
2931 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2932 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2933 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2934 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2935 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2936 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
2937 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
2938 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
2939 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
2940 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
2941 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
2942 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
2943 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
2944 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
2945 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
2946 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
2947 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
2948 } else {
2949 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
2950 }
2951 }
2952
2953 MAYBE_USED
2954 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
2955 {
2956 PROF_EVENT(126, "die_mem_stack_128");
2957 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2958 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
2959 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
2960 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
2961 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
2962 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
2963 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
2964 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
2965 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
2966 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
2967 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
2968 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
2969 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
2970 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2971 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2972 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2973 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2974 } else {
2975 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
2976 }
2977 }
2978
2979 /*--------------- adjustment by 144 bytes ---------------*/
2980
2981 MAYBE_USED
2982 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
2983 {
2984 UInt otag = ecu | MC_OKIND_STACK;
2985 PROF_EVENT(117, "new_mem_stack_144");
2986 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2987 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2988 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2989 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2990 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2991 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2992 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2993 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2994 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2995 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2996 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2997 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2998 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2999 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3000 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3001 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3002 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3003 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3004 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3005 } else {
3006 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3007 }
3008 }
3009
3010 MAYBE_USED
3011 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3012 {
3013 PROF_EVENT(117, "new_mem_stack_144");
3014 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3015 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3016 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3017 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3018 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3019 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3020 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3021 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3022 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3023 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3024 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3025 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3026 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3027 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3028 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3029 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3030 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3031 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3032 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3033 } else {
3034 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3035 }
3036 }
3037
3038 MAYBE_USED
3039 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3040 {
3041 PROF_EVENT(127, "die_mem_stack_144");
3042 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3043 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3044 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3045 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3046 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3047 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3048 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3049 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3050 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3051 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3052 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3053 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3054 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3055 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3056 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3057 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3058 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3059 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3060 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3061 } else {
3062 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3063 }
3064 }
3065
3066 /*--------------- adjustment by 160 bytes ---------------*/
3067
3068 MAYBE_USED
3069 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3070 {
3071 UInt otag = ecu | MC_OKIND_STACK;
3072 PROF_EVENT(118, "new_mem_stack_160");
3073 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3074 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3075 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3076 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3077 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3078 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3079 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3080 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3081 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3082 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3083 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3084 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3085 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3086 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3087 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3088 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3089 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3090 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3091 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3092 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3093 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3094 } else {
3095 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3096 }
3097 }
3098
3099 MAYBE_USED
3100 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3101 {
3102 PROF_EVENT(118, "new_mem_stack_160");
3103 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3104 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3105 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3106 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3107 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3108 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3109 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3110 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3111 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3112 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3113 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3114 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3115 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3116 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3117 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3118 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3119 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3120 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3121 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3122 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3123 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3124 } else {
3125 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3126 }
3127 }
3128
3129 MAYBE_USED
3130 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3131 {
3132 PROF_EVENT(128, "die_mem_stack_160");
3133 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3134 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3135 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3136 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3137 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3138 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3139 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3140 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3141 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3142 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3143 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3144 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3145 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3146 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3147 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3148 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3149 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3150 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3151 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3152 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3153 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3154 } else {
3155 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3156 }
3157 }
3158
3159 /*--------------- adjustment by N bytes ---------------*/
3160
3161 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3162 {
3163 UInt otag = ecu | MC_OKIND_STACK;
3164 PROF_EVENT(115, "new_mem_stack_w_otag");
3165 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3166 }
3167
3168 static void mc_new_mem_stack ( Addr a, SizeT len )
3169 {
3170 PROF_EVENT(115, "new_mem_stack");
3171 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3172 }
3173
3174 static void mc_die_mem_stack ( Addr a, SizeT len )
3175 {
3176 PROF_EVENT(125, "die_mem_stack");
3177 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3178 }
3179
3180
3181 /* The AMD64 ABI says:
3182
3183 "The 128-byte area beyond the location pointed to by %rsp is considered
3184 to be reserved and shall not be modified by signal or interrupt
3185 handlers. Therefore, functions may use this area for temporary data
3186 that is not needed across function calls. In particular, leaf functions
3187 may use this area for their entire stack frame, rather than adjusting
3188 the stack pointer in the prologue and epilogue. This area is known as
3189 red zone [sic]."
3190
3191 So after any call or return we need to mark this redzone as containing
3192 undefined values.
3193
3194 Consider this: we're in function f. f calls g. g moves rsp down
3195 modestly (say 16 bytes) and writes stuff all over the red zone, making it
3196 defined. g returns. f is buggy and reads from parts of the red zone
3197 that it didn't write on. But because g filled that area in, f is going
3198 to be picking up defined V bits and so any errors from reading bits of
3199 the red zone it didn't write, will be missed. The only solution I could
3200 think of was to make the red zone undefined when g returns to f.
3201
3202 This is in accordance with the ABI, which makes it clear the redzone
3203 is volatile across function calls.
3204
3205 The problem occurs the other way round too: f could fill the RZ up
3206 with defined values and g could mistakenly read them. So the RZ
3207 also needs to be nuked on function calls.
3208 */
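/* A minimal illustrative sketch (not part of the tool; names and call site
   are hypothetical, for exposition only): on amd64 the front end marks each
   call and return site with a hint covering the 128-byte redzone, and the
   instrumenter in effect arranges for

      // hypothetical, for exposition only
      MC_(helperc_MAKE_STACK_UNINIT)( rsp - VG_STACK_REDZONE_SZB,
                                      VG_STACK_REDZONE_SZB,
                                      next_instruction_addr );

   to run (see the helper defined further below), so the redzone is re-marked
   as undefined exactly at the points where the ABI says its contents may be
   clobbered; with origin tracking enabled, the call/return site becomes the
   recorded origin. */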
3209
3210
3211 /* Here's a simple cache to hold nia -> ECU mappings. It could be
3212 improved so as to have a lower miss rate. */
3213
3214 static UWord stats__nia_cache_queries = 0;
3215 static UWord stats__nia_cache_misses = 0;
3216
3217 typedef
3218 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */
3219 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3220 WCacheEnt;
3221
3222 #define N_NIA_TO_ECU_CACHE 511
3223
3224 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
3225
3226 static void init_nia_to_ecu_cache ( void )
3227 {
3228 UWord i;
3229 Addr zero_addr = 0;
3230 ExeContext* zero_ec;
3231 UInt zero_ecu;
3232 /* Fill all the slots with an entry for address zero, and the
3233 relevant otags accordingly. Hence the cache is initially filled
3234 with valid data. */
3235 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3236 tl_assert(zero_ec);
3237 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3238 tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3239 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3240 nia_to_ecu_cache[i].nia0 = zero_addr;
3241 nia_to_ecu_cache[i].ecu0 = zero_ecu;
3242 nia_to_ecu_cache[i].nia1 = zero_addr;
3243 nia_to_ecu_cache[i].ecu1 = zero_ecu;
3244 }
3245 }
3246
3247 static inline UInt convert_nia_to_ecu ( Addr nia )
3248 {
3249 UWord i;
3250 UInt ecu;
3251 ExeContext* ec;
3252
3253 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3254
3255 stats__nia_cache_queries++;
3256 i = nia % N_NIA_TO_ECU_CACHE;
3257 tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3258
3259 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3260 return nia_to_ecu_cache[i].ecu0;
3261
3262 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3263 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3264 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3265 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3266 # undef SWAP
3267 return nia_to_ecu_cache[i].ecu0;
3268 }
3269
3270 stats__nia_cache_misses++;
3271 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3272 tl_assert(ec);
3273 ecu = VG_(get_ECU_from_ExeContext)(ec);
3274 tl_assert(VG_(is_plausible_ECU)(ecu));
3275
3276 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3277 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3278
3279 nia_to_ecu_cache[i].nia0 = nia;
3280 nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3281 return ecu;
3282 }
3283
3284
3285 /* Note that this serves both the origin-tracking and
3286 no-origin-tracking modes. We assume that calls to it are
3287 sufficiently infrequent that it isn't worth specialising for the
3288 with/without origin-tracking cases. */
3289 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
3290 {
3291 UInt otag;
3292 tl_assert(sizeof(UWord) == sizeof(SizeT));
3293 if (0)
3294 VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
3295 base, len, nia );
3296
3297 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3298 UInt ecu = convert_nia_to_ecu ( nia );
3299 tl_assert(VG_(is_plausible_ECU)(ecu));
3300 otag = ecu | MC_OKIND_STACK;
3301 } else {
3302 tl_assert(nia == 0);
3303 otag = 0;
3304 }
3305
3306 # if 0
3307 /* Really slow version */
3308 MC_(make_mem_undefined)(base, len, otag);
3309 # endif
3310
3311 # if 0
3312 /* Slow(ish) version, which is fairly easily seen to be correct.
3313 */
3314 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3315 make_aligned_word64_undefined(base + 0, otag);
3316 make_aligned_word64_undefined(base + 8, otag);
3317 make_aligned_word64_undefined(base + 16, otag);
3318 make_aligned_word64_undefined(base + 24, otag);
3319
3320 make_aligned_word64_undefined(base + 32, otag);
3321 make_aligned_word64_undefined(base + 40, otag);
3322 make_aligned_word64_undefined(base + 48, otag);
3323 make_aligned_word64_undefined(base + 56, otag);
3324
3325 make_aligned_word64_undefined(base + 64, otag);
3326 make_aligned_word64_undefined(base + 72, otag);
3327 make_aligned_word64_undefined(base + 80, otag);
3328 make_aligned_word64_undefined(base + 88, otag);
3329
3330 make_aligned_word64_undefined(base + 96, otag);
3331 make_aligned_word64_undefined(base + 104, otag);
3332 make_aligned_word64_undefined(base + 112, otag);
3333 make_aligned_word64_undefined(base + 120, otag);
3334 } else {
3335 MC_(make_mem_undefined)(base, len, otag);
3336 }
3337 # endif
3338
3339 /* Idea is: go fast when
3340 * 8-aligned and length is 128
3341 * the sm is available in the main primary map
3342 * the address range falls entirely with a single secondary map
3343 If all those conditions hold, just update the V+A bits by writing
3344 directly into the vabits array. (If the sm was distinguished, this
3345 will make a copy and then write to it.)
3346 */
3347
3348 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3349 /* Now we know the address range is suitably sized and aligned. */
3350 UWord a_lo = (UWord)(base);
3351 UWord a_hi = (UWord)(base + 128 - 1);
3352 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3353 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3354 // Now we know the entire range is within the main primary map.
3355 SecMap* sm = get_secmap_for_writing_low(a_lo);
3356 SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3357 /* Now we know that the entire address range falls within a
3358 single secondary map, and that that secondary 'lives' in
3359 the main primary map. */
3360 if (LIKELY(sm == sm_hi)) {
3361 // Finally, we know that the range is entirely within one secmap.
3362 UWord v_off = SM_OFF(a_lo);
3363 UShort* p = (UShort*)(&sm->vabits8[v_off]);
3364 p[ 0] = VA_BITS16_UNDEFINED;
3365 p[ 1] = VA_BITS16_UNDEFINED;
3366 p[ 2] = VA_BITS16_UNDEFINED;
3367 p[ 3] = VA_BITS16_UNDEFINED;
3368 p[ 4] = VA_BITS16_UNDEFINED;
3369 p[ 5] = VA_BITS16_UNDEFINED;
3370 p[ 6] = VA_BITS16_UNDEFINED;
3371 p[ 7] = VA_BITS16_UNDEFINED;
3372 p[ 8] = VA_BITS16_UNDEFINED;
3373 p[ 9] = VA_BITS16_UNDEFINED;
3374 p[10] = VA_BITS16_UNDEFINED;
3375 p[11] = VA_BITS16_UNDEFINED;
3376 p[12] = VA_BITS16_UNDEFINED;
3377 p[13] = VA_BITS16_UNDEFINED;
3378 p[14] = VA_BITS16_UNDEFINED;
3379 p[15] = VA_BITS16_UNDEFINED;
3380 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3381 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3382 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3383 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3384 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3385 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3386 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3387 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3388 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3389 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3390 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3391 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3392 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3393 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3394 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3395 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3396 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3397 }
3398 return;
3399 }
3400 }
3401 }
3402
3403 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3404 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3405 /* Now we know the address range is suitably sized and aligned. */
3406 UWord a_lo = (UWord)(base);
3407 UWord a_hi = (UWord)(base + 288 - 1);
3408 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3409 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3410 // Now we know the entire range is within the main primary map.
3411 SecMap* sm = get_secmap_for_writing_low(a_lo);
3412 SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3413 /* Now we know that the entire address range falls within a
3414 single secondary map, and that that secondary 'lives' in
3415 the main primary map. */
3416 if (LIKELY(sm == sm_hi)) {
3417 // Finally, we know that the range is entirely within one secmap.
3418 UWord v_off = SM_OFF(a_lo);
3419 UShort* p = (UShort*)(&sm->vabits8[v_off]);
3420 p[ 0] = VA_BITS16_UNDEFINED;
3421 p[ 1] = VA_BITS16_UNDEFINED;
3422 p[ 2] = VA_BITS16_UNDEFINED;
3423 p[ 3] = VA_BITS16_UNDEFINED;
3424 p[ 4] = VA_BITS16_UNDEFINED;
3425 p[ 5] = VA_BITS16_UNDEFINED;
3426 p[ 6] = VA_BITS16_UNDEFINED;
3427 p[ 7] = VA_BITS16_UNDEFINED;
3428 p[ 8] = VA_BITS16_UNDEFINED;
3429 p[ 9] = VA_BITS16_UNDEFINED;
3430 p[10] = VA_BITS16_UNDEFINED;
3431 p[11] = VA_BITS16_UNDEFINED;
3432 p[12] = VA_BITS16_UNDEFINED;
3433 p[13] = VA_BITS16_UNDEFINED;
3434 p[14] = VA_BITS16_UNDEFINED;
3435 p[15] = VA_BITS16_UNDEFINED;
3436 p[16] = VA_BITS16_UNDEFINED;
3437 p[17] = VA_BITS16_UNDEFINED;
3438 p[18] = VA_BITS16_UNDEFINED;
3439 p[19] = VA_BITS16_UNDEFINED;
3440 p[20] = VA_BITS16_UNDEFINED;
3441 p[21] = VA_BITS16_UNDEFINED;
3442 p[22] = VA_BITS16_UNDEFINED;
3443 p[23] = VA_BITS16_UNDEFINED;
3444 p[24] = VA_BITS16_UNDEFINED;
3445 p[25] = VA_BITS16_UNDEFINED;
3446 p[26] = VA_BITS16_UNDEFINED;
3447 p[27] = VA_BITS16_UNDEFINED;
3448 p[28] = VA_BITS16_UNDEFINED;
3449 p[29] = VA_BITS16_UNDEFINED;
3450 p[30] = VA_BITS16_UNDEFINED;
3451 p[31] = VA_BITS16_UNDEFINED;
3452 p[32] = VA_BITS16_UNDEFINED;
3453 p[33] = VA_BITS16_UNDEFINED;
3454 p[34] = VA_BITS16_UNDEFINED;
3455 p[35] = VA_BITS16_UNDEFINED;
3456 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3457 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3458 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3459 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3460 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3461 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3462 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3463 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3464 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3465 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3466 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3467 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3468 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3469 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3470 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3471 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3472 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3473 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3474 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3475 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3476 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3477 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3478 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3479 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3480 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3481 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3482 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3483 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3484 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3485 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3486 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3487 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3488 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3489 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3490 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3491 set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3492 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3493 }
3494 return;
3495 }
3496 }
3497 }
3498
3499 /* else fall into slow case */
3500 MC_(make_mem_undefined_w_otag)(base, len, otag);
3501 }
3502
3503
3504 /*------------------------------------------------------------*/
3505 /*--- Checking memory ---*/
3506 /*------------------------------------------------------------*/
3507
3508 typedef
3509 enum {
3510 MC_Ok = 5,
3511 MC_AddrErr = 6,
3512 MC_ValueErr = 7
3513 }
3514 MC_ReadResult;
3515
3516
3517 /* Check permissions for address range. If inadequate permissions
3518 exist, *bad_addr is set to the offending address, so the caller can
3519 know what it is. */
3520
3521 /* Returns True if [a .. a+len) is not addressible. Otherwise,
3522 returns False, and if bad_addr is non-NULL, sets *bad_addr to
3523 indicate the lowest failing address. Functions below are
3524 similar. */
3525 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
3526 {
3527 SizeT i;
3528 UWord vabits2;
3529
3530 PROF_EVENT(60, "check_mem_is_noaccess");
3531 for (i = 0; i < len; i++) {
3532 PROF_EVENT(61, "check_mem_is_noaccess(loop)");
3533 vabits2 = get_vabits2(a);
3534 if (VA_BITS2_NOACCESS != vabits2) {
3535 if (bad_addr != NULL) *bad_addr = a;
3536 return False;
3537 }
3538 a++;
3539 }
3540 return True;
3541 }
3542
3543 static Bool is_mem_addressable ( Addr a, SizeT len,
3544 /*OUT*/Addr* bad_addr )
3545 {
3546 SizeT i;
3547 UWord vabits2;
3548
3549 PROF_EVENT(62, "is_mem_addressable");
3550 for (i = 0; i < len; i++) {
3551 PROF_EVENT(63, "is_mem_addressable(loop)");
3552 vabits2 = get_vabits2(a);
3553 if (VA_BITS2_NOACCESS == vabits2) {
3554 if (bad_addr != NULL) *bad_addr = a;
3555 return False;
3556 }
3557 a++;
3558 }
3559 return True;
3560 }
3561
3562 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
3563 /*OUT*/Addr* bad_addr,
3564 /*OUT*/UInt* otag )
3565 {
3566 SizeT i;
3567 UWord vabits2;
3568
3569 PROF_EVENT(64, "is_mem_defined");
3570 DEBUG("is_mem_defined\n");
3571
3572 if (otag) *otag = 0;
3573 if (bad_addr) *bad_addr = 0;
3574 for (i = 0; i < len; i++) {
3575 PROF_EVENT(65, "is_mem_defined(loop)");
3576 vabits2 = get_vabits2(a);
3577 if (VA_BITS2_DEFINED != vabits2) {
3578 // Error! Nb: Report addressability errors in preference to
3579 // definedness errors. And don't report definedness errors unless
3580 // --undef-value-errors=yes.
3581 if (bad_addr) {
3582 *bad_addr = a;
3583 }
3584 if (VA_BITS2_NOACCESS == vabits2) {
3585 return MC_AddrErr;
3586 }
3587 if (MC_(clo_mc_level) >= 2) {
3588 if (otag && MC_(clo_mc_level) == 3) {
3589 *otag = MC_(helperc_b_load1)( a );
3590 }
3591 return MC_ValueErr;
3592 }
3593 }
3594 a++;
3595 }
3596 return MC_Ok;
3597 }
3598
3599
3600 /* Check a zero-terminated ascii string. Tricky -- don't want to
3601 examine the actual bytes, to find the end, until we're sure it is
3602 safe to do so. */
3603
3604 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
3605 {
3606 UWord vabits2;
3607
3608 PROF_EVENT(66, "mc_is_defined_asciiz");
3609 DEBUG("mc_is_defined_asciiz\n");
3610
3611 if (otag) *otag = 0;
3612 if (bad_addr) *bad_addr = 0;
3613 while (True) {
3614 PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
3615 vabits2 = get_vabits2(a);
3616 if (VA_BITS2_DEFINED != vabits2) {
3617 // Error! Nb: Report addressability errors in preference to
3618 // definedness errors. And don't report definedness errors unless
3619 // --undef-value-errors=yes.
3620 if (bad_addr) {
3621 *bad_addr = a;
3622 }
3623 if (VA_BITS2_NOACCESS == vabits2) {
3624 return MC_AddrErr;
3625 }
3626 if (MC_(clo_mc_level) >= 2) {
3627 if (otag && MC_(clo_mc_level) == 3) {
3628 *otag = MC_(helperc_b_load1)( a );
3629 }
3630 return MC_ValueErr;
3631 }
3632 }
3633 /* Ok, a is safe to read. */
3634 if (* ((UChar*)a) == 0) {
3635 return MC_Ok;
3636 }
3637 a++;
3638 }
3639 }
3640
3641
3642 /*------------------------------------------------------------*/
3643 /*--- Memory event handlers ---*/
3644 /*------------------------------------------------------------*/
3645
3646 static
3647 void check_mem_is_addressable ( CorePart part, ThreadId tid, Char* s,
3648 Addr base, SizeT size )
3649 {
3650 Addr bad_addr;
3651 Bool ok = is_mem_addressable ( base, size, &bad_addr );
3652
3653 if (!ok) {
3654 switch (part) {
3655 case Vg_CoreSysCall:
3656 MC_(record_memparam_error) ( tid, bad_addr,
3657 /*isAddrErr*/True, s, 0/*otag*/ );
3658 break;
3659
3660 case Vg_CoreSignal:
3661 MC_(record_core_mem_error)( tid, s );
3662 break;
3663
3664 default:
3665 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
3666 }
3667 }
3668 }
3669
3670 static
3671 void check_mem_is_defined ( CorePart part, ThreadId tid, Char* s,
3672 Addr base, SizeT size )
3673 {
3674 UInt otag = 0;
3675 Addr bad_addr;
3676 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
3677
3678 if (MC_Ok != res) {
3679 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3680
3681 switch (part) {
3682 case Vg_CoreSysCall:
3683 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3684 isAddrErr ? 0 : otag );
3685 break;
3686
3687 case Vg_CoreSysCallArgInMem:
3688 MC_(record_regparam_error) ( tid, s, otag );
3689 break;
3690
3691 /* If we're being asked to jump to a silly address, record an error
3692 message before potentially crashing the entire system. */
3693 case Vg_CoreTranslate:
3694 MC_(record_jump_error)( tid, bad_addr );
3695 break;
3696
3697 default:
3698 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
3699 }
3700 }
3701 }
3702
3703 static
3704 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
3705 Char* s, Addr str )
3706 {
3707 MC_ReadResult res;
3708 Addr bad_addr = 0; // shut GCC up
3709 UInt otag = 0;
3710
3711 tl_assert(part == Vg_CoreSysCall);
3712 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
3713 if (MC_Ok != res) {
3714 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3715 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3716 isAddrErr ? 0 : otag );
3717 }
3718 }
3719
3720 /* Handling of mmap and mprotect is not as simple as it seems.
3721
3722 The underlying semantics are that memory obtained from mmap is
3723 always initialised, but may be inaccessible. And changes to the
3724 protection of memory do not change its contents and hence not its
3725 definedness state. Problem is we can't model
3726 inaccessible-but-with-some-definedness state; once we mark memory
3727 as inaccessible we lose all info about definedness, and so can't
3728 restore that if it is later made accessible again.
3729
3730 One obvious thing to do is this:
3731
3732 mmap/mprotect NONE -> noaccess
3733 mmap/mprotect other -> defined
3734
3735 The problem case here is: taking accessible memory, writing
3736 uninitialised data to it, mprotecting it NONE and later mprotecting
3737 it back to some accessible state causes the undefinedness to be
3738 lost.
3739
3740 A better proposal is:
3741
3742 (1) mmap NONE -> make noaccess
3743 (2) mmap other -> make defined
3744
3745 (3) mprotect NONE -> # no change
3746 (4) mprotect other -> change any "noaccess" to "defined"
3747
3748 (2) is OK because memory newly obtained from mmap really is defined
3749 (zeroed out by the kernel -- doing anything else would
3750 constitute a massive security hole.)
3751
3752 (1) is OK because the only way to make the memory usable is via
3753 (4), in which case we also wind up correctly marking it all as
3754 defined.
3755
3756 (3) is the weak case. We choose not to change memory state.
3757 (presumably the range is in some mixture of "defined" and
3758 "undefined", viz, accessible but with arbitrary V bits). Doing
3759 nothing means we retain the V bits, so that if the memory is
3760 later mprotected "other", the V bits remain unchanged, so there
3761 can be no false negatives. The bad effect is that if there's
3762 an access in the area, then MC cannot warn; but at least we'll
3763 get a SEGV to show, so it's better than nothing.
3764
3765 Consider the sequence (3) followed by (4). Any memory that was
3766 "defined" or "undefined" previously retains its state (as
3767 required). Any memory that was "noaccess" before can only have
3768 been made that way by (1), and so it's OK to change it to
3769 "defined".
3770
3771 See https://bugs.kde.org/show_bug.cgi?id=205541
3772 and https://bugs.kde.org/show_bug.cgi?id=210268
3773 */
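/* Illustrative user-level scenario (hypothetical, for exposition only)
   showing why rule (3) deliberately leaves the V bits alone:

      // hypothetical client code, not part of the tool
      char* p = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
                     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);   // (2): defined
      char junk;                                          // uninitialised
      p[0] = junk;                         // p[0] is now undefined
      mprotect(p, 4096, PROT_NONE);        // (3): V bits left unchanged
      mprotect(p, 4096, PROT_READ|PROT_WRITE); // (4): noaccess -> defined
      if (p[0]) { }                        // still reported: V bits survived

   Had (3) marked the page noaccess, step (4) would have turned p[0] into
   "defined" and the use of junk would have gone unreported. */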
3774 static
3775 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
3776 ULong di_handle )
3777 {
3778 if (rr || ww || xx) {
3779 /* (2) mmap/mprotect other -> defined */
3780 MC_(make_mem_defined)(a, len);
3781 } else {
3782 /* (1) mmap/mprotect NONE -> noaccess */
3783 MC_(make_mem_noaccess)(a, len);
3784 }
3785 }
3786
3787 static
3788 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
3789 {
3790 if (rr || ww || xx) {
3791 /* (4) mprotect other -> change any "noaccess" to "defined" */
3792 make_mem_defined_if_noaccess(a, len);
3793 } else {
3794 /* (3) mprotect NONE -> # no change */
3795 /* do nothing */
3796 }
3797 }
3798
3799
3800 static
3801 void mc_new_mem_startup( Addr a, SizeT len,
3802 Bool rr, Bool ww, Bool xx, ULong di_handle )
3803 {
3804 // Because code is defined, initialised variables get put in the data
3805 // segment and are defined, and uninitialised variables get put in the
3806 // bss segment and are auto-zeroed (and so defined).
3807 //
3808 // It's possible that there will be padding between global variables.
3809 // This will also be auto-zeroed, and marked as defined by Memcheck. If
3810 // a program uses it, Memcheck will not complain. This is arguably a
3811 // false negative, but it's a grey area -- the behaviour is defined (the
3812 // padding is zeroed) but it's probably not what the user intended. And
3813 // we can't avoid it.
3814 //
3815 // Note: we generally ignore RWX permissions, because we can't track them
3816 // without requiring more than one A bit which would slow things down a
3817 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
3818 // So we mark any such pages as "unaddressable".
3819 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
3820 a, (ULong)len, rr, ww, xx);
3821 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
3822 }
3823
3824 static
3825 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
3826 {
3827 MC_(make_mem_defined)(a, len);
3828 }
3829
3830
3831 /*------------------------------------------------------------*/
3832 /*--- Register event handlers ---*/
3833 /*------------------------------------------------------------*/
3834
3835 /* Try and get a nonzero origin for the guest state section of thread
3836 tid characterised by (offset,size). Return 0 if nothing to show
3837 for it. */
3838 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
3839 Int offset, SizeT size )
3840 {
3841 Int sh2off;
3842 UChar area[6];
3843 UInt otag;
3844 sh2off = MC_(get_otrack_shadow_offset)( offset, size );
3845 if (sh2off == -1)
3846 return 0; /* This piece of guest state is not tracked */
3847 tl_assert(sh2off >= 0);
3848 tl_assert(0 == (sh2off % 4));
3849 area[0] = 0x31;
3850 area[5] = 0x27;
3851 VG_(get_shadow_regs_area)( tid, &area[1], 2/*shadowno*/,sh2off,4 );
3852 tl_assert(area[0] == 0x31);
3853 tl_assert(area[5] == 0x27);
3854 otag = *(UInt*)&area[1];
3855 return otag;
3856 }
3857
3858
3859 /* When some chunk of guest state is written, mark the corresponding
3860 shadow area as valid. This is used to initialise arbitrarily large
3861 chunks of guest state, hence the _SIZE value, which has to be as
3862 big as the biggest guest state.
3863 */
3864 static void mc_post_reg_write ( CorePart part, ThreadId tid,
3865 PtrdiffT offset, SizeT size)
3866 {
3867 # define MAX_REG_WRITE_SIZE 1408
3868 UChar area[MAX_REG_WRITE_SIZE];
3869 tl_assert(size <= MAX_REG_WRITE_SIZE);
3870 VG_(memset)(area, V_BITS8_DEFINED, size);
3871 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
3872 # undef MAX_REG_WRITE_SIZE
3873 }
3874
3875 static
3876 void mc_post_reg_write_clientcall ( ThreadId tid,
3877 PtrdiffT offset, SizeT size, Addr f)
3878 {
3879 mc_post_reg_write(/*dummy*/0, tid, offset, size);
3880 }
3881
3882 /* Look at the definedness of the guest's shadow state for
3883 [offset, offset+len). If any part of that is undefined, record
3884 a parameter error.
3885 */
3886 static void mc_pre_reg_read ( CorePart part, ThreadId tid, Char* s,
3887 PtrdiffT offset, SizeT size)
3888 {
3889 Int i;
3890 Bool bad;
3891 UInt otag;
3892
3893 UChar area[16];
3894 tl_assert(size <= 16);
3895
3896 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
3897
3898 bad = False;
3899 for (i = 0; i < size; i++) {
3900 if (area[i] != V_BITS8_DEFINED) {
3901 bad = True;
3902 break;
3903 }
3904 }
3905
3906 if (!bad)
3907 return;
3908
3909 /* We've found some undefinedness. See if we can also find an
3910 origin for it. */
3911 otag = mb_get_origin_for_guest_offset( tid, offset, size );
3912 MC_(record_regparam_error) ( tid, s, otag );
3913 }
3914
3915
3916 /*------------------------------------------------------------*/
3917 /*--- Functions called directly from generated code: ---*/
3918 /*--- Load/store handlers. ---*/
3919 /*------------------------------------------------------------*/
3920
3921 /* Types: LOADV32, LOADV16, LOADV8 are:
3922 UWord fn ( Addr a )
3923 so they return 32-bits on 32-bit machines and 64-bits on
3924 64-bit machines. Addr has the same size as a host word.
3925
3926 LOADV64 is always ULong fn ( Addr a )
3927
3928 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
3929 are a UWord, and for STOREV64 they are a ULong.
3930 */
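/* Illustrative example (hypothetical, for exposition only): for a 32-bit
   little-endian load the instrumenter in effect emits

      UWord shadow = MC_(helperc_LOADV32le)( addr );  // expanded V bits

   and the returned word becomes the shadow of the loaded value, while a
   store of a value whose shadow is 'sv' becomes, in effect,

      MC_(helperc_STOREV32le)( addr, sv );
*/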
3931
3932 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
3933 naturally '_sz/8'-aligned, or it exceeds the range covered by the
3934 primary map. This is all very tricky (and important!), so let's
3935 work through the maths by hand (below), *and* assert for these
3936 values at startup. */
3937 #define MASK(_szInBytes) \
3938 ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
3939
3940 /* MASK only exists so as to define this macro. */
3941 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
3942 ((_a) & MASK((_szInBits>>3)))
3943
3944 /* On a 32-bit machine:
3945
3946 N_PRIMARY_BITS == 16, so
3947 N_PRIMARY_MAP == 0x10000, so
3948 N_PRIMARY_MAP-1 == 0xFFFF, so
3949 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
3950
3951 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
3952 = ~ ( 0xFFFF | 0xFFFF0000 )
3953 = ~ 0xFFFF'FFFF
3954 = 0
3955
3956 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
3957 = ~ ( 0xFFFE | 0xFFFF0000 )
3958 = ~ 0xFFFF'FFFE
3959 = 1
3960
3961 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
3962 = ~ ( 0xFFFC | 0xFFFF0000 )
3963 = ~ 0xFFFF'FFFC
3964 = 3
3965
3966 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
3967 = ~ ( 0xFFF8 | 0xFFFF0000 )
3968 = ~ 0xFFFF'FFF8
3969 = 7
3970
3971 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
3972 precisely when a is not 1/2/4/8-bytes aligned. And obviously, for
3973 the 1-byte alignment case, it is always a zero value, since MASK(1)
3974 is zero. All as expected.
3975
3976 On a 64-bit machine, it's more complex, since we're testing
3977 simultaneously for misalignment and for the address being at or
3978 above 32G:
3979
3980 N_PRIMARY_BITS == 19, so
3981 N_PRIMARY_MAP == 0x80000, so
3982 N_PRIMARY_MAP-1 == 0x7FFFF, so
3983 (N_PRIMARY_MAP-1) << 16 == 0x7FFFF'0000, and so
3984
3985 MASK(1) = ~ ( (0x10000 - 1) | 0x7FFFF'0000 )
3986 = ~ ( 0xFFFF | 0x7FFFF'0000 )
3987 = ~ 0x7FFFF'FFFF
3988 = 0xFFFF'FFF8'0000'0000
3989
3990 MASK(2) = ~ ( (0x10000 - 2) | 0x7FFFF'0000 )
3991 = ~ ( 0xFFFE | 0x7FFFF'0000 )
3992 = ~ 0x7FFFF'FFFE
3993 = 0xFFFF'FFF8'0000'0001
3994
3995 MASK(4) = ~ ( (0x10000 - 4) | 0x7FFFF'0000 )
3996 = ~ ( 0xFFFC | 0x7FFFF'0000 )
3997 = ~ 0x7FFFF'FFFC
3998 = 0xFFFF'FFF8'0000'0003
3999
4000 MASK(8) = ~ ( (0x10000 - 8) | 0x7FFFF'0000 )
4001 = ~ ( 0xFFF8 | 0x7FFFF'0000 )
4002 = ~ 0x7FFFF'FFF8
4003 = 0xFFFF'FFF8'0000'0007
4004 */
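/* A minimal sketch of the startup assertions the comment above refers to
   (assumption: these are the 32-bit values from the worked example; on a
   64-bit machine the expected constants are the large values shown above):

      // hypothetical, for exposition only -- 32-bit case
      tl_assert( MASK(1) == 0x0UL );
      tl_assert( MASK(2) == 0x1UL );
      tl_assert( MASK(4) == 0x3UL );
      tl_assert( MASK(8) == 0x7UL );

   A nonzero UNALIGNED_OR_HIGH(a, sz) then means either a is not sz/8-aligned
   or a lies above the range covered by the primary map. */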
4005
4006
4007 /* ------------------------ Size = 8 ------------------------ */
4008
4009 static INLINE
4010 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4011 {
4012 PROF_EVENT(200, "mc_LOADV64");
4013
4014 #ifndef PERF_FAST_LOADV
4015 return mc_LOADVn_slow( a, 64, isBigEndian );
4016 #else
4017 {
4018 UWord sm_off16, vabits16;
4019 SecMap* sm;
4020
4021 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4022 PROF_EVENT(201, "mc_LOADV64-slow1");
4023 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4024 }
4025
4026 sm = get_secmap_for_reading_low(a);
4027 sm_off16 = SM_OFF_16(a);
4028 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4029
4030 // Handle common case quickly: a is suitably aligned, is mapped, and
4031 // addressible.
4032 // Convert V bits from compact memory form to expanded register form.
4033 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4034 return V_BITS64_DEFINED;
4035 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4036 return V_BITS64_UNDEFINED;
4037 } else {
4038 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4039 PROF_EVENT(202, "mc_LOADV64-slow2");
4040 return mc_LOADVn_slow( a, 64, isBigEndian );
4041 }
4042 }
4043 #endif
4044 }
4045
4046 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4047 {
4048 return mc_LOADV64(a, True);
4049 }
4050 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4051 {
4052 return mc_LOADV64(a, False);
4053 }
4054
4055
4056 static INLINE
4057 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4058 {
4059 PROF_EVENT(210, "mc_STOREV64");
4060
4061 #ifndef PERF_FAST_STOREV
4062 // XXX: this slow case seems to be marginally faster than the fast case!
4063 // Investigate further.
4064 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4065 #else
4066 {
4067 UWord sm_off16, vabits16;
4068 SecMap* sm;
4069
4070 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4071 PROF_EVENT(211, "mc_STOREV64-slow1");
4072 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4073 return;
4074 }
4075
4076 sm = get_secmap_for_reading_low(a);
4077 sm_off16 = SM_OFF_16(a);
4078 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4079
4080 if (LIKELY( !is_distinguished_sm(sm) &&
4081 (VA_BITS16_DEFINED == vabits16 ||
4082 VA_BITS16_UNDEFINED == vabits16) ))
4083 {
4084 /* Handle common case quickly: a is suitably aligned, */
4085 /* is mapped, and is addressible. */
4086 // Convert full V-bits in register to compact 2-bit form.
4087 if (V_BITS64_DEFINED == vbits64) {
4088 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
4089 } else if (V_BITS64_UNDEFINED == vbits64) {
4090 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
4091 } else {
4092 /* Slow but general case -- writing partially defined bytes. */
4093 PROF_EVENT(212, "mc_STOREV64-slow2");
4094 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4095 }
4096 } else {
4097 /* Slow but general case. */
4098 PROF_EVENT(213, "mc_STOREV64-slow3");
4099 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4100 }
4101 }
4102 #endif
4103 }
4104
4105 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4106 {
4107 mc_STOREV64(a, vbits64, True);
4108 }
4109 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
4110 {
4111 mc_STOREV64(a, vbits64, False);
4112 }
4113
4114
4115 /* ------------------------ Size = 4 ------------------------ */
4116
4117 static INLINE
4118 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
4119 {
4120 PROF_EVENT(220, "mc_LOADV32");
4121
4122 #ifndef PERF_FAST_LOADV
4123 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4124 #else
4125 {
4126 UWord sm_off, vabits8;
4127 SecMap* sm;
4128
4129 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4130 PROF_EVENT(221, "mc_LOADV32-slow1");
4131 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4132 }
4133
4134 sm = get_secmap_for_reading_low(a);
4135 sm_off = SM_OFF(a);
4136 vabits8 = sm->vabits8[sm_off];
4137
4138 // Handle common case quickly: a is suitably aligned, is mapped, and the
4139 // entire word32 it lives in is addressible.
4140 // Convert V bits from compact memory form to expanded register form.
4141 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
4142 // Almost certainly not necessary, but be paranoid.
4143 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4144 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
4145 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
4146 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
4147 } else {
4148 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
4149 PROF_EVENT(222, "mc_LOADV32-slow2");
4150 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4151 }
4152 }
4153 #endif
4154 }
4155
4156 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
4157 {
4158 return mc_LOADV32(a, True);
4159 }
4160 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
4161 {
4162 return mc_LOADV32(a, False);
4163 }
4164
4165
4166 static INLINE
4167 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
4168 {
4169 PROF_EVENT(230, "mc_STOREV32");
4170
4171 #ifndef PERF_FAST_STOREV
4172 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4173 #else
4174 {
4175 UWord sm_off, vabits8;
4176 SecMap* sm;
4177
4178 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4179 PROF_EVENT(231, "mc_STOREV32-slow1");
4180 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4181 return;
4182 }
4183
4184 sm = get_secmap_for_reading_low(a);
4185 sm_off = SM_OFF(a);
4186 vabits8 = sm->vabits8[sm_off];
4187
4188 // Cleverness: sometimes we don't have to write the shadow memory at
4189 // all, if we can tell that what we want to write is the same as what is
4190 // already there. The 64/16/8 bit cases also have cleverness at this
4191 // point, but it works a little differently to the code below.
4192 if (V_BITS32_DEFINED == vbits32) {
4193 if (vabits8 == (UInt)VA_BITS8_DEFINED) {
4194 return;
4195 } else if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
4196 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
4197 } else {
4198 // not defined/undefined, or distinguished and changing state
4199 PROF_EVENT(232, "mc_STOREV32-slow2");
4200 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4201 }
4202 } else if (V_BITS32_UNDEFINED == vbits32) {
4203 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
4204 return;
4205 } else if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
4206 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
4207 } else {
4208 // not defined/undefined, or distinguished and changing state
4209 PROF_EVENT(233, "mc_STOREV32-slow3");
4210 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4211 }
4212 } else {
4213 // Partially defined word
4214 PROF_EVENT(234, "mc_STOREV32-slow4");
4215 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4216 }
4217 }
4218 #endif
4219 }
4220
4221 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
4222 {
4223 mc_STOREV32(a, vbits32, True);
4224 }
4225 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
4226 {
4227 mc_STOREV32(a, vbits32, False);
4228 }
4229
4230
4231 /* ------------------------ Size = 2 ------------------------ */
4232
4233 static INLINE
4234 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
4235 {
4236 PROF_EVENT(240, "mc_LOADV16");
4237
4238 #ifndef PERF_FAST_LOADV
4239 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4240 #else
4241 {
4242 UWord sm_off, vabits8;
4243 SecMap* sm;
4244
4245 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4246 PROF_EVENT(241, "mc_LOADV16-slow1");
4247 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4248 }
4249
4250 sm = get_secmap_for_reading_low(a);
4251 sm_off = SM_OFF(a);
4252 vabits8 = sm->vabits8[sm_off];
4253 // Handle common case quickly: a is suitably aligned, is mapped, and is
4254 // addressible.
4255 // Convert V bits from compact memory form to expanded register form
4256 if (vabits8 == VA_BITS8_DEFINED ) { return V_BITS16_DEFINED; }
4257 else if (vabits8 == VA_BITS8_UNDEFINED) { return V_BITS16_UNDEFINED; }
4258 else {
4259 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4260 // the two sub-bytes.
4261 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
4262 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; }
4263 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
4264 else {
4265 /* Slow case: the two bytes are not all-defined or all-undefined. */
4266 PROF_EVENT(242, "mc_LOADV16-slow2");
4267 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4268 }
4269 }
4270 }
4271 #endif
4272 }
4273
4274 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
4275 {
4276 return mc_LOADV16(a, True);
4277 }
4278 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
4279 {
4280 return mc_LOADV16(a, False);
4281 }
4282
4283
4284 static INLINE
4285 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
4286 {
4287 PROF_EVENT(250, "mc_STOREV16");
4288
4289 #ifndef PERF_FAST_STOREV
4290 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4291 #else
4292 {
4293 UWord sm_off, vabits8;
4294 SecMap* sm;
4295
4296 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4297 PROF_EVENT(251, "mc_STOREV16-slow1");
4298 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4299 return;
4300 }
4301
4302 sm = get_secmap_for_reading_low(a);
4303 sm_off = SM_OFF(a);
4304 vabits8 = sm->vabits8[sm_off];
4305 if (LIKELY( !is_distinguished_sm(sm) &&
4306 (VA_BITS8_DEFINED == vabits8 ||
4307 VA_BITS8_UNDEFINED == vabits8) ))
4308 {
4309 /* Handle common case quickly: a is suitably aligned, */
4310 /* is mapped, and is addressible. */
4311 // Convert full V-bits in register to compact 2-bit form.
4312 if (V_BITS16_DEFINED == vbits16) {
4313 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED ,
4314 &(sm->vabits8[sm_off]) );
4315 } else if (V_BITS16_UNDEFINED == vbits16) {
4316 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
4317 &(sm->vabits8[sm_off]) );
4318 } else {
4319 /* Slow but general case -- writing partially defined bytes. */
4320 PROF_EVENT(252, "mc_STOREV16-slow2");
4321 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4322 }
4323 } else {
4324 /* Slow but general case. */
4325 PROF_EVENT(253, "mc_STOREV16-slow3");
4326 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4327 }
4328 }
4329 #endif
4330 }
4331
4332 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
4333 {
4334 mc_STOREV16(a, vbits16, True);
4335 }
4336 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
4337 {
4338 mc_STOREV16(a, vbits16, False);
4339 }
4340
4341
4342 /* ------------------------ Size = 1 ------------------------ */
4343 /* Note: endianness is irrelevant for size == 1 */
4344
4345 VG_REGPARM(1)
4346 UWord MC_(helperc_LOADV8) ( Addr a )
4347 {
4348 PROF_EVENT(260, "mc_LOADV8");
4349
4350 #ifndef PERF_FAST_LOADV
4351 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4352 #else
4353 {
4354 UWord sm_off, vabits8;
4355 SecMap* sm;
4356
4357 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4358 PROF_EVENT(261, "mc_LOADV8-slow1");
4359 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4360 }
4361
4362 sm = get_secmap_for_reading_low(a);
4363 sm_off = SM_OFF(a);
4364 vabits8 = sm->vabits8[sm_off];
4365 // Convert V bits from compact memory form to expanded register form
4366 // Handle common case quickly: a is mapped, and the entire
4367 // word32 it lives in is addressible.
4368 if (vabits8 == VA_BITS8_DEFINED ) { return V_BITS8_DEFINED; }
4369 else if (vabits8 == VA_BITS8_UNDEFINED) { return V_BITS8_UNDEFINED; }
4370 else {
4371 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4372 // the single byte.
4373 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
4374 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; }
4375 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
4376 else {
4377 /* Slow case: the byte is not all-defined or all-undefined. */
4378 PROF_EVENT(262, "mc_LOADV8-slow2");
4379 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4380 }
4381 }
4382 }
4383 #endif
4384 }
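/* A note on the compact encoding used on the fast path above: each byte
   of memory is tracked by a 2-bit VA field, so one vabits8 byte covers a
   4-byte-aligned group of four memory bytes.  As an illustrative sketch
   (assuming the VA_BITS2_ and VA_BITS8_ encodings defined earlier in
   this file, where DEFINED is binary 10 and UNDEFINED is binary 01): a
   fully defined group packs to binary 10101010 = 0xAA and a fully
   undefined group to 01010101 = 0x55, which is why the common cases
   reduce to a single byte comparison, and only mixed groups need the
   per-byte extract_vabits2_from_vabits8() step or the slow path. */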
4385
4386
4387 VG_REGPARM(2)
4388 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
4389 {
4390 PROF_EVENT(270, "mc_STOREV8");
4391
4392 #ifndef PERF_FAST_STOREV
4393 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4394 #else
4395 {
4396 UWord sm_off, vabits8;
4397 SecMap* sm;
4398
4399 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4400 PROF_EVENT(271, "mc_STOREV8-slow1");
4401 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4402 return;
4403 }
4404
4405 sm = get_secmap_for_reading_low(a);
4406 sm_off = SM_OFF(a);
4407 vabits8 = sm->vabits8[sm_off];
4408 if (LIKELY
4409 ( !is_distinguished_sm(sm) &&
4410 ( (VA_BITS8_DEFINED == vabits8 || VA_BITS8_UNDEFINED == vabits8)
4411 || (VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8))
4412 )
4413 )
4414 )
4415 {
4416 /* Handle common case quickly: a is mapped, the entire word32 it
4417 lives in is addressable. */
4418 // Convert full V-bits in register to compact 2-bit form.
4419 if (V_BITS8_DEFINED == vbits8) {
4420 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
4421 &(sm->vabits8[sm_off]) );
4422 } else if (V_BITS8_UNDEFINED == vbits8) {
4423 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
4424 &(sm->vabits8[sm_off]) );
4425 } else {
4426 /* Slow but general case -- writing partially defined bytes. */
4427 PROF_EVENT(272, "mc_STOREV8-slow2");
4428 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4429 }
4430 } else {
4431 /* Slow but general case. */
4432 PROF_EVENT(273, "mc_STOREV8-slow3");
4433 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4434 }
4435 }
4436 #endif
4437 }
4438
4439
4440 /*------------------------------------------------------------*/
4441 /*--- Functions called directly from generated code: ---*/
4442 /*--- Value-check failure handlers. ---*/
4443 /*------------------------------------------------------------*/
4444
4445 /* Call these ones when an origin is available ... */
4446 VG_REGPARM(1)
4447 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
4448 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
4449 }
4450
4451 VG_REGPARM(1)
4452 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
4453 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
4454 }
4455
4456 VG_REGPARM(1)
4457 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
4458 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
4459 }
4460
4461 VG_REGPARM(1)
4462 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
4463 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
4464 }
4465
4466 VG_REGPARM(2)
4467 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
4468 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
4469 }
4470
4471 /* ... and these when an origin isn't available. */
4472
4473 VG_REGPARM(0)
4474 void MC_(helperc_value_check0_fail_no_o) ( void ) {
4475 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
4476 }
4477
4478 VG_REGPARM(0)
4479 void MC_(helperc_value_check1_fail_no_o) ( void ) {
4480 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
4481 }
4482
4483 VG_REGPARM(0)
4484 void MC_(helperc_value_check4_fail_no_o) ( void ) {
4485 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
4486 }
4487
4488 VG_REGPARM(0)
4489 void MC_(helperc_value_check8_fail_no_o) ( void ) {
4490 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
4491 }
4492
4493 VG_REGPARM(1)
4494 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
4495 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
4496 }
4497
4498
4499 /*------------------------------------------------------------*/
4500 /*--- Metadata get/set functions, for client requests. ---*/
4501 /*------------------------------------------------------------*/
4502
4503 // Nb: this expands the V+A bits out into register-form V bits, even though
4504 // they're in memory. This is for backward compatibility, and because it's
4505 // probably what the user wants.
4506
4507 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
4508 error [no longer used], 3 == addressing error. */
4509 /* Nb: We used to issue various definedness/addressability errors from here,
4510 but we took them out because they ranged from not-very-helpful to
4511 downright annoying, and they complicated the error data structures. */
4512 static Int mc_get_or_set_vbits_for_client (
4513 Addr a,
4514 Addr vbits,
4515 SizeT szB,
4516 Bool setting /* True <=> set vbits, False <=> get vbits */
4517 )
4518 {
4519 SizeT i;
4520 Bool ok;
4521 UChar vbits8;
4522
4523 /* Check that arrays are addressable before doing any getting/setting. */
4524 for (i = 0; i < szB; i++) {
4525 if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
4526 VA_BITS2_NOACCESS == get_vabits2(vbits + i)) {
4527 return 3;
4528 }
4529 }
4530
4531 /* Do the copy */
4532 if (setting) {
4533 /* setting */
4534 for (i = 0; i < szB; i++) {
4535 ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
4536 tl_assert(ok);
4537 }
4538 } else {
4539 /* getting */
4540 for (i = 0; i < szB; i++) {
4541 ok = get_vbits8(a + i, &vbits8);
4542 tl_assert(ok);
4543 ((UChar*)vbits)[i] = vbits8;
4544 }
4545 // The bytes in vbits[] have now been set, so mark them as such.
4546 MC_(make_mem_defined)(vbits, szB);
4547 }
4548
4549 return 1;
4550 }
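/* Illustrative client-side sketch (not part of this file): the
   GET_VBITS/SET_VBITS requests serviced by the function above are
   normally reached through the wrapper macros in memcheck.h.  The macro
   names below are assumed from that header; treat this as a usage
   sketch rather than a definitive reference.

       #include <string.h>
       #include "memcheck.h"

       int x;                               // deliberately uninitialised
       unsigned char vbits[sizeof(x)];

       // Copy out x's V bits: 0x00 bytes are defined, 0xFF undefined.
       VALGRIND_GET_VBITS(&x, vbits, sizeof(x));

       // Force x to be treated as fully undefined again.
       memset(vbits, 0xFF, sizeof(vbits));
       VALGRIND_SET_VBITS(&x, vbits, sizeof(x));

   A return of 1 means OK; 3 means one of the two buffers was not
   addressable (see the checks at the top of the function above). */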
4551
4552
4553 /*------------------------------------------------------------*/
4554 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
4555 /*------------------------------------------------------------*/
4556
4557 /* For the memory leak detector, say whether an entire 64k chunk of
4558 address space is possibly in use, or not. If in doubt return
4559 True.
4560 */
4561 Bool MC_(is_within_valid_secondary) ( Addr a )
4562 {
4563 SecMap* sm = maybe_get_secmap_for ( a );
4564 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]
4565 || MC_(in_ignored_range)(a)) {
4566 /* Definitely not in use. */
4567 return False;
4568 } else {
4569 return True;
4570 }
4571 }
4572
4573
4574 /* For the memory leak detector, say whether or not a given word
4575 address is to be regarded as valid. */
4576 Bool MC_(is_valid_aligned_word) ( Addr a )
4577 {
4578 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
4579 tl_assert(VG_IS_WORD_ALIGNED(a));
4580 if (is_mem_defined( a, sizeof(UWord), NULL, NULL) == MC_Ok
4581 && !MC_(in_ignored_range)(a)) {
4582 return True;
4583 } else {
4584 return False;
4585 }
4586 }
4587
4588
4589 /*------------------------------------------------------------*/
4590 /*--- Initialisation ---*/
4591 /*------------------------------------------------------------*/
4592
4593 static void init_shadow_memory ( void )
4594 {
4595 Int i;
4596 SecMap* sm;
4597
4598 tl_assert(V_BIT_UNDEFINED == 1);
4599 tl_assert(V_BIT_DEFINED == 0);
4600 tl_assert(V_BITS8_UNDEFINED == 0xFF);
4601 tl_assert(V_BITS8_DEFINED == 0);
4602
4603 /* Build the 3 distinguished secondaries */
4604 sm = &sm_distinguished[SM_DIST_NOACCESS];
4605 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
4606
4607 sm = &sm_distinguished[SM_DIST_UNDEFINED];
4608 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
4609
4610 sm = &sm_distinguished[SM_DIST_DEFINED];
4611 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
4612
4613 /* Set up the primary map. */
4614 /* These entries gradually get overwritten as the used address
4615 space expands. */
4616 for (i = 0; i < N_PRIMARY_MAP; i++)
4617 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
4618
4619 /* Auxiliary primary maps */
4620 init_auxmap_L1_L2();
4621
4622 /* auxmap_size = auxmap_used = 0;
4623 no ... these are statically initialised */
4624
4625 /* Secondary V bit table */
4626 secVBitTable = createSecVBitTable();
4627 }
4628
4629
4630 /*------------------------------------------------------------*/
4631 /*--- Sanity check machinery (permanently engaged) ---*/
4632 /*------------------------------------------------------------*/
4633
4634 static Bool mc_cheap_sanity_check ( void )
4635 {
4636 n_sanity_cheap++;
4637 PROF_EVENT(490, "cheap_sanity_check");
4638 /* Check for sane operating level */
4639 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
4640 return False;
4641 /* nothing else useful we can rapidly check */
4642 return True;
4643 }
4644
4645 static Bool mc_expensive_sanity_check ( void )
4646 {
4647 Int i;
4648 Word n_secmaps_found;
4649 SecMap* sm;
4650 HChar* errmsg;
4651 Bool bad = False;
4652
4653 if (0) VG_(printf)("expensive sanity check\n");
4654 if (0) return True;
4655
4656 n_sanity_expensive++;
4657 PROF_EVENT(491, "expensive_sanity_check");
4658
4659 /* Check for sane operating level */
4660 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
4661 return False;
4662
4663 /* Check that the 3 distinguished SMs are still as they should be. */
4664
4665 /* Check noaccess DSM. */
4666 sm = &sm_distinguished[SM_DIST_NOACCESS];
4667 for (i = 0; i < SM_CHUNKS; i++)
4668 if (sm->vabits8[i] != VA_BITS8_NOACCESS)
4669 bad = True;
4670
4671 /* Check undefined DSM. */
4672 sm = &sm_distinguished[SM_DIST_UNDEFINED];
4673 for (i = 0; i < SM_CHUNKS; i++)
4674 if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
4675 bad = True;
4676
4677 /* Check defined DSM. */
4678 sm = &sm_distinguished[SM_DIST_DEFINED];
4679 for (i = 0; i < SM_CHUNKS; i++)
4680 if (sm->vabits8[i] != VA_BITS8_DEFINED)
4681 bad = True;
4682
4683 if (bad) {
4684 VG_(printf)("memcheck expensive sanity: "
4685 "distinguished_secondaries have changed\n");
4686 return False;
4687 }
4688
4689 /* If we're not checking for undefined value errors, the secondary V bit
4690 * table should be empty. */
4691 if (MC_(clo_mc_level) == 1) {
4692 if (0 != VG_(OSetGen_Size)(secVBitTable))
4693 return False;
4694 }
4695
4696 /* check the auxiliary maps, very thoroughly */
4697 n_secmaps_found = 0;
4698 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
4699 if (errmsg) {
4700 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
4701 return False;
4702 }
4703
4704 /* n_secmaps_found is now the number referred to by the auxiliary
4705 primary map. Now add on the ones referred to by the main
4706 primary map. */
4707 for (i = 0; i < N_PRIMARY_MAP; i++) {
4708 if (primary_map[i] == NULL) {
4709 bad = True;
4710 } else {
4711 if (!is_distinguished_sm(primary_map[i]))
4712 n_secmaps_found++;
4713 }
4714 }
4715
4716 /* check that the number of secmaps issued matches the number that
4717 are reachable (iow, no secmap leaks) */
4718 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
4719 bad = True;
4720
4721 if (bad) {
4722 VG_(printf)("memcheck expensive sanity: "
4723 "apparent secmap leakage\n");
4724 return False;
4725 }
4726
4727 if (bad) {
4728 VG_(printf)("memcheck expensive sanity: "
4729 "auxmap covers wrong address space\n");
4730 return False;
4731 }
4732
4733 /* there is only one pointer to each secmap (expensive) */
4734
4735 return True;
4736 }
4737
4738 /*------------------------------------------------------------*/
4739 /*--- Command line args ---*/
4740 /*------------------------------------------------------------*/
4741
4742 Bool MC_(clo_partial_loads_ok) = False;
4743 Long MC_(clo_freelist_vol) = 20*1000*1000LL;
4744 LeakCheckMode MC_(clo_leak_check) = LC_Summary;
4745 VgRes MC_(clo_leak_resolution) = Vg_HighRes;
4746 Bool MC_(clo_show_reachable) = False;
4747 Bool MC_(clo_show_possibly_lost) = True;
4748 Bool MC_(clo_workaround_gcc296_bugs) = False;
4749 Int MC_(clo_malloc_fill) = -1;
4750 Int MC_(clo_free_fill) = -1;
4751 Int MC_(clo_mc_level) = 2;
4752 const char* MC_(clo_summary_file) = NULL;
4753
4754
4755 static Bool mc_process_cmd_line_options(Char* arg)
4756 {
4757 Char* tmp_str;
4758
4759 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
4760
4761 /* Set MC_(clo_mc_level):
4762 1 = A bit tracking only
4763 2 = A and V bit tracking, but no V bit origins
4764 3 = A and V bit tracking, and V bit origins
4765
4766 Do this by inspecting --undef-value-errors= and
4767 --track-origins=. Reject the case --undef-value-errors=no
4768 --track-origins=yes as meaningless.
4769 */
4770 if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
4771 if (MC_(clo_mc_level) == 3) {
4772 goto bad_level;
4773 } else {
4774 MC_(clo_mc_level) = 1;
4775 return True;
4776 }
4777 }
4778 if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
4779 if (MC_(clo_mc_level) == 1)
4780 MC_(clo_mc_level) = 2;
4781 return True;
4782 }
4783 if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
4784 if (MC_(clo_mc_level) == 3)
4785 MC_(clo_mc_level) = 2;
4786 return True;
4787 }
4788 if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
4789 if (MC_(clo_mc_level) == 1) {
4790 goto bad_level;
4791 } else {
4792 MC_(clo_mc_level) = 3;
4793 return True;
4794 }
4795 }
4796
4797 if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
4798 else if VG_BOOL_CLO(arg, "--show-reachable", MC_(clo_show_reachable)) {}
4799 else if VG_BOOL_CLO(arg, "--show-possibly-lost",
4800 MC_(clo_show_possibly_lost)) {}
4801 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
4802 MC_(clo_workaround_gcc296_bugs)) {}
4803
4804 else if VG_BINT_CLO(arg, "--freelist-vol", MC_(clo_freelist_vol),
4805 0, 10*1000*1000*1000LL) {}
4806
4807 else if VG_XACT_CLO(arg, "--leak-check=no",
4808 MC_(clo_leak_check), LC_Off) {}
4809 else if VG_XACT_CLO(arg, "--leak-check=summary",
4810 MC_(clo_leak_check), LC_Summary) {}
4811 else if VG_XACT_CLO(arg, "--leak-check=yes",
4812 MC_(clo_leak_check), LC_Full) {}
4813 else if VG_XACT_CLO(arg, "--leak-check=full",
4814 MC_(clo_leak_check), LC_Full) {}
4815
4816 else if VG_XACT_CLO(arg, "--leak-resolution=low",
4817 MC_(clo_leak_resolution), Vg_LowRes) {}
4818 else if VG_XACT_CLO(arg, "--leak-resolution=med",
4819 MC_(clo_leak_resolution), Vg_MedRes) {}
4820 else if VG_XACT_CLO(arg, "--leak-resolution=high",
4821 MC_(clo_leak_resolution), Vg_HighRes) {}
4822
4823 else if VG_STR_CLO(arg, "--summary-file", tmp_str) {
4824 MC_(clo_summary_file) = VG_(strdup)("clo_summary_file", tmp_str);
4825 }
4826 else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
4827 Int i;
4828 Bool ok = parse_ignore_ranges(tmp_str);
4829 if (!ok)
4830 return False;
4831 tl_assert(ignoreRanges.used >= 0);
4832 tl_assert(ignoreRanges.used < M_IGNORE_RANGES);
4833 for (i = 0; i < ignoreRanges.used; i++) {
4834 Addr s = ignoreRanges.start[i];
4835 Addr e = ignoreRanges.end[i];
4836 Addr limit = 0x4000000; /* 64M - entirely arbitrary limit */
4837 if (e <= s) {
4838 VG_(message)(Vg_DebugMsg,
4839 "ERROR: --ignore-ranges: end <= start in range:\n");
4840 VG_(message)(Vg_DebugMsg,
4841 " 0x%lx-0x%lx\n", s, e);
4842 return False;
4843 }
4844 if (e - s > limit) {
4845 VG_(message)(Vg_DebugMsg,
4846 "ERROR: --ignore-ranges: suspiciously large range:\n");
4847 VG_(message)(Vg_DebugMsg,
4848 " 0x%lx-0x%lx (size %ld)\n", s, e, (UWord)(e-s));
4849 return False;
4850 }
4851 }
4852 }
4853
4854 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
4855 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {}
4856
4857 else
4858 return VG_(replacement_malloc_process_cmd_line_option)(arg);
4859
4860 return True;
4861
4862
4863 bad_level:
4864 VG_(fmsg_bad_option)(arg,
4865 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
4866 }
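/* For reference, typical invocations exercising the options parsed
   above (a usage sketch, not an exhaustive list):

       valgrind --tool=memcheck --leak-check=full --show-reachable=yes ./prog
       valgrind --tool=memcheck --track-origins=yes ./prog
       valgrind --tool=memcheck --undef-value-errors=no ./prog

   The second form raises MC_(clo_mc_level) to 3, the third lowers it to
   1 (A-bit tracking only), and combining --undef-value-errors=no with
   --track-origins=yes is rejected via the bad_level path above. */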
4867
4868 static void mc_print_usage(void)
4869 {
4870 VG_(printf)(
4871 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
4872 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
4873 " --show-reachable=no|yes show reachable blocks in leak check? [no]\n"
4874 " --show-possibly-lost=no|yes show possibly lost blocks in leak check?\n"
4875 " [yes]\n"
4876 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
4877 " --track-origins=no|yes show origins of undefined values? [no]\n"
4878 " --partial-loads-ok=no|yes too hard to explain here; see manual [no]\n"
4879 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
4880 " --workaround-gcc296-bugs=no|yes self explanatory [no]\n"
4881 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
4882 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
4883 " --free-fill=<hexnumber> fill free'd areas with given value\n"
4884 );
4885 }
4886
4887 static void mc_print_debug_usage(void)
4888 {
4889 VG_(printf)(
4890 " (none)\n"
4891 );
4892 }
4893
4894
4895 /*------------------------------------------------------------*/
4896 /*--- Client blocks ---*/
4897 /*------------------------------------------------------------*/
4898
4899 /* Client block management:
4900
4901 This is managed as an expanding array of client block descriptors.
4902 Indices of live descriptors are issued to the client, so it can ask
4903 to free them later. Therefore we cannot slide live entries down
4904 over dead ones. Instead we must use free/inuse flags and scan for
4905 an empty slot at allocation time. This in turn means allocation is
4906 relatively expensive, so we hope this does not happen too often.
4907
4908 An unused block has start == size == 0
4909 */
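/* From the client's point of view, the descriptor indices issued here
   are what the CREATE_BLOCK / DISCARD requests handled further down
   traffic in.  A hedged sketch, assuming the wrapper macro names
   exported by memcheck.h:

       int id = VALGRIND_CREATE_BLOCK(buf, buflen, "my parser buffer");
       ...
       VALGRIND_DISCARD(id);   // non-zero result => id was not valid

   so, while the block is registered, errors inside [buf, buf+buflen)
   can be reported against the given description. */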
4910
4911 /* type CGenBlock is defined in mc_include.h */
4912
4913 /* This subsystem is self-initialising. */
4914 static UWord cgb_size = 0;
4915 static UWord cgb_used = 0;
4916 static CGenBlock* cgbs = NULL;
4917
4918 /* Stats for this subsystem. */
4919 static ULong cgb_used_MAX = 0; /* Max in use. */
4920 static ULong cgb_allocs = 0; /* Number of allocs. */
4921 static ULong cgb_discards = 0; /* Number of discards. */
4922 static ULong cgb_search = 0; /* Number of searches. */
4923
4924
4925 /* Get access to the client block array. */
4926 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
4927 /*OUT*/UWord* nBlocks )
4928 {
4929 *blocks = cgbs;
4930 *nBlocks = cgb_used;
4931 }
4932
4933
4934 static
4935 Int alloc_client_block ( void )
4936 {
4937 UWord i, sz_new;
4938 CGenBlock* cgbs_new;
4939
4940 cgb_allocs++;
4941
4942 for (i = 0; i < cgb_used; i++) {
4943 cgb_search++;
4944 if (cgbs[i].start == 0 && cgbs[i].size == 0)
4945 return i;
4946 }
4947
4948 /* Not found. Try to allocate one at the end. */
4949 if (cgb_used < cgb_size) {
4950 cgb_used++;
4951 return cgb_used-1;
4952 }
4953
4954 /* Ok, we have to allocate a new one. */
4955 tl_assert(cgb_used == cgb_size);
4956 sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
4957
4958 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
4959 for (i = 0; i < cgb_used; i++)
4960 cgbs_new[i] = cgbs[i];
4961
4962 if (cgbs != NULL)
4963 VG_(free)( cgbs );
4964 cgbs = cgbs_new;
4965
4966 cgb_size = sz_new;
4967 cgb_used++;
4968 if (cgb_used > cgb_used_MAX)
4969 cgb_used_MAX = cgb_used;
4970 return cgb_used-1;
4971 }
4972
4973
4974 static void show_client_block_stats ( void )
4975 {
4976 VG_(message)(Vg_DebugMsg,
4977 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
4978 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
4979 );
4980 }
4981
4982
4983 /*------------------------------------------------------------*/
4984 /*--- Client requests ---*/
4985 /*------------------------------------------------------------*/
4986
4987 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
4988 {
4989 Int i;
4990 Bool ok;
4991 Addr bad_addr;
4992
4993 if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
4994 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
4995 && VG_USERREQ__FREELIKE_BLOCK != arg[0]
4996 && VG_USERREQ__CREATE_MEMPOOL != arg[0]
4997 && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
4998 && VG_USERREQ__MEMPOOL_ALLOC != arg[0]
4999 && VG_USERREQ__MEMPOOL_FREE != arg[0]
5000 && VG_USERREQ__MEMPOOL_TRIM != arg[0]
5001 && VG_USERREQ__MOVE_MEMPOOL != arg[0]
5002 && VG_USERREQ__MEMPOOL_CHANGE != arg[0]
5003 && VG_USERREQ__MEMPOOL_EXISTS != arg[0])
5004 return False;
5005
5006 switch (arg[0]) {
5007 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE:
5008 ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
5009 if (!ok)
5010 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
5011 *ret = ok ? (UWord)NULL : bad_addr;
5012 break;
5013
5014 case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
5015 MC_ReadResult res;
5016 UInt otag = 0;
5017 res = is_mem_defined ( arg[1], arg[2], &bad_addr, &otag );
5018 if (MC_AddrErr == res)
5019 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
5020 else if (MC_ValueErr == res)
5021 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/False, otag );
5022 *ret = ( res==MC_Ok ? (UWord)NULL : bad_addr );
5023 break;
5024 }
5025
5026 case VG_USERREQ__DO_LEAK_CHECK:
5027 MC_(detect_memory_leaks)(tid, arg[1] ? LC_Summary : LC_Full);
5028 *ret = 0; /* return value is meaningless */
5029 break;
5030
5031 case VG_USERREQ__MAKE_MEM_NOACCESS:
5032 MC_(make_mem_noaccess) ( arg[1], arg[2] );
5033 *ret = -1;
5034 break;
5035
5036 case VG_USERREQ__MAKE_MEM_UNDEFINED:
5037 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
5038 MC_OKIND_USER );
5039 *ret = -1;
5040 break;
5041
5042 case VG_USERREQ__MAKE_MEM_DEFINED:
5043 MC_(make_mem_defined) ( arg[1], arg[2] );
5044 *ret = -1;
5045 break;
5046
5047 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
5048 make_mem_defined_if_addressable ( arg[1], arg[2] );
5049 *ret = -1;
5050 break;
5051
5052 case VG_USERREQ__CREATE_BLOCK: /* describe a block */
5053 if (arg[1] != 0 && arg[2] != 0) {
5054 i = alloc_client_block();
5055 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
5056 cgbs[i].start = arg[1];
5057 cgbs[i].size = arg[2];
5058 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (Char *)arg[3]);
5059 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
5060 *ret = i;
5061 } else
5062 *ret = -1;
5063 break;
5064
5065 case VG_USERREQ__DISCARD: /* discard */
5066 if (cgbs == NULL
5067 || arg[2] >= cgb_used ||
5068 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
5069 *ret = 1;
5070 } else {
5071 tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
5072 cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
5073 VG_(free)(cgbs[arg[2]].desc);
5074 cgb_discards++;
5075 *ret = 0;
5076 }
5077 break;
5078
5079 case VG_USERREQ__GET_VBITS:
5080 *ret = mc_get_or_set_vbits_for_client
5081 ( arg[1], arg[2], arg[3], False /* get them */ );
5082 break;
5083
5084 case VG_USERREQ__SET_VBITS:
5085 *ret = mc_get_or_set_vbits_for_client
5086 ( arg[1], arg[2], arg[3], True /* set them */ );
5087 break;
5088
5089 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
5090 UWord** argp = (UWord**)arg;
5091 // MC_(bytes_leaked) et al were set by the last leak check (or zero
5092 // if no prior leak checks performed).
5093 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
5094 *argp[2] = MC_(bytes_dubious);
5095 *argp[3] = MC_(bytes_reachable);
5096 *argp[4] = MC_(bytes_suppressed);
5097 // there is no argp[5]
5098 //*argp[5] = MC_(bytes_indirect);
5099 // XXX need to make *argp[1-4] defined; currently done in the
5100 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
5101 *ret = 0;
5102 return True;
5103 }
5104 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
5105 UWord** argp = (UWord**)arg;
5106 // MC_(blocks_leaked) et al were set by the last leak check (or zero
5107 // if no prior leak checks performed).
5108 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
5109 *argp[2] = MC_(blocks_dubious);
5110 *argp[3] = MC_(blocks_reachable);
5111 *argp[4] = MC_(blocks_suppressed);
5112 // there is no argp[5]
5113 //*argp[5] = MC_(blocks_indirect);
5114 // XXX need to make *argp[1-4] defined; currently done in the
5115 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
5116 *ret = 0;
5117 return True;
5118 }
5119 case VG_USERREQ__MALLOCLIKE_BLOCK: {
5120 Addr p = (Addr)arg[1];
5121 SizeT sizeB = arg[2];
5122 //UInt rzB = arg[3]; XXX: unused!
5123 Bool is_zeroed = (Bool)arg[4];
5124
5125 MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
5126 MC_AllocCustom, MC_(malloc_list) );
5127 return True;
5128 }
5129 case VG_USERREQ__FREELIKE_BLOCK: {
5130 Addr p = (Addr)arg[1];
5131 UInt rzB = arg[2];
5132
5133 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
5134 return True;
5135 }
5136
5137 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
5138 Char* s = (Char*)arg[1];
5139 Addr dst = (Addr) arg[2];
5140 Addr src = (Addr) arg[3];
5141 SizeT len = (SizeT)arg[4];
5142 MC_(record_overlap_error)(tid, s, src, dst, len);
5143 return True;
5144 }
5145
5146 case VG_USERREQ__CREATE_MEMPOOL: {
5147 Addr pool = (Addr)arg[1];
5148 UInt rzB = arg[2];
5149 Bool is_zeroed = (Bool)arg[3];
5150
5151 MC_(create_mempool) ( pool, rzB, is_zeroed );
5152 return True;
5153 }
5154
5155 case VG_USERREQ__DESTROY_MEMPOOL: {
5156 Addr pool = (Addr)arg[1];
5157
5158 MC_(destroy_mempool) ( pool );
5159 return True;
5160 }
5161
5162 case VG_USERREQ__MEMPOOL_ALLOC: {
5163 Addr pool = (Addr)arg[1];
5164 Addr addr = (Addr)arg[2];
5165 UInt size = arg[3];
5166
5167 MC_(mempool_alloc) ( tid, pool, addr, size );
5168 return True;
5169 }
5170
5171 case VG_USERREQ__MEMPOOL_FREE: {
5172 Addr pool = (Addr)arg[1];
5173 Addr addr = (Addr)arg[2];
5174
5175 MC_(mempool_free) ( pool, addr );
5176 return True;
5177 }
5178
5179 case VG_USERREQ__MEMPOOL_TRIM: {
5180 Addr pool = (Addr)arg[1];
5181 Addr addr = (Addr)arg[2];
5182 UInt size = arg[3];
5183
5184 MC_(mempool_trim) ( pool, addr, size );
5185 return True;
5186 }
5187
5188 case VG_USERREQ__MOVE_MEMPOOL: {
5189 Addr poolA = (Addr)arg[1];
5190 Addr poolB = (Addr)arg[2];
5191
5192 MC_(move_mempool) ( poolA, poolB );
5193 return True;
5194 }
5195
5196 case VG_USERREQ__MEMPOOL_CHANGE: {
5197 Addr pool = (Addr)arg[1];
5198 Addr addrA = (Addr)arg[2];
5199 Addr addrB = (Addr)arg[3];
5200 UInt size = arg[4];
5201
5202 MC_(mempool_change) ( pool, addrA, addrB, size );
5203 return True;
5204 }
5205
5206 case VG_USERREQ__MEMPOOL_EXISTS: {
5207 Addr pool = (Addr)arg[1];
5208
5209 *ret = (UWord) MC_(mempool_exists) ( pool );
5210 return True;
5211 }
5212
5213
5214 default:
5215 VG_(message)(
5216 Vg_UserMsg,
5217 "Warning: unknown memcheck client request code %llx\n",
5218 (ULong)arg[0]
5219 );
5220 return False;
5221 }
5222 return True;
5223 }
5224
5225
5226 /*------------------------------------------------------------*/
5227 /*--- Crude profiling machinery. ---*/
5228 /*------------------------------------------------------------*/
5229
5230 // We track a number of interesting events (using PROF_EVENT)
5231 // if MC_PROFILE_MEMORY is defined.
5232
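// PROF_EVENT itself comes from mc_include.h; this file only provides the
// counter storage and the init/done routines below.  As a sketch of the
// assumed shape of that macro (paraphrased, not quoted verbatim):
//
//    #ifdef MC_PROFILE_MEMORY
//    #  define PROF_EVENT(ev, name)                              \
//          do { tl_assert((ev) >= 0 && (ev) < N_PROF_EVENTS);    \
//               MC_(event_ctr)[ev]++;                            \
//               MC_(event_ctr_name)[ev] = (name);                \
//          } while (0)
//    #else
//    #  define PROF_EVENT(ev, name)
//    #endif
//
// so the PROF_EVENT calls on the fast paths above compile away entirely
// in normal (non-profiling) builds, and done_prof_mem() below prints any
// counters that ended up non-zero.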
5233 #ifdef MC_PROFILE_MEMORY
5234
5235 UInt MC_(event_ctr)[N_PROF_EVENTS];
5236 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
5237
5238 static void init_prof_mem ( void )
5239 {
5240 Int i;
5241 for (i = 0; i < N_PROF_EVENTS; i++) {
5242 MC_(event_ctr)[i] = 0;
5243 MC_(event_ctr_name)[i] = NULL;
5244 }
5245 }
5246
5247 static void done_prof_mem ( void )
5248 {
5249 Int i;
5250 Bool spaced = False;
5251 for (i = 0; i < N_PROF_EVENTS; i++) {
5252 if (!spaced && (i % 10) == 0) {
5253 VG_(printf)("\n");
5254 spaced = True;
5255 }
5256 if (MC_(event_ctr)[i] > 0) {
5257 spaced = False;
5258 VG_(printf)( "prof mem event %3d: %9d %s\n",
5259 i, MC_(event_ctr)[i],
5260 MC_(event_ctr_name)[i]
5261 ? MC_(event_ctr_name)[i] : "unnamed");
5262 }
5263 }
5264 }
5265
5266 #else
5267
5268 static void init_prof_mem ( void ) { }
5269 static void done_prof_mem ( void ) { }
5270
5271 #endif
5272
5273
5274 /*------------------------------------------------------------*/
5275 /*--- Origin tracking stuff ---*/
5276 /*------------------------------------------------------------*/
5277
5278 /*--------------------------------------------*/
5279 /*--- Origin tracking: load handlers ---*/
5280 /*--------------------------------------------*/
5281
5282 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
5283 return or1 > or2 ? or1 : or2;
5284 }
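/* A small worked example of the descr/byteoff logic used by the load
   handlers below (values illustrative only).  Suppose the cache line
   for address 'a' holds, at this line offset,

      descr = 0x6     (binary 0110: bytes 1 and 2 carry an origin tag)
      w32   = some ECU-derived otag

   Then helperc_b_load1 at byte offset 0 or 3 finds (descr & (1<<byteoff))
   clear and returns 0 (no interesting origin), while at offset 1 or 2 it
   returns w32.  helperc_b_load2 at offset 2 tests (descr & (3<<2)) and
   likewise returns w32, and a 4-byte load returns w32 whenever descr is
   non-zero.  When a wide access spans several chunks, merge_origins()
   above simply keeps the numerically larger of the two tags. */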
5285
5286 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
5287 OCacheLine* line;
5288 UChar descr;
5289 UWord lineoff = oc_line_offset(a);
5290 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5291
5292 if (OC_ENABLE_ASSERTIONS) {
5293 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5294 }
5295
5296 line = find_OCacheLine( a );
5297
5298 descr = line->descr[lineoff];
5299 if (OC_ENABLE_ASSERTIONS) {
5300 tl_assert(descr < 0x10);
5301 }
5302
5303 if (LIKELY(0 == (descr & (1 << byteoff)))) {
5304 return 0;
5305 } else {
5306 return line->w32[lineoff];
5307 }
5308 }
5309
5310 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
5311 OCacheLine* line;
5312 UChar descr;
5313 UWord lineoff, byteoff;
5314
5315 if (UNLIKELY(a & 1)) {
5316 /* Handle misaligned case, slowly. */
5317 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
5318 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
5319 return merge_origins(oLo, oHi);
5320 }
5321
5322 lineoff = oc_line_offset(a);
5323 byteoff = a & 3; /* 0 or 2 */
5324
5325 if (OC_ENABLE_ASSERTIONS) {
5326 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5327 }
5328 line = find_OCacheLine( a );
5329
5330 descr = line->descr[lineoff];
5331 if (OC_ENABLE_ASSERTIONS) {
5332 tl_assert(descr < 0x10);
5333 }
5334
5335 if (LIKELY(0 == (descr & (3 << byteoff)))) {
5336 return 0;
5337 } else {
5338 return line->w32[lineoff];
5339 }
5340 }
5341
5342 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
5343 OCacheLine* line;
5344 UChar descr;
5345 UWord lineoff;
5346
5347 if (UNLIKELY(a & 3)) {
5348 /* Handle misaligned case, slowly. */
5349 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
5350 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
5351 return merge_origins(oLo, oHi);
5352 }
5353
5354 lineoff = oc_line_offset(a);
5355 if (OC_ENABLE_ASSERTIONS) {
5356 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5357 }
5358
5359 line = find_OCacheLine( a );
5360
5361 descr = line->descr[lineoff];
5362 if (OC_ENABLE_ASSERTIONS) {
5363 tl_assert(descr < 0x10);
5364 }
5365
5366 if (LIKELY(0 == descr)) {
5367 return 0;
5368 } else {
5369 return line->w32[lineoff];
5370 }
5371 }
5372
5373 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
5374 OCacheLine* line;
5375 UChar descrLo, descrHi, descr;
5376 UWord lineoff;
5377
5378 if (UNLIKELY(a & 7)) {
5379 /* Handle misaligned case, slowly. */
5380 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
5381 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
5382 return merge_origins(oLo, oHi);
5383 }
5384
5385 lineoff = oc_line_offset(a);
5386 if (OC_ENABLE_ASSERTIONS) {
5387 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5388 }
5389
5390 line = find_OCacheLine( a );
5391
5392 descrLo = line->descr[lineoff + 0];
5393 descrHi = line->descr[lineoff + 1];
5394 descr = descrLo | descrHi;
5395 if (OC_ENABLE_ASSERTIONS) {
5396 tl_assert(descr < 0x10);
5397 }
5398
5399 if (LIKELY(0 == descr)) {
5400 return 0; /* both 32-bit chunks are defined */
5401 } else {
5402 UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
5403 UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
5404 return merge_origins(oLo, oHi);
5405 }
5406 }
5407
5408 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
5409 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 );
5410 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 );
5411 UInt oBoth = merge_origins(oLo, oHi);
5412 return (UWord)oBoth;
5413 }
5414
5415
5416 /*--------------------------------------------*/
5417 /*--- Origin tracking: store handlers ---*/
5418 /*--------------------------------------------*/
5419
5420 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
5421 OCacheLine* line;
5422 UWord lineoff = oc_line_offset(a);
5423 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5424
5425 if (OC_ENABLE_ASSERTIONS) {
5426 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5427 }
5428
5429 line = find_OCacheLine( a );
5430
5431 if (d32 == 0) {
5432 line->descr[lineoff] &= ~(1 << byteoff);
5433 } else {
5434 line->descr[lineoff] |= (1 << byteoff);
5435 line->w32[lineoff] = d32;
5436 }
5437 }
5438
5439 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
5440 OCacheLine* line;
5441 UWord lineoff, byteoff;
5442
5443 if (UNLIKELY(a & 1)) {
5444 /* Handle misaligned case, slowly. */
5445 MC_(helperc_b_store1)( a + 0, d32 );
5446 MC_(helperc_b_store1)( a + 1, d32 );
5447 return;
5448 }
5449
5450 lineoff = oc_line_offset(a);
5451 byteoff = a & 3; /* 0 or 2 */
5452
5453 if (OC_ENABLE_ASSERTIONS) {
5454 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5455 }
5456
5457 line = find_OCacheLine( a );
5458
5459 if (d32 == 0) {
5460 line->descr[lineoff] &= ~(3 << byteoff);
5461 } else {
5462 line->descr[lineoff] |= (3 << byteoff);
5463 line->w32[lineoff] = d32;
5464 }
5465 }
5466
5467 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
5468 OCacheLine* line;
5469 UWord lineoff;
5470
5471 if (UNLIKELY(a & 3)) {
5472 /* Handle misaligned case, slowly. */
5473 MC_(helperc_b_store2)( a + 0, d32 );
5474 MC_(helperc_b_store2)( a + 2, d32 );
5475 return;
5476 }
5477
5478 lineoff = oc_line_offset(a);
5479 if (OC_ENABLE_ASSERTIONS) {
5480 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5481 }
5482
5483 line = find_OCacheLine( a );
5484
5485 if (d32 == 0) {
5486 line->descr[lineoff] = 0;
5487 } else {
5488 line->descr[lineoff] = 0xF;
5489 line->w32[lineoff] = d32;
5490 }
5491 }
5492
5493 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
5494 OCacheLine* line;
5495 UWord lineoff;
5496
5497 if (UNLIKELY(a & 7)) {
5498 /* Handle misaligned case, slowly. */
5499 MC_(helperc_b_store4)( a + 0, d32 );
5500 MC_(helperc_b_store4)( a + 4, d32 );
5501 return;
5502 }
5503
5504 lineoff = oc_line_offset(a);
5505 if (OC_ENABLE_ASSERTIONS) {
5506 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5507 }
5508
5509 line = find_OCacheLine( a );
5510
5511 if (d32 == 0) {
5512 line->descr[lineoff + 0] = 0;
5513 line->descr[lineoff + 1] = 0;
5514 } else {
5515 line->descr[lineoff + 0] = 0xF;
5516 line->descr[lineoff + 1] = 0xF;
5517 line->w32[lineoff + 0] = d32;
5518 line->w32[lineoff + 1] = d32;
5519 }
5520 }
5521
5522 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
5523 MC_(helperc_b_store8)( a + 0, d32 );
5524 MC_(helperc_b_store8)( a + 8, d32 );
5525 }
5526
5527
5528 /*--------------------------------------------*/
5529 /*--- Origin tracking: sarp handlers ---*/
5530 /*--------------------------------------------*/
5531
5532 __attribute__((noinline))
5533 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
5534 if ((a & 1) && len >= 1) {
5535 MC_(helperc_b_store1)( a, otag );
5536 a++;
5537 len--;
5538 }
5539 if ((a & 2) && len >= 2) {
5540 MC_(helperc_b_store2)( a, otag );
5541 a += 2;
5542 len -= 2;
5543 }
5544 if (len >= 4)
5545 tl_assert(0 == (a & 3));
5546 while (len >= 4) {
5547 MC_(helperc_b_store4)( a, otag );
5548 a += 4;
5549 len -= 4;
5550 }
5551 if (len >= 2) {
5552 MC_(helperc_b_store2)( a, otag );
5553 a += 2;
5554 len -= 2;
5555 }
5556 if (len >= 1) {
5557 MC_(helperc_b_store1)( a, otag );
5558 //a++;
5559 len--;
5560 }
5561 tl_assert(len == 0);
5562 }
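/* Worked example of the alignment peeling above (addresses are
   illustrative): for a = 0x5003, len = 9, otag = T the calls made are

      MC_(helperc_b_store1)( 0x5003, T );   // leading byte; a is now 4-aligned
      MC_(helperc_b_store4)( 0x5004, T );
      MC_(helperc_b_store4)( 0x5008, T );   // len reaches 0

   i.e. at most one 1-byte and one 2-byte store at each end, with the
   middle handled four bytes at a time.  ocache_sarp_Clear_Origins below
   follows exactly the same pattern with otag == 0. */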
5563
5564 __attribute__((noinline))
5565 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
5566 if ((a & 1) && len >= 1) {
5567 MC_(helperc_b_store1)( a, 0 );
5568 a++;
5569 len--;
5570 }
5571 if ((a & 2) && len >= 2) {
5572 MC_(helperc_b_store2)( a, 0 );
5573 a += 2;
5574 len -= 2;
5575 }
5576 if (len >= 4)
5577 tl_assert(0 == (a & 3));
5578 while (len >= 4) {
5579 MC_(helperc_b_store4)( a, 0 );
5580 a += 4;
5581 len -= 4;
5582 }
5583 if (len >= 2) {
5584 MC_(helperc_b_store2)( a, 0 );
5585 a += 2;
5586 len -= 2;
5587 }
5588 if (len >= 1) {
5589 MC_(helperc_b_store1)( a, 0 );
5590 //a++;
5591 len--;
5592 }
5593 tl_assert(len == 0);
5594 }
5595
5596
5597 /*------------------------------------------------------------*/
5598 /*--- Setup and finalisation ---*/
5599 /*------------------------------------------------------------*/
5600
5601 static void mc_post_clo_init ( void )
5602 {
5603 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
5604
5605 if (MC_(clo_mc_level) == 3) {
5606 /* We're doing origin tracking. */
5607 # ifdef PERF_FAST_STACK
5608 VG_(track_new_mem_stack_4_w_ECU) ( mc_new_mem_stack_4_w_ECU );
5609 VG_(track_new_mem_stack_8_w_ECU) ( mc_new_mem_stack_8_w_ECU );
5610 VG_(track_new_mem_stack_12_w_ECU) ( mc_new_mem_stack_12_w_ECU );
5611 VG_(track_new_mem_stack_16_w_ECU) ( mc_new_mem_stack_16_w_ECU );
5612 VG_(track_new_mem_stack_32_w_ECU) ( mc_new_mem_stack_32_w_ECU );
5613 VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
5614 VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
5615 VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
5616 VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
5617 # endif
5618 VG_(track_new_mem_stack_w_ECU) ( mc_new_mem_stack_w_ECU );
5619 } else {
5620 /* Not doing origin tracking */
5621 # ifdef PERF_FAST_STACK
5622 VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
5623 VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 );
5624 VG_(track_new_mem_stack_12) ( mc_new_mem_stack_12 );
5625 VG_(track_new_mem_stack_16) ( mc_new_mem_stack_16 );
5626 VG_(track_new_mem_stack_32) ( mc_new_mem_stack_32 );
5627 VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
5628 VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
5629 VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
5630 VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
5631 # endif
5632 VG_(track_new_mem_stack) ( mc_new_mem_stack );
5633 }
5634
5635 /* This origin tracking cache is huge (~100M), so only initialise
5636 if we need it. */
5637 if (MC_(clo_mc_level) >= 3) {
5638 init_OCache();
5639 tl_assert(ocacheL1 != NULL);
5640 tl_assert(ocacheL2 != NULL);
5641 } else {
5642 tl_assert(ocacheL1 == NULL);
5643 tl_assert(ocacheL2 == NULL);
5644 }
5645 }
5646
5647 static void print_SM_info(char* type, int n_SMs)
5648 {
5649 VG_(message)(Vg_DebugMsg,
5650 " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
5651 type,
5652 n_SMs,
5653 n_SMs * sizeof(SecMap) / 1024UL,
5654 n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
5655 }
5656
5657 static void mc_fini ( Int exitcode )
5658 {
5659 MC_(print_malloc_stats)();
5660
5661 if (MC_(clo_leak_check) != LC_Off) {
5662 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, MC_(clo_leak_check));
5663 } else {
5664 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
5665 VG_(umsg)(
5666 "For a detailed leak analysis, rerun with: --leak-check=full\n"
5667 "\n"
5668 );
5669 }
5670 }
5671
5672 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
5673 VG_(message)(Vg_UserMsg,
5674 "For counts of detected and suppressed errors, rerun with: -v\n");
5675 }
5676
5677 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
5678 && MC_(clo_mc_level) == 2) {
5679 VG_(message)(Vg_UserMsg,
5680 "Use --track-origins=yes to see where "
5681 "uninitialised values come from\n");
5682 }
5683
5684 done_prof_mem();
5685
5686 if (VG_(clo_stats)) {
5687 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
5688
5689 VG_(message)(Vg_DebugMsg,
5690 " memcheck: sanity checks: %d cheap, %d expensive\n",
5691 n_sanity_cheap, n_sanity_expensive );
5692 VG_(message)(Vg_DebugMsg,
5693 " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
5694 n_auxmap_L2_nodes,
5695 n_auxmap_L2_nodes * 64,
5696 n_auxmap_L2_nodes / 16 );
5697 VG_(message)(Vg_DebugMsg,
5698 " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
5699 n_auxmap_L1_searches, n_auxmap_L1_cmps,
5700 (10ULL * n_auxmap_L1_cmps)
5701 / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
5702 );
5703 VG_(message)(Vg_DebugMsg,
5704 " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
5705 n_auxmap_L2_searches, n_auxmap_L2_nodes
5706 );
5707
5708 print_SM_info("n_issued ", n_issued_SMs);
5709 print_SM_info("n_deissued ", n_deissued_SMs);
5710 print_SM_info("max_noaccess ", max_noaccess_SMs);
5711 print_SM_info("max_undefined", max_undefined_SMs);
5712 print_SM_info("max_defined ", max_defined_SMs);
5713 print_SM_info("max_non_DSM ", max_non_DSM_SMs);
5714
5715 // Three DSMs, plus the non-DSM ones
5716 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
5717 // The 3*sizeof(Word) bytes is the AVL node metadata size.
5718 // The 4*sizeof(Word) bytes is the malloc metadata size.
5719 // Hardwiring these sizes in sucks, but I don't see how else to do it.
5720 max_secVBit_szB = max_secVBit_nodes *
5721 (sizeof(SecVBitNode) + 3*sizeof(Word) + 4*sizeof(Word));
5722 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
5723
5724 VG_(message)(Vg_DebugMsg,
5725 " memcheck: max sec V bit nodes: %d (%ldk, %ldM)\n",
5726 max_secVBit_nodes, max_secVBit_szB / 1024,
5727 max_secVBit_szB / (1024 * 1024));
5728 VG_(message)(Vg_DebugMsg,
5729 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
5730 sec_vbits_new_nodes + sec_vbits_updates,
5731 sec_vbits_new_nodes, sec_vbits_updates );
5732 VG_(message)(Vg_DebugMsg,
5733 " memcheck: max shadow mem size: %ldk, %ldM\n",
5734 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
5735
5736 if (MC_(clo_mc_level) >= 3) {
5737 VG_(message)(Vg_DebugMsg,
5738 " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n",
5739 stats_ocacheL1_find,
5740 stats_ocacheL1_misses,
5741 stats_ocacheL1_lossage );
5742 VG_(message)(Vg_DebugMsg,
5743 " ocacheL1: %'12lu at 0 %'12lu at 1\n",
5744 stats_ocacheL1_find - stats_ocacheL1_misses
5745 - stats_ocacheL1_found_at_1
5746 - stats_ocacheL1_found_at_N,
5747 stats_ocacheL1_found_at_1 );
5748 VG_(message)(Vg_DebugMsg,
5749 " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n",
5750 stats_ocacheL1_found_at_N,
5751 stats_ocacheL1_movefwds );
5752 VG_(message)(Vg_DebugMsg,
5753 " ocacheL1: %'12lu sizeB %'12u useful\n",
5754 (UWord)sizeof(OCache),
5755 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
5756 VG_(message)(Vg_DebugMsg,
5757 " ocacheL2: %'12lu refs %'12lu misses\n",
5758 stats__ocacheL2_refs,
5759 stats__ocacheL2_misses );
5760 VG_(message)(Vg_DebugMsg,
5761 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
5762 stats__ocacheL2_n_nodes_max,
5763 stats__ocacheL2_n_nodes );
5764 VG_(message)(Vg_DebugMsg,
5765 " niacache: %'12lu refs %'12lu misses\n",
5766 stats__nia_cache_queries, stats__nia_cache_misses);
5767 } else {
5768 tl_assert(ocacheL1 == NULL);
5769 tl_assert(ocacheL2 == NULL);
5770 }
5771 }
5772
5773 if (0) {
5774 VG_(message)(Vg_DebugMsg,
5775 "------ Valgrind's client block stats follow ---------------\n" );
5776 show_client_block_stats();
5777 }
5778 }
5779
5780 static void mc_pre_clo_init(void)
5781 {
5782 VG_(details_name) ("Memcheck");
5783 VG_(details_version) (NULL);
5784 VG_(details_description) ("a memory error detector");
5785 VG_(details_copyright_author)(
5786 "Copyright (C) 2002-2010, and GNU GPL'd, by Julian Seward et al.");
5787 VG_(details_bug_reports_to) (VG_BUGS_TO);
5788 VG_(details_avg_translation_sizeB) ( 556 );
5789
5790 VG_(basic_tool_funcs) (mc_post_clo_init,
5791 MC_(instrument),
5792 mc_fini);
5793
5794 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) );
5795
5796
5797 VG_(needs_core_errors) ();
5798 VG_(needs_tool_errors) (MC_(eq_Error),
5799 MC_(before_pp_Error),
5800 MC_(pp_Error),
5801 True,/*show TIDs for errors*/
5802 MC_(update_Error_extra),
5803 MC_(is_recognised_suppression),
5804 MC_(read_extra_suppression_info),
5805 MC_(error_matches_suppression),
5806 MC_(get_error_name),
5807 MC_(get_extra_suppression_info));
5808 VG_(needs_libc_freeres) ();
5809 VG_(needs_command_line_options)(mc_process_cmd_line_options,
5810 mc_print_usage,
5811 mc_print_debug_usage);
5812 VG_(needs_client_requests) (mc_handle_client_request);
5813 VG_(needs_sanity_checks) (mc_cheap_sanity_check,
5814 mc_expensive_sanity_check);
5815 VG_(needs_malloc_replacement) (MC_(malloc),
5816 MC_(__builtin_new),
5817 MC_(__builtin_vec_new),
5818 MC_(memalign),
5819 MC_(calloc),
5820 MC_(free),
5821 MC_(__builtin_delete),
5822 MC_(__builtin_vec_delete),
5823 MC_(realloc),
5824 MC_(malloc_usable_size),
5825 MC_MALLOC_REDZONE_SZB );
5826
5827 VG_(needs_xml_output) ();
5828
5829 VG_(track_new_mem_startup) ( mc_new_mem_startup );
5830 VG_(track_new_mem_stack_signal)( make_mem_undefined_w_tid );
5831 // We assume that brk()/sbrk() does not initialise new memory. Is this
5832 // accurate? John Reiser says:
5833 //
5834 // 0) sbrk() can *decrease* process address space. No zero fill is done
5835 // for a decrease, not even the fragment on the high end of the last page
5836 // that is beyond the new highest address. For maximum safety and
5837 // portability, then the bytes in the last page that reside above [the
5838 // new] sbrk(0) should be considered to be uninitialized, but in practice
5839 // it is exceedingly likely that they will retain their previous
5840 // contents.
5841 //
5842 // 1) If an increase is large enough to require new whole pages, then
5843 // those new whole pages (like all new pages) are zero-filled by the
5844 // operating system. So if sbrk(0) already is page aligned, then
5845 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
5846 //
5847 // 2) Any increase that lies within an existing allocated page is not
5848 // changed. So if (x = sbrk(0)) is not page aligned, then
5849 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
5850 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
5851 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
5852 // of them come along for the ride because the operating system deals
5853 // only in whole pages. Again, for maximum safety and portability, then
5854 // anything that lives above [the new] sbrk(0) should be considered
5855 // uninitialized, but in practice will retain previous contents [zero in
5856 // this case.]"
5857 //
5858 // In short:
5859 //
5860 // A key property of sbrk/brk is that new whole pages that are supplied
5861 // by the operating system *do* get initialized to zero.
5862 //
5863 // As for the portability of all this:
5864 //
5865 // sbrk and brk are not POSIX. However, any system that is a derivative
5866 // of *nix has sbrk and brk because there are too many softwares (such as
5867 // the Bourne shell) which rely on the traditional memory map (.text,
5868 // .data+.bss, stack) and the existence of sbrk/brk.
5869 //
5870 // So we should arguably observe all this. However:
5871 // - The current inaccuracy has caused maybe one complaint in seven years(?)
5872 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
5873 // doubt most programmers know the above information.
5874 // So I'm not terribly unhappy with marking it as undefined. --njn.
5875 //
5876 // [More: I think most of what John said only applies to sbrk(). It seems
5877 // that brk() always deals in whole pages. And since this event deals
5878 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
5879 // just mark all memory it allocates as defined.]
5880 //
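// A concrete instance of John's point (2) above, assuming PAGE_SIZE =
// 4096 and x = sbrk(0) sitting at offset 0x300 (768) within its page:
// sbrk(PAGE_SIZE) hands back (4096-1) & -x = 3328 bytes that live in the
// already-allocated page and keep whatever was there before, plus one
// fresh zero-filled page, (4096-1) & x = 768 bytes of which are actually
// covered by the request.  So "new" brk/sbrk memory is in general a mix
// of zeroed and possibly-stale bytes, which is why the conservative
// choice here is to mark all of it as undefined.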
5881 VG_(track_new_mem_brk) ( make_mem_undefined_w_tid );
5882
5883 // Handling of mmap and mprotect isn't simple (well, it is simple,
5884 // but the justification isn't.) See comments above, just prior to
5885 // mc_new_mem_mmap.
5886 VG_(track_new_mem_mmap) ( mc_new_mem_mmap );
5887 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
5888
5889 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) );
5890
5891 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
5892 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) );
5893 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) );
5894
5895 /* Defer the specification of the new_mem_stack functions to the
5896 post_clo_init function, since we need to first parse the command
5897 line before deciding which set to use. */
5898
5899 # ifdef PERF_FAST_STACK
5900 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 );
5901 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 );
5902 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 );
5903 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 );
5904 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 );
5905 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 );
5906 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 );
5907 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 );
5908 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 );
5909 # endif
5910 VG_(track_die_mem_stack) ( mc_die_mem_stack );
5911
5912 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) );
5913
5914 VG_(track_pre_mem_read) ( check_mem_is_defined );
5915 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
5916 VG_(track_pre_mem_write) ( check_mem_is_addressable );
5917 VG_(track_post_mem_write) ( mc_post_mem_write );
5918
5919 if (MC_(clo_mc_level) >= 2)
5920 VG_(track_pre_reg_read) ( mc_pre_reg_read );
5921
5922 VG_(track_post_reg_write) ( mc_post_reg_write );
5923 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
5924
5925 init_shadow_memory();
5926 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" );
5927 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
5928 init_prof_mem();
5929
5930 tl_assert( mc_expensive_sanity_check() );
5931
5932 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
5933 tl_assert(sizeof(UWord) == sizeof(Addr));
5934 // Call me paranoid. I don't care.
5935 tl_assert(sizeof(void*) == sizeof(Addr));
5936
5937 // BYTES_PER_SEC_VBIT_NODE must be a power of two.
5938 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
5939
5940 /* This is small. Always initialise it. */
5941 init_nia_to_ecu_cache();
5942
5943 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
5944 if we need to, since the command line args haven't been
5945 processed yet. Hence defer it to mc_post_clo_init. */
5946 tl_assert(ocacheL1 == NULL);
5947 tl_assert(ocacheL2 == NULL);
5948
5949 /* Check some important stuff. See extensive comments above
5950 re UNALIGNED_OR_HIGH for background. */
5951 # if VG_WORDSIZE == 4
5952 tl_assert(sizeof(void*) == 4);
5953 tl_assert(sizeof(Addr) == 4);
5954 tl_assert(sizeof(UWord) == 4);
5955 tl_assert(sizeof(Word) == 4);
5956 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
5957 tl_assert(MASK(1) == 0UL);
5958 tl_assert(MASK(2) == 1UL);
5959 tl_assert(MASK(4) == 3UL);
5960 tl_assert(MASK(8) == 7UL);
5961 # else
5962 tl_assert(VG_WORDSIZE == 8);
5963 tl_assert(sizeof(void*) == 8);
5964 tl_assert(sizeof(Addr) == 8);
5965 tl_assert(sizeof(UWord) == 8);
5966 tl_assert(sizeof(Word) == 8);
5967 tl_assert(MAX_PRIMARY_ADDRESS == 0x3FFFFFFFFFULL);
5968 tl_assert(MASK(1) == 0xFFFFFFC000000000ULL);
5969 tl_assert(MASK(2) == 0xFFFFFFC000000001ULL);
5970 tl_assert(MASK(4) == 0xFFFFFFC000000003ULL);
5971 tl_assert(MASK(8) == 0xFFFFFFC000000007ULL);
5972 # endif
5973 }
5974
5975 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
5976
5977 /*--------------------------------------------------------------------*/
5978 /*--- end mc_main.c ---*/
5979 /*--------------------------------------------------------------------*/
5980