1
2 /*--------------------------------------------------------------------*/
3 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/
4 /*--- accessibility (A) and validity (V) status of each byte. ---*/
5 /*--- mc_main.c ---*/
6 /*--------------------------------------------------------------------*/
7
8 /*
9 This file is part of MemCheck, a heavyweight Valgrind tool for
10 detecting memory errors.
11
12 Copyright (C) 2000-2011 Julian Seward
13 jseward@acm.org
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31 */
32
33 #include "pub_tool_basics.h"
34 #include "pub_tool_aspacemgr.h"
35 #include "pub_tool_gdbserver.h"
36 #include "pub_tool_hashtable.h" // For mc_include.h
37 #include "pub_tool_libcbase.h"
38 #include "pub_tool_libcassert.h"
39 #include "pub_tool_libcprint.h"
40 #include "pub_tool_machine.h"
41 #include "pub_tool_mallocfree.h"
42 #include "pub_tool_options.h"
43 #include "pub_tool_oset.h"
44 #include "pub_tool_replacemalloc.h"
45 #include "pub_tool_tooliface.h"
46 #include "pub_tool_threadstate.h"
47
48 #include "mc_include.h"
49 #include "memcheck.h" /* for client requests */
50
51
52 /* We really want this frame-pointer-less on all platforms, since the
53 helper functions are small and called very frequently. By default
54 on x86-linux, though, Makefile.all.am doesn't specify it, so do it
55 here. Requires gcc >= 4.4, unfortunately. */
56 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
57 # pragma GCC optimize("-fomit-frame-pointer")
58 #endif
59
60
61 /* Set to 1 to do a little more sanity checking */
62 #define VG_DEBUG_MEMORY 0
63
64 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
65
66 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
67 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
68
69
70 /*------------------------------------------------------------*/
71 /*--- Fast-case knobs ---*/
72 /*------------------------------------------------------------*/
73
74 // Comment these out to disable the fast cases (don't just set them to zero).
75
76 #define PERF_FAST_LOADV 1
77 #define PERF_FAST_STOREV 1
78
79 #define PERF_FAST_SARP 1
80
81 #define PERF_FAST_STACK 1
82 #define PERF_FAST_STACK2 1
83
84 /* Change this to 1 to enable assertions on origin tracking cache fast
85 paths */
86 #define OC_ENABLE_ASSERTIONS 0
87
88
89 /*------------------------------------------------------------*/
90 /*--- Comments on the origin tracking implementation ---*/
91 /*------------------------------------------------------------*/
92
93 /* See detailed comment entitled
94 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
95 which is contained further on in this file. */
96
97
98 /*------------------------------------------------------------*/
99 /*--- V bits and A bits ---*/
100 /*------------------------------------------------------------*/
101
102 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
103 thinks the corresponding value bit is defined. And every memory byte
104 has an A bit, which tracks whether Memcheck thinks the program can access
105 it safely (ie. it's mapped, and has at least one of the RWX permission bits
106 set). So every N-bit register is shadowed with N V bits, and every memory
107 byte is shadowed with 8 V bits and one A bit.
108
109 In the implementation, we use two forms of compression (compressed V bits
110 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
111 for memory.
112
113 Memcheck also tracks extra information about each heap block that is
114 allocated, for detecting memory leaks and other purposes.
115 */
116
117 /*------------------------------------------------------------*/
118 /*--- Basic A/V bitmap representation. ---*/
119 /*------------------------------------------------------------*/
120
121 /* All reads and writes are checked against a memory map (a.k.a. shadow
122 memory), which records the state of all memory in the process.
123
124 On 32-bit machines the memory map is organised as follows.
125 The top 16 bits of an address are used to index into a top-level
126 map table, containing 65536 entries. Each entry is a pointer to a
127 second-level map, which records the accessibility and validity
128 permissions for the 65536 bytes indexed by the lower 16 bits of the
129 address. Each byte is represented by two bits (details are below). So
130 each second-level map contains 16384 bytes. This two-level arrangement
131 conveniently divides the 4G address space into 64k lumps, each of size 64k
132 bytes.
133
134 All entries in the primary (top-level) map must point to a valid
135 secondary (second-level) map. Since many of the 64kB chunks will
136 have the same status for every bit -- ie. noaccess (for unused
137 address space) or entirely addressable and defined (for code segments) --
138 there are three distinguished secondary maps, which indicate 'noaccess',
139 'undefined' and 'defined'. For these uniform 64kB chunks, the primary
140 map entry points to the relevant distinguished map. In practice,
141 typically more than half of the addressable memory is represented with
142 the 'undefined' or 'defined' distinguished secondary map, so it gives a
143 good saving. It also lets us set the V+A bits of large address regions
144 quickly in set_address_range_perms().
145
146 On 64-bit machines it's more complicated. If we followed the same basic
147 scheme we'd have a four-level table which would require too many memory
148 accesses. So instead the top-level map table has 2^22 entries (indexed
149 using bits 16..37 of the address); this covers the bottom 256GB. Any
150 accesses above 256GB are handled with a slow, sparse auxiliary table.
151 Valgrind's address space manager tries very hard to keep things below
152 this 256GB barrier so that performance doesn't suffer too much.
153
154 Note that this file has a lot of different functions for reading and
155 writing shadow memory. Only a couple are strictly necessary (eg.
156 get_vabits2 and set_vabits2), most are just specialised for specific
157 common cases to improve performance.
158
159 Aside: the V+A bits are less precise than they could be -- we have no way
160 of marking memory as read-only. It would be great if we could add an
161 extra state VA_BITSn_READONLY. But then we'd have 5 different states,
162 which requires 2.3 bits to hold, and there's no way to do that elegantly
163 -- we'd have to double up to 4 bits of metadata per byte, which doesn't
164 seem worth it.
165 */
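/* What follows is an illustrative sketch -- not part of the tool and not
   compiled -- showing how one address decomposes into a primary-map index
   and a secondary-map offset under the scheme described above.  The real
   lookups are done by get_secmap_ptr() and the SM_OFF() macro further down;
   the address and the function name here are made up for the example. */
#if 0
static void example_address_decomposition ( void )
{
   Addr  a        = 0x08049f1b;    /* an arbitrary example address          */
   UWord pm_index = a >> 16;       /* 0x0804: selects the SecMap* entry     */
   UWord sm_byte  = a & 0xFFFF;    /* 0x9f1b: byte within the 64kB chunk    */
   UWord sm_off   = sm_byte >> 2;  /* 0x27c6: vabits8[] slot (4 bytes/slot) */
   UWord shift    = (a & 3) << 1;  /* 6: position of this byte's 2 V+A bits */
   /* So the V+A state of byte 0x08049f1b lives in bits [7..6] of
      primary_map[0x0804]->vabits8[0x27c6]. */
}
#endif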
166
167 /* --------------- Basic configuration --------------- */
168
169 /* Only change this. N_PRIMARY_MAP *must* be a power of 2. */
170
171 #if VG_WORDSIZE == 4
172
173 /* cover the entire address space */
174 # define N_PRIMARY_BITS 16
175
176 #else
177
178 /* Just handle the first 256G fast and the rest via auxiliary
179 primaries. If you change this, Memcheck will assert at startup.
180 See the definition of UNALIGNED_OR_HIGH for extensive comments. */
181 # define N_PRIMARY_BITS 22
182
183 #endif
184
185
186 /* Do not change this. */
187 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS)
188
189 /* Do not change this. */
190 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
191
192
193 /* --------------- Secondary maps --------------- */
194
195 // Each byte of memory conceptually has an A bit, which indicates its
196 // addressability, and 8 V bits, which indicate its definedness.
197 //
198 // But because very few bytes are partially defined, we can use a nice
199 // compression scheme to reduce the size of shadow memory. Each byte of
200 // memory has 2 bits which indicate its state (ie. V+A bits):
201 //
202 // 00: noaccess (unaddressable but treated as fully defined)
203 // 01: undefined (addressable and fully undefined)
204 // 10: defined (addressable and fully defined)
205 // 11: partdefined (addressable and partially defined)
206 //
207 // In the "partdefined" case, we use a secondary table to store the V bits.
208 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
209 // bits.
210 //
211 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
212 // four bytes (32 bits) of memory are in each chunk. Hence the name
213 // "vabits8". This lets us get the V+A bits for four bytes at a time
214 // easily (without having to do any shifting and/or masking), and that is a
215 // very common operation. (Note that although each vabits8 chunk
216 // is 8 bits in size, it represents 32 bits of memory.)
217 //
218 // The representation is "inverse" little-endian... each 4 bytes of
219 // memory is represented by a 1 byte value, where:
220 //
221 // - the status of byte (a+0) is held in bits [1..0]
222 // - the status of byte (a+1) is held in bits [3..2]
223 // - the status of byte (a+2) is held in bits [5..4]
224 // - the status of byte (a+3) is held in bits [7..6]
225 //
226 // It's "inverse" because endianness normally describes a mapping from
227 // value bits to memory addresses; in this case the mapping is inverted.
228 // Ie. instead of particular value bits being held in certain addresses, in
229 // this case certain addresses are represented by particular value bits.
230 // See insert_vabits2_into_vabits8() for an example.
231 //
232 // But note that we don't compress the V bits stored in registers; they
233 // need to be explicit to make the shadow operations possible. Therefore
234 // when moving values between registers and memory we need to convert
235 // between the expanded in-register format and the compressed in-memory
236 // format. This isn't so difficult, it just requires careful attention in a
237 // few places.
238
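// A small worked example -- not compiled, not part of the tool -- packing
// and reading back one byte's 2-bit state with the same shift/mask logic as
// insert_vabits2_into_vabits8() and extract_vabits2_from_vabits8() below.
// The value 0xE4 and the address 0x1002 are arbitrary choices.
#if 0
static void example_vabits8_packing ( void )
{
   UChar vabits8 = 0xE4;     /* 11_10_01_00b:
                                byte (a+0): bits [1..0] = 00b = noaccess
                                byte (a+1): bits [3..2] = 01b = undefined
                                byte (a+2): bits [5..4] = 10b = defined
                                byte (a+3): bits [7..6] = 11b = partdefined */
   Addr  a       = 0x1002;                /* (a & 3) == 2, so bits [5..4]   */
   UInt  shift   = (a & 3) << 1;          /* 4                              */
   UChar vabits2 = (vabits8 >> shift) & 0x3;  /* 10b, ie. "defined"         */
}
#endif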
239 // These represent eight bits of memory.
240 #define VA_BITS2_NOACCESS 0x0 // 00b
241 #define VA_BITS2_UNDEFINED 0x1 // 01b
242 #define VA_BITS2_DEFINED 0x2 // 10b
243 #define VA_BITS2_PARTDEFINED 0x3 // 11b
244
245 // These represent 16 bits of memory.
246 #define VA_BITS4_NOACCESS 0x0 // 00_00b
247 #define VA_BITS4_UNDEFINED 0x5 // 01_01b
248 #define VA_BITS4_DEFINED 0xa // 10_10b
249
250 // These represent 32 bits of memory.
251 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b
252 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b
253 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b
254
255 // These represent 64 bits of memory.
256 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2
257 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
258 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
259
260
261 #define SM_CHUNKS 16384
262 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
263 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3)
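// For example, for an address whose low 16 bits are 0x9f1b, SM_OFF gives
// 0x27c6 (the vabits8[] slot holding that byte's 2-bit state), while for
// aligned 8-byte accesses SM_OFF_16 gives 0x13e3 (the 16-bit slot holding
// the states of all 8 bytes, read via a UShort* view of vabits8[]).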
264
265 // Paranoia: it's critical for performance that the requested inlining
266 // occurs. So try extra hard.
267 #define INLINE inline __attribute__((always_inline))
268
269 static INLINE Addr start_of_this_sm ( Addr a ) {
270 return (a & (~SM_MASK));
271 }
272 static INLINE Bool is_start_of_sm ( Addr a ) {
273 return (start_of_this_sm(a) == a);
274 }
275
276 typedef
277 struct {
278 UChar vabits8[SM_CHUNKS];
279 }
280 SecMap;
281
282 // 3 distinguished secondary maps, one for no-access, one for
283 // accessible but undefined, and one for accessible and defined.
284 // Distinguished secondaries may never be modified.
285 #define SM_DIST_NOACCESS 0
286 #define SM_DIST_UNDEFINED 1
287 #define SM_DIST_DEFINED 2
288
289 static SecMap sm_distinguished[3];
290
291 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
292 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
293 }
294
295 // Forward declaration
296 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
297
298 /* dist_sm points to one of our three distinguished secondaries. Make
299 a copy of it so that we can write to it.
300 */
301 static SecMap* copy_for_writing ( SecMap* dist_sm )
302 {
303 SecMap* new_sm;
304 tl_assert(dist_sm == &sm_distinguished[0]
305 || dist_sm == &sm_distinguished[1]
306 || dist_sm == &sm_distinguished[2]);
307
308 new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
309 if (new_sm == NULL)
310 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
311 sizeof(SecMap) );
312 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
313 update_SM_counts(dist_sm, new_sm);
314 return new_sm;
315 }
316
317 /* --------------- Stats --------------- */
318
319 static Int n_issued_SMs = 0;
320 static Int n_deissued_SMs = 0;
321 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs
322 static Int n_undefined_SMs = 0;
323 static Int n_defined_SMs = 0;
324 static Int n_non_DSM_SMs = 0;
325 static Int max_noaccess_SMs = 0;
326 static Int max_undefined_SMs = 0;
327 static Int max_defined_SMs = 0;
328 static Int max_non_DSM_SMs = 0;
329
330 /* # searches initiated in auxmap_L1, and # base cmps required */
331 static ULong n_auxmap_L1_searches = 0;
332 static ULong n_auxmap_L1_cmps = 0;
333 /* # of searches that missed in auxmap_L1 and therefore had to
334 be handed to auxmap_L2. And the number of nodes inserted. */
335 static ULong n_auxmap_L2_searches = 0;
336 static ULong n_auxmap_L2_nodes = 0;
337
338 static Int n_sanity_cheap = 0;
339 static Int n_sanity_expensive = 0;
340
341 static Int n_secVBit_nodes = 0;
342 static Int max_secVBit_nodes = 0;
343
344 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
345 {
346 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
347 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
348 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --;
349 else { n_non_DSM_SMs --;
350 n_deissued_SMs ++; }
351
352 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
353 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
354 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++;
355 else { n_non_DSM_SMs ++;
356 n_issued_SMs ++; }
357
358 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs;
359 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
360 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs;
361 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs;
362 }
363
364 /* --------------- Primary maps --------------- */
365
366 /* The main primary map. This covers some initial part of the address
367 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is
368 handled using the auxiliary primary map.
369 */
370 static SecMap* primary_map[N_PRIMARY_MAP];
371
372
373 /* An entry in the auxiliary primary map. base must be a 64k-aligned
374 value, and sm points at the relevant secondary map. As with the
375 main primary map, the secondary may be either a real secondary, or
376 one of the three distinguished secondaries. DO NOT CHANGE THIS
377 LAYOUT: the first word has to be the key for OSet fast lookups.
378 */
379 typedef
380 struct {
381 Addr base;
382 SecMap* sm;
383 }
384 AuxMapEnt;
385
386 /* Tunable parameter: How big is the L1 queue? */
387 #define N_AUXMAP_L1 24
388
389 /* Tunable parameter: How far along the L1 queue to insert
390 entries resulting from L2 lookups? */
391 #define AUXMAP_L1_INSERT_IX 12
392
393 static struct {
394 Addr base;
395 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
396 }
397 auxmap_L1[N_AUXMAP_L1];
398
399 static OSet* auxmap_L2 = NULL;
400
401 static void init_auxmap_L1_L2 ( void )
402 {
403 Int i;
404 for (i = 0; i < N_AUXMAP_L1; i++) {
405 auxmap_L1[i].base = 0;
406 auxmap_L1[i].ent = NULL;
407 }
408
409 tl_assert(0 == offsetof(AuxMapEnt,base));
410 tl_assert(sizeof(Addr) == sizeof(void*));
411 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base),
412 /*fastCmp*/ NULL,
413 VG_(malloc), "mc.iaLL.1", VG_(free) );
414 }
415
416 /* Check representation invariants; if OK return NULL; else a
417 descriptive bit of text. Also return the number of
418 non-distinguished secondary maps referred to from the auxiliary
419 primary maps. */
420
421 static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
422 {
423 Word i, j;
424 /* On a 32-bit platform, the L2 and L1 tables should
425 both remain empty forever.
426
427 On a 64-bit platform:
428 In the L2 table:
429 all .base & 0xFFFF == 0
430 all .base > MAX_PRIMARY_ADDRESS
431 In the L1 table:
432 all .base & 0xFFFF == 0
433 all (.base > MAX_PRIMARY_ADDRESS
434 and .base & 0xFFFF == 0
435 and .ent points to an AuxMapEnt with the same .base)
436 or
437 (.base == 0 and .ent == NULL)
438 */
439 *n_secmaps_found = 0;
440 if (sizeof(void*) == 4) {
441 /* 32-bit platform */
442 if (VG_(OSetGen_Size)(auxmap_L2) != 0)
443 return "32-bit: auxmap_L2 is non-empty";
444 for (i = 0; i < N_AUXMAP_L1; i++)
445 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
446 return "32-bit: auxmap_L1 is non-empty";
447 } else {
448 /* 64-bit platform */
449 UWord elems_seen = 0;
450 AuxMapEnt *elem, *res;
451 AuxMapEnt key;
452 /* L2 table */
453 VG_(OSetGen_ResetIter)(auxmap_L2);
454 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
455 elems_seen++;
456 if (0 != (elem->base & (Addr)0xFFFF))
457 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
458 if (elem->base <= MAX_PRIMARY_ADDRESS)
459 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
460 if (elem->sm == NULL)
461 return "64-bit: .sm in _L2 is NULL";
462 if (!is_distinguished_sm(elem->sm))
463 (*n_secmaps_found)++;
464 }
465 if (elems_seen != n_auxmap_L2_nodes)
466 return "64-bit: disagreement on number of elems in _L2";
467 /* Check L1-L2 correspondence */
468 for (i = 0; i < N_AUXMAP_L1; i++) {
469 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
470 continue;
471 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
472 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
473 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
474 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
475 if (auxmap_L1[i].ent == NULL)
476 return "64-bit: .ent is NULL in auxmap_L1";
477 if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
478 return "64-bit: _L1 and _L2 bases are inconsistent";
479 /* Look it up in auxmap_L2. */
480 key.base = auxmap_L1[i].base;
481 key.sm = 0;
482 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
483 if (res == NULL)
484 return "64-bit: _L1 .base not found in _L2";
485 if (res != auxmap_L1[i].ent)
486 return "64-bit: _L1 .ent disagrees with _L2 entry";
487 }
488 /* Check L1 contains no duplicates */
489 for (i = 0; i < N_AUXMAP_L1; i++) {
490 if (auxmap_L1[i].base == 0)
491 continue;
492 for (j = i+1; j < N_AUXMAP_L1; j++) {
493 if (auxmap_L1[j].base == 0)
494 continue;
495 if (auxmap_L1[j].base == auxmap_L1[i].base)
496 return "64-bit: duplicate _L1 .base entries";
497 }
498 }
499 }
500 return NULL; /* ok */
501 }
502
503 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
504 {
505 Word i;
506 tl_assert(ent);
507 tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
508 for (i = N_AUXMAP_L1-1; i > rank; i--)
509 auxmap_L1[i] = auxmap_L1[i-1];
510 auxmap_L1[rank].base = ent->base;
511 auxmap_L1[rank].ent = ent;
512 }
513
514 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
515 {
516 AuxMapEnt key;
517 AuxMapEnt* res;
518 Word i;
519
520 tl_assert(a > MAX_PRIMARY_ADDRESS);
521 a &= ~(Addr)0xFFFF;
522
523 /* First search the front-cache, which is a self-organising
524 list containing the most popular entries. */
525
526 if (LIKELY(auxmap_L1[0].base == a))
527 return auxmap_L1[0].ent;
528 if (LIKELY(auxmap_L1[1].base == a)) {
529 Addr t_base = auxmap_L1[0].base;
530 AuxMapEnt* t_ent = auxmap_L1[0].ent;
531 auxmap_L1[0].base = auxmap_L1[1].base;
532 auxmap_L1[0].ent = auxmap_L1[1].ent;
533 auxmap_L1[1].base = t_base;
534 auxmap_L1[1].ent = t_ent;
535 return auxmap_L1[0].ent;
536 }
537
538 n_auxmap_L1_searches++;
539
540 for (i = 0; i < N_AUXMAP_L1; i++) {
541 if (auxmap_L1[i].base == a) {
542 break;
543 }
544 }
545 tl_assert(i >= 0 && i <= N_AUXMAP_L1);
546
547 n_auxmap_L1_cmps += (ULong)(i+1);
548
549 if (i < N_AUXMAP_L1) {
550 if (i > 0) {
551 Addr t_base = auxmap_L1[i-1].base;
552 AuxMapEnt* t_ent = auxmap_L1[i-1].ent;
553 auxmap_L1[i-1].base = auxmap_L1[i-0].base;
554 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent;
555 auxmap_L1[i-0].base = t_base;
556 auxmap_L1[i-0].ent = t_ent;
557 i--;
558 }
559 return auxmap_L1[i].ent;
560 }
561
562 n_auxmap_L2_searches++;
563
564 /* First see if we already have it. */
565 key.base = a;
566 key.sm = 0;
567
568 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
569 if (res)
570 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
571 return res;
572 }
573
574 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
575 {
576 AuxMapEnt *nyu, *res;
577
578 /* First see if we already have it. */
579 res = maybe_find_in_auxmap( a );
580 if (LIKELY(res))
581 return res;
582
583 /* Ok, there's no entry in the secondary map, so we'll have
584 to allocate one. */
585 a &= ~(Addr)0xFFFF;
586
587 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
588 tl_assert(nyu);
589 nyu->base = a;
590 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS];
591 VG_(OSetGen_Insert)( auxmap_L2, nyu );
592 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
593 n_auxmap_L2_nodes++;
594 return nyu;
595 }
596
597 /* --------------- SecMap fundamentals --------------- */
598
599 // In all these, 'low' means it's definitely in the main primary map,
600 // 'high' means it's definitely in the auxiliary table.
601
602 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
603 {
604 UWord pm_off = a >> 16;
605 # if VG_DEBUG_MEMORY >= 1
606 tl_assert(pm_off < N_PRIMARY_MAP);
607 # endif
608 return &primary_map[ pm_off ];
609 }
610
611 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
612 {
613 AuxMapEnt* am = find_or_alloc_in_auxmap(a);
614 return &am->sm;
615 }
616
617 static SecMap** get_secmap_ptr ( Addr a )
618 {
619 return ( a <= MAX_PRIMARY_ADDRESS
620 ? get_secmap_low_ptr(a)
621 : get_secmap_high_ptr(a));
622 }
623
624 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
625 {
626 return *get_secmap_low_ptr(a);
627 }
628
629 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
630 {
631 return *get_secmap_high_ptr(a);
632 }
633
634 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
635 {
636 SecMap** p = get_secmap_low_ptr(a);
637 if (UNLIKELY(is_distinguished_sm(*p)))
638 *p = copy_for_writing(*p);
639 return *p;
640 }
641
642 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
643 {
644 SecMap** p = get_secmap_high_ptr(a);
645 if (UNLIKELY(is_distinguished_sm(*p)))
646 *p = copy_for_writing(*p);
647 return *p;
648 }
649
650 /* Produce the secmap for 'a', either from the primary map or by
651 ensuring there is an entry for it in the aux primary map. The
652 secmap may be a distinguished one as the caller will only want to
653 be able to read it.
654 */
655 static INLINE SecMap* get_secmap_for_reading ( Addr a )
656 {
657 return ( a <= MAX_PRIMARY_ADDRESS
658 ? get_secmap_for_reading_low (a)
659 : get_secmap_for_reading_high(a) );
660 }
661
662 /* Produce the secmap for 'a', either from the primary map or by
663 ensuring there is an entry for it in the aux primary map. The
664 secmap may not be a distinguished one, since the caller will want
665 to be able to write it. If it is a distinguished secondary, make a
666 writable copy of it, install it, and return the copy instead. (COW
667 semantics).
668 */
669 static SecMap* get_secmap_for_writing ( Addr a )
670 {
671 return ( a <= MAX_PRIMARY_ADDRESS
672 ? get_secmap_for_writing_low (a)
673 : get_secmap_for_writing_high(a) );
674 }
675
676 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't
677 allocate one if one doesn't already exist. This is used by the
678 leak checker.
679 */
680 static SecMap* maybe_get_secmap_for ( Addr a )
681 {
682 if (a <= MAX_PRIMARY_ADDRESS) {
683 return get_secmap_for_reading_low(a);
684 } else {
685 AuxMapEnt* am = maybe_find_in_auxmap(a);
686 return am ? am->sm : NULL;
687 }
688 }
689
690 /* --------------- Fundamental functions --------------- */
691
692 static INLINE
693 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
694 {
695 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
696 *vabits8 &= ~(0x3 << shift); // mask out the two old bits
697 *vabits8 |= (vabits2 << shift); // mask in the two new bits
698 }
699
700 static INLINE
701 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
702 {
703 UInt shift;
704 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
705 shift = (a & 2) << 1; // shift by 0 or 4
706 *vabits8 &= ~(0xf << shift); // mask out the four old bits
707 *vabits8 |= (vabits4 << shift); // mask in the four new bits
708 }
709
710 static INLINE
711 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
712 {
713 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
714 vabits8 >>= shift; // shift the two bits to the bottom
715 return 0x3 & vabits8; // mask out the rest
716 }
717
718 static INLINE
719 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
720 {
721 UInt shift;
722 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
723 shift = (a & 2) << 1; // shift by 0 or 4
724 vabits8 >>= shift; // shift the four bits to the bottom
725 return 0xf & vabits8; // mask out the rest
726 }
727
728 // Note that these four are only used in slow cases. The fast cases do
729 // clever things like combine the auxmap check (in
730 // get_secmap_{read,writ}able) with alignment checks.
731
732 // *** WARNING! ***
733 // Any time this function is called, if it is possible that vabits2
734 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
735 // sec-V-bits table must also be set!
736 static INLINE
737 void set_vabits2 ( Addr a, UChar vabits2 )
738 {
739 SecMap* sm = get_secmap_for_writing(a);
740 UWord sm_off = SM_OFF(a);
741 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
742 }
743
744 static INLINE
745 UChar get_vabits2 ( Addr a )
746 {
747 SecMap* sm = get_secmap_for_reading(a);
748 UWord sm_off = SM_OFF(a);
749 UChar vabits8 = sm->vabits8[sm_off];
750 return extract_vabits2_from_vabits8(a, vabits8);
751 }
752
753 // *** WARNING! ***
754 // Any time this function is called, if it is possible that any of the
755 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
756 // corresponding entry(s) in the sec-V-bits table must also be set!
757 static INLINE
758 UChar get_vabits8_for_aligned_word32 ( Addr a )
759 {
760 SecMap* sm = get_secmap_for_reading(a);
761 UWord sm_off = SM_OFF(a);
762 UChar vabits8 = sm->vabits8[sm_off];
763 return vabits8;
764 }
765
766 static INLINE
767 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
768 {
769 SecMap* sm = get_secmap_for_writing(a);
770 UWord sm_off = SM_OFF(a);
771 sm->vabits8[sm_off] = vabits8;
772 }
773
774
775 // Forward declarations
776 static UWord get_sec_vbits8(Addr a);
777 static void set_sec_vbits8(Addr a, UWord vbits8);
778
779 // Returns False if there was an addressability error.
780 static INLINE
781 Bool set_vbits8 ( Addr a, UChar vbits8 )
782 {
783 Bool ok = True;
784 UChar vabits2 = get_vabits2(a);
785 if ( VA_BITS2_NOACCESS != vabits2 ) {
786 // Addressable. Convert in-register format to in-memory format.
787 // Also remove any existing sec V bit entry for the byte if no
788 // longer necessary.
789 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; }
790 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
791 else { vabits2 = VA_BITS2_PARTDEFINED;
792 set_sec_vbits8(a, vbits8); }
793 set_vabits2(a, vabits2);
794
795 } else {
796 // Unaddressable! Do nothing -- when writing to unaddressable
797 // memory it acts as a black hole, and the V bits can never be seen
798 // again. So we don't have to write them at all.
799 ok = False;
800 }
801 return ok;
802 }
803
804 // Returns False if there was an addressability error. In that case, we put
805 // all defined bits into vbits8.
806 static INLINE
807 Bool get_vbits8 ( Addr a, UChar* vbits8 )
808 {
809 Bool ok = True;
810 UChar vabits2 = get_vabits2(a);
811
812 // Convert the in-memory format to in-register format.
813 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; }
814 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
815 else if ( VA_BITS2_NOACCESS == vabits2 ) {
816 *vbits8 = V_BITS8_DEFINED; // Make V bits defined!
817 ok = False;
818 } else {
819 tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
820 *vbits8 = get_sec_vbits8(a);
821 }
822 return ok;
823 }
824
825
826 /* --------------- Secondary V bit table ------------ */
827
828 // This table holds the full V bit pattern for partially-defined bytes
829 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
830 // memory.
831 //
832 // Note: the nodes in this table can become stale. Eg. if you write a PDB,
833 // then overwrite the same address with a fully defined byte, the sec-V-bit
834 // node will not necessarily be removed. This is because checking for
835 // whether removal is necessary would slow down the fast paths.
836 //
837 // To avoid the stale nodes building up too much, we periodically (once the
838 // table reaches a certain size) garbage collect (GC) the table by
839 // traversing it and evicting any "sufficiently stale" nodes, ie. nodes that
840 // are stale and haven't been touched for a certain number of collections.
841 // If more than a certain proportion of nodes survived, we increase the
842 // table size so that GCs occur less often.
843 //
844 // (So this is a bit different from a traditional GC, where you definitely want
845 // to remove any dead nodes. It's more like we have a resizable cache and
846 // we're trying to find the right balance between how many elements to evict and how
847 // big to make the cache.)
848 //
849 // This policy is designed to avoid bad table bloat in the worst case where
850 // a program creates huge numbers of stale PDBs -- we would get this bloat
851 // if we had no GC -- while handling well the case where a node becomes
852 // stale but shortly afterwards is rewritten with a PDB and so becomes
853 // non-stale again (which happens quite often, eg. in perf/bz2). If we just
854 // remove all stale nodes as soon as possible, we just end up re-adding a
855 // lot of them in later again. The "sufficiently stale" approach avoids
856 // this. (If a program has many live PDBs, performance will just suck,
857 // there's no way around that.)
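// For example, with the default secVBitLimit of 1024 a GC runs when the
// table reaches 1024 nodes; if more than 512 of them survive (see
// MAX_SURVIVOR_PROPORTION below) the limit is doubled (TABLE_GROWTH_FACTOR)
// to 2048, so that GCs happen less often.  A node survives a GC if it was
// touched within the last MAX_STALE_AGE GCs, or if any of its bytes is
// still marked VA_BITS2_PARTDEFINED in the shadow map.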
858
859 static OSet* secVBitTable;
860
861 // Stats
862 static ULong sec_vbits_new_nodes = 0;
863 static ULong sec_vbits_updates = 0;
864
865 // This must be a power of two; this is checked in mc_pre_clo_init().
866 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover
867 // a larger address range) they take more space but we can get multiple
868 // partially-defined bytes in one if they are close to each other, reducing
869 // the number of total nodes. In practice sometimes they are clustered (eg.
870 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
871 // row), but often not. So we choose something intermediate.
872 #define BYTES_PER_SEC_VBIT_NODE 16
873
874 // We make the table bigger if more than this many nodes survive a GC.
875 #define MAX_SURVIVOR_PROPORTION 0.5
876
877 // Each time we make the table bigger, we increase it by this much.
878 #define TABLE_GROWTH_FACTOR 2
879
880 // This defines "sufficiently stale" -- any node that hasn't been touched in
881 // this many GCs will be removed.
882 #define MAX_STALE_AGE 2
883
884 // We GC the table when it gets this many nodes in it, ie. it's effectively
885 // the table size. It can change.
886 static Int secVBitLimit = 1024;
887
888 // The number of GCs done, used to age sec-V-bit nodes for eviction.
889 // Because it's unsigned, wrapping doesn't matter -- the right answer will
890 // come out anyway.
891 static UInt GCs_done = 0;
892
893 typedef
894 struct {
895 Addr a;
896 UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
897 UInt last_touched;
898 }
899 SecVBitNode;
900
901 static OSet* createSecVBitTable(void)
902 {
903 return VG_(OSetGen_Create)( offsetof(SecVBitNode, a),
904 NULL, // use fast comparisons
905 VG_(malloc), "mc.cSVT.1 (sec VBit table)",
906 VG_(free) );
907 }
908
909 static void gcSecVBitTable(void)
910 {
911 OSet* secVBitTable2;
912 SecVBitNode* n;
913 Int i, n_nodes = 0, n_survivors = 0;
914
915 GCs_done++;
916
917 // Create the new table.
918 secVBitTable2 = createSecVBitTable();
919
920 // Traverse the table, moving fresh nodes into the new table.
921 VG_(OSetGen_ResetIter)(secVBitTable);
922 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
923 Bool keep = False;
924 if ( (GCs_done - n->last_touched) <= MAX_STALE_AGE ) {
925 // Keep node if it's been touched recently enough (regardless of
926 // freshness/staleness).
927 keep = True;
928 } else {
929 // Keep node if any of its bytes are non-stale. Using
930 // get_vabits2() for the lookup is not very efficient, but I don't
931 // think it matters.
932 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
933 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
934 keep = True; // Found a non-stale byte, so keep
935 break;
936 }
937 }
938 }
939
940 if ( keep ) {
941 // Insert a copy of the node into the new table.
942 SecVBitNode* n2 =
943 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
944 *n2 = *n;
945 VG_(OSetGen_Insert)(secVBitTable2, n2);
946 }
947 }
948
949 // Get the before and after sizes.
950 n_nodes = VG_(OSetGen_Size)(secVBitTable);
951 n_survivors = VG_(OSetGen_Size)(secVBitTable2);
952
953 // Destroy the old table, and put the new one in its place.
954 VG_(OSetGen_Destroy)(secVBitTable);
955 secVBitTable = secVBitTable2;
956
957 if (VG_(clo_verbosity) > 1) {
958 Char percbuf[6];
959 VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf);
960 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n",
961 n_nodes, n_survivors, percbuf);
962 }
963
964 // Increase table size if necessary.
965 if (n_survivors > (secVBitLimit * MAX_SURVIVOR_PROPORTION)) {
966 secVBitLimit *= TABLE_GROWTH_FACTOR;
967 if (VG_(clo_verbosity) > 1)
968 VG_(message)(Vg_DebugMsg, "memcheck GC: increase table size to %d\n",
969 secVBitLimit);
970 }
971 }
972
973 static UWord get_sec_vbits8(Addr a)
974 {
975 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
976 Int amod = a % BYTES_PER_SEC_VBIT_NODE;
977 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
978 UChar vbits8;
979 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
980 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
981 // make it to the secondary V bits table.
982 vbits8 = n->vbits8[amod];
983 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
984 return vbits8;
985 }
986
987 static void set_sec_vbits8(Addr a, UWord vbits8)
988 {
989 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
990 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE;
991 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
992 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
993 // make it to the secondary V bits table.
994 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
995 if (n) {
996 n->vbits8[amod] = vbits8; // update
997 n->last_touched = GCs_done;
998 sec_vbits_updates++;
999 } else {
1000 // New node: assign the specific byte, make the rest invalid (they
1001 // should never be read as-is, but be cautious).
1002 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1003 n->a = aAligned;
1004 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1005 n->vbits8[i] = V_BITS8_UNDEFINED;
1006 }
1007 n->vbits8[amod] = vbits8;
1008 n->last_touched = GCs_done;
1009
1010 // Do a table GC if necessary. Nb: do this before inserting the new
1011 // node, to avoid erroneously GC'ing the new node.
1012 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1013 gcSecVBitTable();
1014 }
1015
1016 // Insert the new node.
1017 VG_(OSetGen_Insert)(secVBitTable, n);
1018 sec_vbits_new_nodes++;
1019
1020 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1021 if (n_secVBit_nodes > max_secVBit_nodes)
1022 max_secVBit_nodes = n_secVBit_nodes;
1023 }
1024 }
1025
1026 /* --------------- Endianness helpers --------------- */
1027
1028 /* Returns the offset in memory of the byteno-th least significant byte
1029 in a wordszB-sized word, given the specified endianness. */
1030 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1031 UWord byteno ) {
1032 return bigendian ? (wordszB-1-byteno) : byteno;
1033 }
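/* For example, with wordszB == 4: on a little-endian host
   byte_offset_w(4, False, 0) == 0 (the least significant byte sits at the
   lowest address), while on a big-endian host byte_offset_w(4, True, 0) == 3
   (it sits at the highest address). */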
1034
1035
1036 /* --------------- Ignored address ranges --------------- */
1037
1038 #define M_IGNORE_RANGES 4
1039
1040 typedef
1041 struct {
1042 Int used;
1043 Addr start[M_IGNORE_RANGES];
1044 Addr end[M_IGNORE_RANGES];
1045 }
1046 IgnoreRanges;
1047
1048 static IgnoreRanges ignoreRanges;
1049
1050 INLINE Bool MC_(in_ignored_range) ( Addr a )
1051 {
1052 Int i;
1053 if (LIKELY(ignoreRanges.used == 0))
1054 return False;
1055 for (i = 0; i < ignoreRanges.used; i++) {
1056 if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i])
1057 return True;
1058 }
1059 return False;
1060 }
1061
1062 /* Parse two Addr separated by a dash, or fail. */
1063
1064 static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 )
1065 {
1066 Bool ok = VG_(parse_Addr) (ppc, result1);
1067 if (!ok)
1068 return False;
1069 if (**ppc != '-')
1070 return False;
1071 (*ppc)++;
1072 ok = VG_(parse_Addr) (ppc, result2);
1073 if (!ok)
1074 return False;
1075 return True;
1076 }
1077
1078 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1079 fail. */
1080
1081 static Bool parse_ignore_ranges ( UChar* str0 )
1082 {
1083 Addr start, end;
1084 Bool ok;
1085 UChar* str = str0;
1086 UChar** ppc = &str;
1087 ignoreRanges.used = 0;
1088 while (1) {
1089 ok = parse_range(ppc, &start, &end);
1090 if (!ok)
1091 return False;
1092 if (ignoreRanges.used >= M_IGNORE_RANGES)
1093 return False;
1094 ignoreRanges.start[ignoreRanges.used] = start;
1095 ignoreRanges.end[ignoreRanges.used] = end;
1096 ignoreRanges.used++;
1097 if (**ppc == 0)
1098 return True;
1099 if (**ppc != ',')
1100 return False;
1101 (*ppc)++;
1102 }
1103 /*NOTREACHED*/
1104 return False;
1105 }
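/* For example, a string such as "0x11000000-0x11100000,0x22000000-0x22200000"
   (a comma-separated list of start-end address pairs, each pair joined by a
   dash) fills ignoreRanges with two [start,end) ranges and yields True; a
   malformed string, or more than M_IGNORE_RANGES ranges, yields False.  The
   example addresses are arbitrary. */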
1106
1107
1108 /* --------------- Load/store slow cases. --------------- */
1109
1110 static
1111 __attribute__((noinline))
1112 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1113 {
1114 /* Make up a 64-bit result V word, which contains the loaded data for
1115 valid addresses and Defined for invalid addresses. Iterate over
1116 the bytes in the word, from the most significant down to the
1117 least. */
1118 ULong vbits64 = V_BITS64_UNDEFINED;
1119 SizeT szB = nBits / 8;
1120 SSizeT i; // Must be signed.
1121 SizeT n_addrs_bad = 0;
1122 Addr ai;
1123 Bool partial_load_exemption_applies;
1124 UChar vbits8;
1125 Bool ok;
1126
1127 PROF_EVENT(30, "mc_LOADVn_slow");
1128
1129 /* ------------ BEGIN semi-fast cases ------------ */
1130 /* These deal quickly-ish with the common auxiliary primary map
1131 cases on 64-bit platforms. They are merely a speedup hack; they can be
1132 omitted without loss of correctness/functionality. Note that in
1133 both cases the "sizeof(void*) == 8" causes these cases to be
1134 folded out by compilers on 32-bit platforms. These are derived
1135 from LOADV64 and LOADV32.
1136 */
1137 if (LIKELY(sizeof(void*) == 8
1138 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1139 SecMap* sm = get_secmap_for_reading(a);
1140 UWord sm_off16 = SM_OFF_16(a);
1141 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1142 if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1143 return V_BITS64_DEFINED;
1144 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1145 return V_BITS64_UNDEFINED;
1146 /* else fall into the slow case */
1147 }
1148 if (LIKELY(sizeof(void*) == 8
1149 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1150 SecMap* sm = get_secmap_for_reading(a);
1151 UWord sm_off = SM_OFF(a);
1152 UWord vabits8 = sm->vabits8[sm_off];
1153 if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1154 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1155 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1156 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1157 /* else fall into slow case */
1158 }
1159 /* ------------ END semi-fast cases ------------ */
1160
1161 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1162
1163 for (i = szB-1; i >= 0; i--) {
1164 PROF_EVENT(31, "mc_LOADVn_slow(loop)");
1165 ai = a + byte_offset_w(szB, bigendian, i);
1166 ok = get_vbits8(ai, &vbits8);
1167 if (!ok) n_addrs_bad++;
1168 vbits64 <<= 8;
1169 vbits64 |= vbits8;
1170 }
1171
1172 /* This is a hack which avoids producing errors for code which
1173 insists on stepping along byte strings in aligned word-sized
1174 chunks, and there is a partially defined word at the end. (eg,
1175 optimised strlen). Such code is basically broken at least WRT
1176 semantics of ANSI C, but sometimes users don't have the option
1177 to fix it, and so this option is provided. Note that it now
1178 defaults to not-engaged.
1179
1180 A load from a partially-addressable place is allowed if:
1181 - the command-line flag is set
1182 - it's a word-sized, word-aligned load
1183 - at least one of the addresses in the word *is* valid
1184 */
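/* For example (with the flag enabled): a word-sized, word-aligned load
   whose first byte is addressable but whose remaining bytes fall in
   unaddressable padding is let through under this exemption -- the
   unaddressable bytes simply read back as Defined -- whereas without it
   an address error would be reported. */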
1185 partial_load_exemption_applies
1186 = MC_(clo_partial_loads_ok) && szB == VG_WORDSIZE
1187 && VG_IS_WORD_ALIGNED(a)
1188 && n_addrs_bad < VG_WORDSIZE;
1189
1190 if (n_addrs_bad > 0 && !partial_load_exemption_applies)
1191 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1192
1193 return vbits64;
1194 }
1195
1196
1197 static
1198 __attribute__((noinline))
1199 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1200 {
1201 SizeT szB = nBits / 8;
1202 SizeT i, n_addrs_bad = 0;
1203 UChar vbits8;
1204 Addr ai;
1205 Bool ok;
1206
1207 PROF_EVENT(35, "mc_STOREVn_slow");
1208
1209 /* ------------ BEGIN semi-fast cases ------------ */
1210 /* These deal quickly-ish with the common auxiliary primary map
1211 cases on 64-bit platforms. They are merely a speedup hack; they can be
1212 omitted without loss of correctness/functionality. Note that in
1213 both cases the "sizeof(void*) == 8" causes these cases to be
1214 folded out by compilers on 32-bit platforms. These are derived
1215 from STOREV64 and STOREV32.
1216 */
1217 if (LIKELY(sizeof(void*) == 8
1218 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1219 SecMap* sm = get_secmap_for_reading(a);
1220 UWord sm_off16 = SM_OFF_16(a);
1221 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1222 if (LIKELY( !is_distinguished_sm(sm) &&
1223 (VA_BITS16_DEFINED == vabits16 ||
1224 VA_BITS16_UNDEFINED == vabits16) )) {
1225 /* Handle common case quickly: a is suitably aligned, */
1226 /* is mapped, and is addressable. */
1227 // Convert full V-bits in register to compact 2-bit form.
1228 if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1229 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
1230 return;
1231 } else if (V_BITS64_UNDEFINED == vbytes) {
1232 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
1233 return;
1234 }
1235 /* else fall into the slow case */
1236 }
1237 /* else fall into the slow case */
1238 }
1239 if (LIKELY(sizeof(void*) == 8
1240 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1241 SecMap* sm = get_secmap_for_reading(a);
1242 UWord sm_off = SM_OFF(a);
1243 UWord vabits8 = sm->vabits8[sm_off];
1244 if (LIKELY( !is_distinguished_sm(sm) &&
1245 (VA_BITS8_DEFINED == vabits8 ||
1246 VA_BITS8_UNDEFINED == vabits8) )) {
1247 /* Handle common case quickly: a is suitably aligned, */
1248 /* is mapped, and is addressable. */
1249 // Convert full V-bits in register to compact 2-bit form.
1250 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1251 sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1252 return;
1253 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1254 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1255 return;
1256 }
1257 /* else fall into the slow case */
1258 }
1259 /* else fall into the slow case */
1260 }
1261 /* ------------ END semi-fast cases ------------ */
1262
1263 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1264
1265 /* Dump vbytes in memory, iterating from least to most significant
1266 byte. At the same time establish addressability of the location. */
1267 for (i = 0; i < szB; i++) {
1268 PROF_EVENT(36, "mc_STOREVn_slow(loop)");
1269 ai = a + byte_offset_w(szB, bigendian, i);
1270 vbits8 = vbytes & 0xff;
1271 ok = set_vbits8(ai, vbits8);
1272 if (!ok) n_addrs_bad++;
1273 vbytes >>= 8;
1274 }
1275
1276 /* If an address error has happened, report it. */
1277 if (n_addrs_bad > 0)
1278 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1279 }
1280
1281
1282 /*------------------------------------------------------------*/
1283 /*--- Setting permissions over address ranges. ---*/
1284 /*------------------------------------------------------------*/
1285
1286 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1287 UWord dsm_num )
1288 {
1289 UWord sm_off, sm_off16;
1290 UWord vabits2 = vabits16 & 0x3;
1291 SizeT lenA, lenB, len_to_next_secmap;
1292 Addr aNext;
1293 SecMap* sm;
1294 SecMap** sm_ptr;
1295 SecMap* example_dsm;
1296
1297 PROF_EVENT(150, "set_address_range_perms");
1298
1299 /* Check the V+A bits make sense. */
1300 tl_assert(VA_BITS16_NOACCESS == vabits16 ||
1301 VA_BITS16_UNDEFINED == vabits16 ||
1302 VA_BITS16_DEFINED == vabits16);
1303
1304 // This code should never write PDBs; ensure this. (See comment above
1305 // set_vabits2().)
1306 tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1307
1308 if (lenT == 0)
1309 return;
1310
1311 if (lenT > 256 * 1024 * 1024) {
1312 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1313 Char* s = "unknown???";
1314 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1315 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1316 if (vabits16 == VA_BITS16_DEFINED ) s = "defined";
1317 VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1318 "large range [0x%lx, 0x%lx) (%s)\n",
1319 a, a + lenT, s);
1320 }
1321 }
1322
1323 #ifndef PERF_FAST_SARP
1324 /*------------------ debug-only case ------------------ */
1325 {
1326 // Endianness doesn't matter here because all bytes are being set to
1327 // the same value.
1328 // Nb: We don't have to worry about updating the sec-V-bits table
1329 // after these set_vabits2() calls because this code never writes
1330 // VA_BITS2_PARTDEFINED values.
1331 SizeT i;
1332 for (i = 0; i < lenT; i++) {
1333 set_vabits2(a + i, vabits2);
1334 }
1335 return;
1336 }
1337 #endif
1338
1339 /*------------------ standard handling ------------------ */
1340
1341 /* Get the distinguished secondary that we might want
1342 to use (part of the space-compression scheme). */
1343 example_dsm = &sm_distinguished[dsm_num];
1344
1345 // We have to handle ranges covering various combinations of partial and
1346 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1347 // Cases marked with a '*' are common.
1348 //
1349 // TYPE PARTS USED
1350 // ---- ----------
1351 // * one partial sec-map (p) 1
1352 // - one whole sec-map (P) 2
1353 //
1354 // * two partial sec-maps (pp) 1,3
1355 // - one partial, one whole sec-map (pP) 1,2
1356 // - one whole, one partial sec-map (Pp) 2,3
1357 // - two whole sec-maps (PP) 2,2
1358 //
1359 // * one partial, one whole, one partial (pPp) 1,2,3
1360 // - one partial, two whole (pPP) 1,2,2
1361 // - two whole, one partial (PPp) 2,2,3
1362 // - three whole (PPP) 2,2,2
1363 //
1364 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1365 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1366 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1367 // - N whole (PP...PP) 2,2...2,2
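// For example (assuming SM_SIZE == 0x10000): a call with a == 0x5000 and
// lenT == 0x20000 is the pPp case.  Part 1 handles lenA == 0xB000, up to
// the sec-map boundary at 0x10000; Part 2 points the whole sec-map
// covering [0x10000, 0x20000) at the example DSM; and Part 3 handles the
// remaining lenB == 0x5000 in [0x20000, 0x25000).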
1368
1369 // Break up total length (lenT) into two parts: length in the first
1370 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
1371 aNext = start_of_this_sm(a) + SM_SIZE;
1372 len_to_next_secmap = aNext - a;
1373 if ( lenT <= len_to_next_secmap ) {
1374 // Range entirely within one sec-map. Covers almost all cases.
1375 PROF_EVENT(151, "set_address_range_perms-single-secmap");
1376 lenA = lenT;
1377 lenB = 0;
1378 } else if (is_start_of_sm(a)) {
1379 // Range spans at least one whole sec-map, and starts at the beginning
1380 // of a sec-map; skip to Part 2.
1381 PROF_EVENT(152, "set_address_range_perms-startof-secmap");
1382 lenA = 0;
1383 lenB = lenT;
1384 goto part2;
1385 } else {
1386 // Range spans two or more sec-maps, first one is partial.
1387 PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
1388 lenA = len_to_next_secmap;
1389 lenB = lenT - lenA;
1390 }
1391
1392 //------------------------------------------------------------------------
1393 // Part 1: Deal with the first sec_map. Most of the time the range will be
1394 // entirely within a sec_map and this part alone will suffice. Also,
1395 // doing it this way lets us avoid repeatedly testing for the crossing of
1396 // a sec-map boundary within these loops.
1397 //------------------------------------------------------------------------
1398
1399 // If it's distinguished, make it undistinguished if necessary.
1400 sm_ptr = get_secmap_ptr(a);
1401 if (is_distinguished_sm(*sm_ptr)) {
1402 if (*sm_ptr == example_dsm) {
1403 // Sec-map already has the V+A bits that we want, so skip.
1404 PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
1405 a = aNext;
1406 lenA = 0;
1407 } else {
1408 PROF_EVENT(155, "set_address_range_perms-dist-sm1");
1409 *sm_ptr = copy_for_writing(*sm_ptr);
1410 }
1411 }
1412 sm = *sm_ptr;
1413
1414 // 1 byte steps
1415 while (True) {
1416 if (VG_IS_8_ALIGNED(a)) break;
1417 if (lenA < 1) break;
1418 PROF_EVENT(156, "set_address_range_perms-loop1a");
1419 sm_off = SM_OFF(a);
1420 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1421 a += 1;
1422 lenA -= 1;
1423 }
1424 // 8-aligned, 8 byte steps
1425 while (True) {
1426 if (lenA < 8) break;
1427 PROF_EVENT(157, "set_address_range_perms-loop8a");
1428 sm_off16 = SM_OFF_16(a);
1429 ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1430 a += 8;
1431 lenA -= 8;
1432 }
1433 // 1 byte steps
1434 while (True) {
1435 if (lenA < 1) break;
1436 PROF_EVENT(158, "set_address_range_perms-loop1b");
1437 sm_off = SM_OFF(a);
1438 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1439 a += 1;
1440 lenA -= 1;
1441 }
1442
1443 // We've finished the first sec-map. Is that it?
1444 if (lenB == 0)
1445 return;
1446
1447 //------------------------------------------------------------------------
1448 // Part 2: Fast-set entire sec-maps at a time.
1449 //------------------------------------------------------------------------
1450 part2:
1451 // 64KB-aligned, 64KB steps.
1452 // Nb: we can reach here with lenB < SM_SIZE
1453 tl_assert(0 == lenA);
1454 while (True) {
1455 if (lenB < SM_SIZE) break;
1456 tl_assert(is_start_of_sm(a));
1457 PROF_EVENT(159, "set_address_range_perms-loop64K");
1458 sm_ptr = get_secmap_ptr(a);
1459 if (!is_distinguished_sm(*sm_ptr)) {
1460 PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
1461 // Free the non-distinguished sec-map that we're replacing. This
1462 // case happens moderately often, enough to be worthwhile.
1463 VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1464 }
1465 update_SM_counts(*sm_ptr, example_dsm);
1466 // Make the sec-map entry point to the example DSM
1467 *sm_ptr = example_dsm;
1468 lenB -= SM_SIZE;
1469 a += SM_SIZE;
1470 }
1471
1472 // We've finished the whole sec-maps. Is that it?
1473 if (lenB == 0)
1474 return;
1475
1476 //------------------------------------------------------------------------
1477 // Part 3: Finish off the final partial sec-map, if necessary.
1478 //------------------------------------------------------------------------
1479
1480 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1481
1482 // If it's distinguished, make it undistinguished if necessary.
1483 sm_ptr = get_secmap_ptr(a);
1484 if (is_distinguished_sm(*sm_ptr)) {
1485 if (*sm_ptr == example_dsm) {
1486 // Sec-map already has the V+A bits that we want, so stop.
1487 PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
1488 return;
1489 } else {
1490 PROF_EVENT(162, "set_address_range_perms-dist-sm2");
1491 *sm_ptr = copy_for_writing(*sm_ptr);
1492 }
1493 }
1494 sm = *sm_ptr;
1495
1496 // 8-aligned, 8 byte steps
1497 while (True) {
1498 if (lenB < 8) break;
1499 PROF_EVENT(163, "set_address_range_perms-loop8b");
1500 sm_off16 = SM_OFF_16(a);
1501 ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1502 a += 8;
1503 lenB -= 8;
1504 }
1505 // 1 byte steps
1506 while (True) {
1507 if (lenB < 1) return;
1508 PROF_EVENT(164, "set_address_range_perms-loop1c");
1509 sm_off = SM_OFF(a);
1510 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1511 a += 1;
1512 lenB -= 1;
1513 }
1514 }
1515
1516
1517 /* --- Set permissions for arbitrary address ranges --- */
1518
1519 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1520 {
1521 PROF_EVENT(40, "MC_(make_mem_noaccess)");
1522 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1523 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1524 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1525 ocache_sarp_Clear_Origins ( a, len );
1526 }
1527
1528 static void make_mem_undefined ( Addr a, SizeT len )
1529 {
1530 PROF_EVENT(41, "make_mem_undefined");
1531 DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1532 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1533 }
1534
1535 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1536 {
1537 PROF_EVENT(41, "MC_(make_mem_undefined)");
1538 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1539 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1540 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1541 ocache_sarp_Set_Origins ( a, len, otag );
1542 }
1543
1544 static
1545 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1546 ThreadId tid, UInt okind )
1547 {
1548 UInt ecu;
1549 ExeContext* here;
1550 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1551 if it is invalid. So no need to do it here. */
1552 tl_assert(okind <= 3);
1553 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1554 tl_assert(here);
1555 ecu = VG_(get_ECU_from_ExeContext)(here);
1556 tl_assert(VG_(is_plausible_ECU)(ecu));
1557 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1558 }
1559
1560 static
1561 void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) {
1562 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1563 }
1564
1565
1566 void MC_(make_mem_defined) ( Addr a, SizeT len )
1567 {
1568 PROF_EVENT(42, "MC_(make_mem_defined)");
1569 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1570 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1571 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1572 ocache_sarp_Clear_Origins ( a, len );
1573 }
1574
1575 /* For each byte in [a,a+len), if the byte is addressable, make it be
1576    defined, but if it isn't addressable, leave it alone. In other
1577    words, a version of MC_(make_mem_defined) that doesn't mess with
1578    addressability. Low-performance implementation. */
1579 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1580 {
1581 SizeT i;
1582 UChar vabits2;
1583 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1584 for (i = 0; i < len; i++) {
1585 vabits2 = get_vabits2( a+i );
1586 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1587 set_vabits2(a+i, VA_BITS2_DEFINED);
1588 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1589 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1590 }
1591 }
1592 }
1593 }
1594
1595 /* Similarly (needed for mprotect handling ..) */
1596 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1597 {
1598 SizeT i;
1599 UChar vabits2;
1600 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1601 for (i = 0; i < len; i++) {
1602 vabits2 = get_vabits2( a+i );
1603 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1604 set_vabits2(a+i, VA_BITS2_DEFINED);
1605 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1606 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1607 }
1608 }
1609 }
1610 }
1611
1612 /* --- Block-copy permissions (needed for implementing realloc() and
1613 sys_mremap). --- */
1614
1615 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1616 {
1617 SizeT i, j;
1618 UChar vabits2, vabits8;
1619 Bool aligned, nooverlap;
1620
1621 DEBUG("MC_(copy_address_range_state)\n");
1622 PROF_EVENT(50, "MC_(copy_address_range_state)");
1623
1624 if (len == 0 || src == dst)
1625 return;
1626
1627 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1628 nooverlap = src+len <= dst || dst+len <= src;
1629
1630 if (nooverlap && aligned) {
1631
1632 /* Vectorised fast case, when no overlap and suitably aligned */
1633 /* vector loop */
1634 i = 0;
1635 while (len >= 4) {
1636 vabits8 = get_vabits8_for_aligned_word32( src+i );
1637 set_vabits8_for_aligned_word32( dst+i, vabits8 );
1638 if (LIKELY(VA_BITS8_DEFINED == vabits8
1639 || VA_BITS8_UNDEFINED == vabits8
1640 || VA_BITS8_NOACCESS == vabits8)) {
1641 /* do nothing */
1642 } else {
1643 /* have to copy secondary map info */
1644 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1645 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1646 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1647 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1648 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1649 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1650 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1651 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1652 }
1653 i += 4;
1654 len -= 4;
1655 }
1656 /* fixup loop */
1657 while (len >= 1) {
1658 vabits2 = get_vabits2( src+i );
1659 set_vabits2( dst+i, vabits2 );
1660 if (VA_BITS2_PARTDEFINED == vabits2) {
1661 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1662 }
1663 i++;
1664 len--;
1665 }
1666
1667 } else {
1668
1669 /* We have to do things the slow way */
1670 if (src < dst) {
1671 for (i = 0, j = len-1; i < len; i++, j--) {
1672 PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
1673 vabits2 = get_vabits2( src+j );
1674 set_vabits2( dst+j, vabits2 );
1675 if (VA_BITS2_PARTDEFINED == vabits2) {
1676 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1677 }
1678 }
1679 }
1680
1681 if (src > dst) {
1682 for (i = 0; i < len; i++) {
1683 PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
1684 vabits2 = get_vabits2( src+i );
1685 set_vabits2( dst+i, vabits2 );
1686 if (VA_BITS2_PARTDEFINED == vabits2) {
1687 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1688 }
1689 }
1690 }
1691 }
1692
1693 }
1694
1695
1696 /*------------------------------------------------------------*/
1697 /*--- Origin tracking stuff - cache basics ---*/
1698 /*------------------------------------------------------------*/
1699
1700 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1701 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1702
1703 Note that this implementation draws inspiration from the "origin
1704 tracking by value piggybacking" scheme described in "Tracking Bad
1705 Apples: Reporting the Origin of Null and Undefined Value Errors"
1706 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1707 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1708 implemented completely differently.
1709
1710 Origin tags and ECUs -- about the shadow values
1711 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1712
1713 This implementation tracks the defining point of all uninitialised
1714 values using so called "origin tags", which are 32-bit integers,
1715 rather than using the values themselves to encode the origins. The
1716    latter, so-called "value piggybacking", is what the OOPSLA07 paper
1717 describes.
1718
1719 Origin tags, as tracked by the machinery below, are 32-bit unsigned
1720 ints (UInts), regardless of the machine's word size. Each tag
1721 comprises an upper 30-bit ECU field and a lower 2-bit
1722 'kind' field. The ECU field is a number given out by m_execontext
1723 and has a 1-1 mapping with ExeContext*s. An ECU can be used
1724 directly as an origin tag (otag), but in fact we want to put
1725    additional information in the 'kind' field to indicate roughly where the
1726 tag came from. This helps print more understandable error messages
1727 for the user -- it has no other purpose. In summary:
1728
1729 * Both ECUs and origin tags are represented as 32-bit words
1730
1731 * m_execontext and the core-tool interface deal purely in ECUs.
1732 They have no knowledge of origin tags - that is a purely
1733 Memcheck-internal matter.
1734
1735 * all valid ECUs have the lowest 2 bits zero and at least
1736 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
1737
1738 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
1739 constants defined in mc_include.h.
1740
1741 * to convert an otag back to an ECU, AND it with ~3
1742
1743 One important fact is that no valid otag is zero. A zero otag is
1744 used by the implementation to indicate "no origin", which could
1745 mean that either the value is defined, or it is undefined but the
1746 implementation somehow managed to lose the origin.
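
   To make the conversion rules above concrete, here is a minimal
   sketch (it assumes 'here' is some already-recorded ExeContext*, and
   uses MC_OKIND_STACK purely as an example kind):

      UInt ecu  = VG_(get_ECU_from_ExeContext)( here );
      tl_assert(VG_(is_plausible_ECU)(ecu));  // low 2 bits are zero
      UInt otag = ecu | MC_OKIND_STACK;       // ECU -> otag
      UInt back = otag & ~3;                  // otag -> ECU
      tl_assert(back == ecu);
      tl_assert(otag != 0);                   // valid otags are never zero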
1747
1748 The ECU used for memory created by malloc etc is derived from the
1749 stack trace at the time the malloc etc happens. This means the
1750 mechanism can show the exact allocation point for heap-created
1751 uninitialised values.
1752
1753 In contrast, it is simply too expensive to create a complete
1754 backtrace for each stack allocation. Therefore we merely use a
1755 depth-1 backtrace for stack allocations, which can be done once at
1756 translation time, rather than N times at run time. The result of
1757 this is that, for stack created uninitialised values, Memcheck can
1758 only show the allocating function, and not what called it.
1759 Furthermore, compilers tend to move the stack pointer just once at
1760 the start of the function, to allocate all locals, and so in fact
1761 the stack origin almost always simply points to the opening brace
1762 of the function. Net result is, for stack origins, the mechanism
1763 can tell you in which function the undefined value was created, but
1764 that's all. Users will need to carefully check all locals in the
1765 specified function.
1766
1767 Shadowing registers and memory
1768 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1769
1770 Memory is shadowed using a two level cache structure (ocacheL1 and
1771 ocacheL2). Memory references are first directed to ocacheL1. This
1772 is a traditional 2-way set associative cache with 32-byte lines and
1773 approximate LRU replacement within each set.
1774
1775 A naive implementation would require storing one 32 bit otag for
1776 each byte of memory covered, a 4:1 space overhead. Instead, there
1777 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
1778 that shows which of the 4 bytes have that shadow value and which
1779 have a shadow value of zero (indicating no origin). Hence a lot of
1780 space is saved, but the cost is that only one different origin per
1781 4 bytes of address space can be represented. This is a source of
1782 imprecision, but how much of a problem it really is remains to be
1783 seen.
1784
1785 A cache line that contains all zeroes ("no origins") contains no
1786 useful information, and can be ejected from the L1 cache "for
1787 free", in the sense that a read miss on the L1 causes a line of
1788 zeroes to be installed. However, ejecting a line containing
1789 nonzeroes risks losing origin information permanently. In order to
1790 prevent such lossage, ejected nonzero lines are placed in a
1791 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
1792 lines. This can grow arbitrarily large, and so should ensure that
1793 Memcheck runs out of memory in preference to losing useful origin
1794 info due to cache size limitations.
1795
1796 Shadowing registers is a bit tricky, because the shadow values are
1797 32 bits, regardless of the size of the register. That gives a
1798 problem for registers smaller than 32 bits. The solution is to
1799 find spaces in the guest state that are unused, and use those to
1800 shadow guest state fragments smaller than 32 bits. For example, on
1801 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
1802 shadow are allocated for the register's otag, then there are still
1803 12 bytes left over which could be used to shadow 3 other values.
1804
1805 This implies there is some non-obvious mapping from guest state
1806 (start,length) pairs to the relevant shadow offset (for the origin
1807 tags). And it is unfortunately guest-architecture specific. The
1808 mapping is contained in mc_machine.c, which is quite lengthy but
1809 straightforward.
1810
1811 Instrumenting the IR
1812 ~~~~~~~~~~~~~~~~~~~~
1813
1814 Instrumentation is largely straightforward, and done by the
1815 functions schemeE and schemeS in mc_translate.c. These generate
1816 code for handling the origin tags of expressions (E) and statements
1817 (S) respectively. The rather strange names are a reference to the
1818 "compilation schemes" shown in Simon Peyton Jones' book "The
1819 Implementation of Functional Programming Languages" (Prentice Hall,
1820 1987, see
1821 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
1822
1823 schemeS merely arranges to move shadow values around the guest
1824 state to track the incoming IR. schemeE is largely trivial too.
1825 The only significant point is how to compute the otag corresponding
1826 to binary (or ternary, quaternary, etc) operator applications. The
1827 rule is simple: just take whichever value is larger (32-bit
1828 unsigned max). Constants get the special value zero. Hence this
1829 rule always propagates a nonzero (known) otag in preference to a
1830 zero (unknown, or more likely, value-is-defined) tag, as we want.
1831 If two different undefined values are inputs to a binary operator
1832 application, then which is propagated is arbitrary, but that
1833 doesn't matter, since the program is erroneous in using either of
1834 the values, and so there's no point in attempting to propagate
1835 both.
1836
1837 Since constants are abstracted to (otag) zero, much of the
1838 instrumentation code can be folded out without difficulty by the
1839 generic post-instrumentation IR cleanup pass, using these rules:
1840    Max32U(0,x) -> x, Max32U(x,0) -> x, Max32U(x,y) where x and y are
1841    constants is evaluated at JIT time, together with the resulting dead
1842    code removal.  In practice this causes surprisingly few Max32Us to
1843    survive through to backend code generation.
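
   Expressed as a small sketch in C (a stand-in for the IR that
   schemeE actually emits; 'otag_x' and 'otag_y' are the shadow otags
   of the two operands, with constants contributing zero):

      static UInt otag_of_binop ( UInt otag_x, UInt otag_y ) {
         // the unsigned max always prefers a known (nonzero) origin
         // over an unknown/defined (zero) one, which is what we want
         return otag_x > otag_y ? otag_x : otag_y;   // Max32U
      }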
1844
1845 Integration with the V-bits machinery
1846 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1847
1848 This is again largely straightforward. Mostly the otag and V bits
1849 stuff are independent. The only point of interaction is when the V
1850 bits instrumenter creates a call to a helper function to report an
1851 uninitialised value error -- in that case it must first use schemeE
1852 to get hold of the origin tag expression for the value, and pass
1853 that to the helper too.
1854
1855 There is the usual stuff to do with setting address range
1856 permissions. When memory is painted undefined, we must also know
1857 the origin tag to paint with, which involves some tedious plumbing,
1858 particularly to do with the fast case stack handlers. When memory
1859 is painted defined or noaccess then the origin tags must be forced
1860 to zero.
1861
1862 One of the goals of the implementation was to ensure that the
1863 non-origin tracking mode isn't slowed down at all. To do this,
1864 various functions to do with memory permissions setting (again,
1865 mostly pertaining to the stack) are duplicated for the with- and
1866 without-otag case.
1867
1868 Dealing with stack redzones, and the NIA cache
1869 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1870
1871 This is one of the few non-obvious parts of the implementation.
1872
1873 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
1874 reserved area below the stack pointer, that can be used as scratch
1875 space by compiler generated code for functions. In the Memcheck
1876 sources this is referred to as the "stack redzone". The important
1877 thing here is that such redzones are considered volatile across
1878 function calls and returns. So Memcheck takes care to mark them as
1879 undefined for each call and return, on the afflicted platforms.
1880 Past experience shows this is essential in order to get reliable
1881 messages about uninitialised values that come from the stack.
1882
1883 So the question is, when we paint a redzone undefined, what origin
1884 tag should we use for it? Consider a function f() calling g(). If
1885 we paint the redzone using an otag derived from the ExeContext of
1886 the CALL/BL instruction in f, then any errors in g causing it to
1887 use uninitialised values that happen to lie in the redzone, will be
1888 reported as having their origin in f. Which is highly confusing.
1889
1890 The same applies for returns: if, on a return, we paint the redzone
1891    using an origin tag derived from the ExeContext of the RET/BLR
1892 instruction in g, then any later errors in f causing it to use
1893 uninitialised values in the redzone, will be reported as having
1894 their origin in g. Which is just as confusing.
1895
1896 To do it right, in both cases we need to use an origin tag which
1897 pertains to the instruction which dynamically follows the CALL/BL
1898 or RET/BLR. In short, one derived from the NIA - the "next
1899 instruction address".
1900
1901 To make this work, Memcheck's redzone-painting helper,
1902 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
1903 NIA. It converts the NIA to a 1-element ExeContext, and uses that
1904 ExeContext's ECU as the basis for the otag used to paint the
1905 redzone. The expensive part of this is converting an NIA into an
1906 ECU, since this happens once for every call and every return. So
1907 we use a simple 511-line, 2-way set associative cache
1908 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
1909 the cost out.
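
   Illustratively, the lookup works along these lines (purely a
   sketch; the real nia_to_ecu_cache declaration lives elsewhere in
   Memcheck and its field and helper names -- 'cache', 'N_NIA_SETS',
   'ecu_from_ExeContext_of' -- are invented here for exposition):

      static UInt nia_to_ecu ( Addr nia ) {
         UWord set = (nia >> 2) % N_NIA_SETS;                  // pick a set
         if (cache[set].nia0 == nia) return cache[set].ecu0;   // way-0 hit
         if (cache[set].nia1 == nia) return cache[set].ecu1;   // way-1 hit
         UInt ecu = ecu_from_ExeContext_of(nia);  // slow path: 1-frame ExeContext
         cache[set].nia1 = cache[set].nia0;       // demote way 0 to way 1
         cache[set].ecu1 = cache[set].ecu0;
         cache[set].nia0 = nia;                   // install the new mapping
         cache[set].ecu0 = ecu;
         return ecu;
      }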
1910
1911 Further background comments
1912 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1913
1914 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
1915 > it really just the address of the relevant ExeContext?
1916
1917 Well, it's not the address, but a value which has a 1-1 mapping
1918 with ExeContexts, and is guaranteed not to be zero, since zero
1919 denotes (to memcheck) "unknown origin or defined value". So these
1920 UInts are just numbers starting at 4 and incrementing by 4; each
1921 ExeContext is given a number when it is created. (*** NOTE this
1922 confuses otags and ECUs; see comments above ***).
1923
1924 Making these otags 32-bit regardless of the machine's word size
1925 makes the 64-bit implementation easier (next para). And it doesn't
1926 really limit us in any way, since for the tags to overflow would
1927 require that the program somehow caused 2^30-1 different
1928 ExeContexts to be created, in which case it is probably in deep
1929 trouble. Not to mention V will have soaked up many tens of
1930 gigabytes of memory merely to store them all.
1931
1932 So having 64-bit origins doesn't really buy you anything, and has
1933 the following downsides:
1934
1935 Suppose that instead, an otag is a UWord. This would mean that, on
1936 a 64-bit target,
1937
1938 1. It becomes hard to shadow any element of guest state which is
1939 smaller than 8 bytes. To do so means you'd need to find some
1940 8-byte-sized hole in the guest state which you don't want to
1941 shadow, and use that instead to hold the otag. On ppc64, the
1942 condition code register(s) are split into 20 UChar sized pieces,
1943 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
1944 and so that would entail finding 160 bytes somewhere else in the
1945 guest state.
1946
1947 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
1948 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
1949 same) and so I had to look for 4 untracked otag-sized areas in
1950 the guest state to make that possible.
1951
1952 The same problem exists of course when origin tags are only 32
1953 bits, but it's less extreme.
1954
1955 2. (More compelling) it doubles the size of the origin shadow
1956 memory. Given that the shadow memory is organised as a fixed
1957 size cache, and that accuracy of tracking is limited by origins
1958 falling out the cache due to space conflicts, this isn't good.
1959
1960 > Another question: is the origin tracking perfect, or are there
1961 > cases where it fails to determine an origin?
1962
1963    It is imperfect for at least the following reasons, and
1964 probably more:
1965
1966 * Insufficient capacity in the origin cache. When a line is
1967 evicted from the cache it is gone forever, and so subsequent
1968 queries for the line produce zero, indicating no origin
1969 information. Interestingly, a line containing all zeroes can be
1970 evicted "free" from the cache, since it contains no useful
1971 information, so there is scope perhaps for some cleverer cache
1972 management schemes. (*** NOTE, with the introduction of the
1973 second level origin tag cache, ocacheL2, this is no longer a
1974 problem. ***)
1975
1976 * The origin cache only stores one otag per 32-bits of address
1977 space, plus 4 bits indicating which of the 4 bytes has that tag
1978 and which are considered defined. The result is that if two
1979 undefined bytes in the same word are stored in memory, the first
1980 stored byte's origin will be lost and replaced by the origin for
1981 the second byte.
1982
1983 * Nonzero origin tags for defined values. Consider a binary
1984 operator application op(x,y). Suppose y is undefined (and so has
1985 a valid nonzero origin tag), and x is defined, but erroneously
1986 has a nonzero origin tag (defined values should have tag zero).
1987 If the erroneous tag has a numeric value greater than y's tag,
1988 then the rule for propagating origin tags though binary
1989 operations, which is simply to take the unsigned max of the two
1990 tags, will erroneously propagate x's tag rather than y's.
1991
1992 * Some obscure uses of x86/amd64 byte registers can cause lossage
1993 or confusion of origins. %AH .. %DH are treated as different
1994 from, and unrelated to, their parent registers, %EAX .. %EDX.
1995      So some weird sequences like
1996
1997 movb undefined-value, %AH
1998 movb defined-value, %AL
1999 .. use %AX or %EAX ..
2000
2001 will cause the origin attributed to %AH to be ignored, since %AL,
2002 %AX, %EAX are treated as the same register, and %AH as a
2003 completely separate one.
2004
2005 But having said all that, it actually seems to work fairly well in
2006 practice.
2007 */
2008
2009 static UWord stats_ocacheL1_find = 0;
2010 static UWord stats_ocacheL1_found_at_1 = 0;
2011 static UWord stats_ocacheL1_found_at_N = 0;
2012 static UWord stats_ocacheL1_misses = 0;
2013 static UWord stats_ocacheL1_lossage = 0;
2014 static UWord stats_ocacheL1_movefwds = 0;
2015
2016 static UWord stats__ocacheL2_refs = 0;
2017 static UWord stats__ocacheL2_misses = 0;
2018 static UWord stats__ocacheL2_n_nodes_max = 0;
2019
2020 /* Cache of 32-bit values, one every 32 bits of address space */
2021
2022 #define OC_BITS_PER_LINE 5
2023 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2024
2025 static INLINE UWord oc_line_offset ( Addr a ) {
2026 return (a >> 2) & (OC_W32S_PER_LINE - 1);
2027 }
2028 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2029 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2030 }
2031
2032 #define OC_LINES_PER_SET 2
2033
2034 #define OC_N_SET_BITS 20
2035 #define OC_N_SETS (1 << OC_N_SET_BITS)
2036
2037 /* These settings give:
2038 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful
2039 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful
2040 */
2041
2042 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2043
2044
2045 typedef
2046 struct {
2047 Addr tag;
2048 UInt w32[OC_W32S_PER_LINE];
2049 UChar descr[OC_W32S_PER_LINE];
2050 }
2051 OCacheLine;
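
/* A quick check of the size figures quoted a few lines above
   (assuming no padding beyond natural alignment): on a 64-bit host
   sizeof(OCacheLine) is 8 (tag) + 4*8 (w32) + 8 (descr) = 48 bytes,
   so the whole L1 ocache is 2^20 sets * 2 lines * 48 = 100,663,296
   bytes, of which the w32 payload (2^20 * 2 * 32 = 67,108,864 bytes)
   is the useful part.  On a 32-bit host the tag shrinks to 4 bytes,
   giving 44-byte lines and 92,274,688 bytes in total. */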
2052
2053 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2054 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2055 and 'z' if all the represented tags are zero. */
2056 static UChar classify_OCacheLine ( OCacheLine* line )
2057 {
2058 UWord i;
2059 if (line->tag == 1/*invalid*/)
2060 return 'e'; /* EMPTY */
2061 tl_assert(is_valid_oc_tag(line->tag));
2062 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2063 tl_assert(0 == ((~0xF) & line->descr[i]));
2064 if (line->w32[i] > 0 && line->descr[i] > 0)
2065 return 'n'; /* NONZERO - contains useful info */
2066 }
2067 return 'z'; /* ZERO - no useful info */
2068 }
2069
2070 typedef
2071 struct {
2072 OCacheLine line[OC_LINES_PER_SET];
2073 }
2074 OCacheSet;
2075
2076 typedef
2077 struct {
2078 OCacheSet set[OC_N_SETS];
2079 }
2080 OCache;
2081
2082 static OCache* ocacheL1 = NULL;
2083 static UWord ocacheL1_event_ctr = 0;
2084
2085 static void init_ocacheL2 ( void ); /* fwds */
2086 static void init_OCache ( void )
2087 {
2088 UWord line, set;
2089 tl_assert(MC_(clo_mc_level) >= 3);
2090 tl_assert(ocacheL1 == NULL);
2091 ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2092 if (ocacheL1 == NULL) {
2093 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2094 sizeof(OCache) );
2095 }
2096 tl_assert(ocacheL1 != NULL);
2097 for (set = 0; set < OC_N_SETS; set++) {
2098 for (line = 0; line < OC_LINES_PER_SET; line++) {
2099 ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2100 }
2101 }
2102 init_ocacheL2();
2103 }
2104
2105 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2106 {
2107 OCacheLine tmp;
2108 stats_ocacheL1_movefwds++;
2109 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2110 tmp = set->line[lineno-1];
2111 set->line[lineno-1] = set->line[lineno];
2112 set->line[lineno] = tmp;
2113 }
2114
2115 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2116 UWord i;
2117 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2118 line->w32[i] = 0; /* NO ORIGIN */
2119 line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2120 }
2121 line->tag = tag;
2122 }
2123
2124 //////////////////////////////////////////////////////////////
2125 //// OCache backing store
2126
2127 static OSet* ocacheL2 = NULL;
2128
2129 static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) {
2130 return VG_(malloc)(cc, szB);
2131 }
2132 static void ocacheL2_free ( void* v ) {
2133 VG_(free)( v );
2134 }
2135
2136 /* Stats: # nodes currently in tree */
2137 static UWord stats__ocacheL2_n_nodes = 0;
2138
2139 static void init_ocacheL2 ( void )
2140 {
2141 tl_assert(!ocacheL2);
2142 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2143 tl_assert(0 == offsetof(OCacheLine,tag));
2144 ocacheL2
2145 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2146 NULL, /* fast cmp */
2147 ocacheL2_malloc, "mc.ioL2", ocacheL2_free );
2148 tl_assert(ocacheL2);
2149 stats__ocacheL2_n_nodes = 0;
2150 }
2151
2152 /* Find line with the given tag in the tree, or NULL if not found. */
2153 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2154 {
2155 OCacheLine* line;
2156 tl_assert(is_valid_oc_tag(tag));
2157 stats__ocacheL2_refs++;
2158 line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2159 return line;
2160 }
2161
2162 /* Delete the line with the given tag from the tree, if it is present, and
2163 free up the associated memory. */
2164 static void ocacheL2_del_tag ( Addr tag )
2165 {
2166 OCacheLine* line;
2167 tl_assert(is_valid_oc_tag(tag));
2168 stats__ocacheL2_refs++;
2169 line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2170 if (line) {
2171 VG_(OSetGen_FreeNode)(ocacheL2, line);
2172 tl_assert(stats__ocacheL2_n_nodes > 0);
2173 stats__ocacheL2_n_nodes--;
2174 }
2175 }
2176
2177 /* Add a copy of the given line to the tree. It must not already be
2178 present. */
2179 static void ocacheL2_add_line ( OCacheLine* line )
2180 {
2181 OCacheLine* copy;
2182 tl_assert(is_valid_oc_tag(line->tag));
2183 copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2184 tl_assert(copy);
2185 *copy = *line;
2186 stats__ocacheL2_refs++;
2187 VG_(OSetGen_Insert)( ocacheL2, copy );
2188 stats__ocacheL2_n_nodes++;
2189 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2190 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2191 }
2192
2193 ////
2194 //////////////////////////////////////////////////////////////
2195
2196 __attribute__((noinline))
2197 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2198 {
2199 OCacheLine *victim, *inL2;
2200 UChar c;
2201 UWord line;
2202 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2203 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2204 UWord tag = a & tagmask;
2205 tl_assert(setno >= 0 && setno < OC_N_SETS);
2206
2207 /* we already tried line == 0; skip therefore. */
2208 for (line = 1; line < OC_LINES_PER_SET; line++) {
2209 if (ocacheL1->set[setno].line[line].tag == tag) {
2210 if (line == 1) {
2211 stats_ocacheL1_found_at_1++;
2212 } else {
2213 stats_ocacheL1_found_at_N++;
2214 }
2215 if (UNLIKELY(0 == (ocacheL1_event_ctr++
2216 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2217 moveLineForwards( &ocacheL1->set[setno], line );
2218 line--;
2219 }
2220 return &ocacheL1->set[setno].line[line];
2221 }
2222 }
2223
2224 /* A miss. Use the last slot. Implicitly this means we're
2225 ejecting the line in the last slot. */
2226 stats_ocacheL1_misses++;
2227 tl_assert(line == OC_LINES_PER_SET);
2228 line--;
2229 tl_assert(line > 0);
2230
2231 /* First, move the to-be-ejected line to the L2 cache. */
2232 victim = &ocacheL1->set[setno].line[line];
2233 c = classify_OCacheLine(victim);
2234 switch (c) {
2235 case 'e':
2236 /* the line is empty (has invalid tag); ignore it. */
2237 break;
2238 case 'z':
2239 /* line contains zeroes. We must ensure the backing store is
2240 updated accordingly, either by copying the line there
2241 verbatim, or by ensuring it isn't present there. We
2242             choose the latter on the basis that it reduces the size of
2243 the backing store. */
2244 ocacheL2_del_tag( victim->tag );
2245 break;
2246 case 'n':
2247 /* line contains at least one real, useful origin. Copy it
2248 to the backing store. */
2249 stats_ocacheL1_lossage++;
2250 inL2 = ocacheL2_find_tag( victim->tag );
2251 if (inL2) {
2252 *inL2 = *victim;
2253 } else {
2254 ocacheL2_add_line( victim );
2255 }
2256 break;
2257 default:
2258 tl_assert(0);
2259 }
2260
2261 /* Now we must reload the L1 cache from the backing tree, if
2262 possible. */
2263 tl_assert(tag != victim->tag); /* stay sane */
2264 inL2 = ocacheL2_find_tag( tag );
2265 if (inL2) {
2266 /* We're in luck. It's in the L2. */
2267 ocacheL1->set[setno].line[line] = *inL2;
2268 } else {
2269 /* Missed at both levels of the cache hierarchy. We have to
2270 declare it as full of zeroes (unknown origins). */
2271 stats__ocacheL2_misses++;
2272 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2273 }
2274
2275 /* Move it one forwards */
2276 moveLineForwards( &ocacheL1->set[setno], line );
2277 line--;
2278
2279 return &ocacheL1->set[setno].line[line];
2280 }
2281
2282 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2283 {
2284 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2285 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2286 UWord tag = a & tagmask;
2287
2288 stats_ocacheL1_find++;
2289
2290 if (OC_ENABLE_ASSERTIONS) {
2291 tl_assert(setno >= 0 && setno < OC_N_SETS);
2292 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2293 }
2294
2295 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2296 return &ocacheL1->set[setno].line[0];
2297 }
2298
2299 return find_OCacheLine_SLOW( a );
2300 }
2301
2302 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2303 {
2304 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2305 //// Set the origins for a+0 .. a+7
2306 { OCacheLine* line;
2307 UWord lineoff = oc_line_offset(a);
2308 if (OC_ENABLE_ASSERTIONS) {
2309 tl_assert(lineoff >= 0
2310 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2311 }
2312 line = find_OCacheLine( a );
2313 line->descr[lineoff+0] = 0xF;
2314 line->descr[lineoff+1] = 0xF;
2315 line->w32[lineoff+0] = otag;
2316 line->w32[lineoff+1] = otag;
2317 }
2318 //// END inlined, specialised version of MC_(helperc_b_store8)
2319 }
2320
2321
2322 /*------------------------------------------------------------*/
2323 /*--- Aligned fast case permission setters, ---*/
2324 /*--- for dealing with stacks ---*/
2325 /*------------------------------------------------------------*/
2326
2327 /*--------------------- 32-bit ---------------------*/
2328
2329 /* Nb: by "aligned" here we mean 4-byte aligned */
2330
2331 static INLINE void make_aligned_word32_undefined ( Addr a )
2332 {
2333 PROF_EVENT(300, "make_aligned_word32_undefined");
2334
2335 #ifndef PERF_FAST_STACK2
2336 make_mem_undefined(a, 4);
2337 #else
2338 {
2339 UWord sm_off;
2340 SecMap* sm;
2341
2342 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2343 PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
2344 make_mem_undefined(a, 4);
2345 return;
2346 }
2347
2348 sm = get_secmap_for_writing_low(a);
2349 sm_off = SM_OFF(a);
2350 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2351 }
2352 #endif
2353 }
2354
2355 static INLINE
2356 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2357 {
2358 make_aligned_word32_undefined(a);
2359 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2360 //// Set the origins for a+0 .. a+3
2361 { OCacheLine* line;
2362 UWord lineoff = oc_line_offset(a);
2363 if (OC_ENABLE_ASSERTIONS) {
2364 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2365 }
2366 line = find_OCacheLine( a );
2367 line->descr[lineoff] = 0xF;
2368 line->w32[lineoff] = otag;
2369 }
2370 //// END inlined, specialised version of MC_(helperc_b_store4)
2371 }
2372
2373 static INLINE
2374 void make_aligned_word32_noaccess ( Addr a )
2375 {
2376 PROF_EVENT(310, "make_aligned_word32_noaccess");
2377
2378 #ifndef PERF_FAST_STACK2
2379 MC_(make_mem_noaccess)(a, 4);
2380 #else
2381 {
2382 UWord sm_off;
2383 SecMap* sm;
2384
2385 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2386 PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
2387 MC_(make_mem_noaccess)(a, 4);
2388 return;
2389 }
2390
2391 sm = get_secmap_for_writing_low(a);
2392 sm_off = SM_OFF(a);
2393 sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2394
2395 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2396 //// Set the origins for a+0 .. a+3.
2397 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2398 OCacheLine* line;
2399 UWord lineoff = oc_line_offset(a);
2400 if (OC_ENABLE_ASSERTIONS) {
2401 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2402 }
2403 line = find_OCacheLine( a );
2404 line->descr[lineoff] = 0;
2405 }
2406 //// END inlined, specialised version of MC_(helperc_b_store4)
2407 }
2408 #endif
2409 }
2410
2411 /*--------------------- 64-bit ---------------------*/
2412
2413 /* Nb: by "aligned" here we mean 8-byte aligned */
2414
2415 static INLINE void make_aligned_word64_undefined ( Addr a )
2416 {
2417 PROF_EVENT(320, "make_aligned_word64_undefined");
2418
2419 #ifndef PERF_FAST_STACK2
2420 make_mem_undefined(a, 8);
2421 #else
2422 {
2423 UWord sm_off16;
2424 SecMap* sm;
2425
2426 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2427 PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
2428 make_mem_undefined(a, 8);
2429 return;
2430 }
2431
2432 sm = get_secmap_for_writing_low(a);
2433 sm_off16 = SM_OFF_16(a);
2434 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2435 }
2436 #endif
2437 }
2438
2439 static INLINE
2440 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2441 {
2442 make_aligned_word64_undefined(a);
2443 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2444 //// Set the origins for a+0 .. a+7
2445 { OCacheLine* line;
2446 UWord lineoff = oc_line_offset(a);
2447 tl_assert(lineoff >= 0
2448 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2449 line = find_OCacheLine( a );
2450 line->descr[lineoff+0] = 0xF;
2451 line->descr[lineoff+1] = 0xF;
2452 line->w32[lineoff+0] = otag;
2453 line->w32[lineoff+1] = otag;
2454 }
2455 //// END inlined, specialised version of MC_(helperc_b_store8)
2456 }
2457
2458 static INLINE
2459 void make_aligned_word64_noaccess ( Addr a )
2460 {
2461 PROF_EVENT(330, "make_aligned_word64_noaccess");
2462
2463 #ifndef PERF_FAST_STACK2
2464 MC_(make_mem_noaccess)(a, 8);
2465 #else
2466 {
2467 UWord sm_off16;
2468 SecMap* sm;
2469
2470 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2471 PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
2472 MC_(make_mem_noaccess)(a, 8);
2473 return;
2474 }
2475
2476 sm = get_secmap_for_writing_low(a);
2477 sm_off16 = SM_OFF_16(a);
2478 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2479
2480 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2481 //// Clear the origins for a+0 .. a+7.
2482 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2483 OCacheLine* line;
2484 UWord lineoff = oc_line_offset(a);
2485 tl_assert(lineoff >= 0
2486 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2487 line = find_OCacheLine( a );
2488 line->descr[lineoff+0] = 0;
2489 line->descr[lineoff+1] = 0;
2490 }
2491 //// END inlined, specialised version of MC_(helperc_b_store8)
2492 }
2493 #endif
2494 }
2495
2496
2497 /*------------------------------------------------------------*/
2498 /*--- Stack pointer adjustment ---*/
2499 /*------------------------------------------------------------*/
2500
2501 #ifdef PERF_FAST_STACK
2502 # define MAYBE_USED
2503 #else
2504 # define MAYBE_USED __attribute__((unused))
2505 #endif
2506
2507 /*--------------- adjustment by 4 bytes ---------------*/
2508
2509 MAYBE_USED
2510 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2511 {
2512 UInt otag = ecu | MC_OKIND_STACK;
2513 PROF_EVENT(110, "new_mem_stack_4");
2514 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2515 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2516 } else {
2517 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2518 }
2519 }
2520
2521 MAYBE_USED
2522 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2523 {
2524 PROF_EVENT(110, "new_mem_stack_4");
2525 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2526 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2527 } else {
2528 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2529 }
2530 }
2531
2532 MAYBE_USED
2533 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2534 {
2535 PROF_EVENT(120, "die_mem_stack_4");
2536 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2537 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2538 } else {
2539 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2540 }
2541 }
2542
2543 /*--------------- adjustment by 8 bytes ---------------*/
2544
2545 MAYBE_USED
2546 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2547 {
2548 UInt otag = ecu | MC_OKIND_STACK;
2549 PROF_EVENT(111, "new_mem_stack_8");
2550 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2551 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2552 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2553 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2554 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2555 } else {
2556 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2557 }
2558 }
2559
2560 MAYBE_USED
2561 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2562 {
2563 PROF_EVENT(111, "new_mem_stack_8");
2564 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2565 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2566 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2567 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2568 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2569 } else {
2570 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2571 }
2572 }
2573
2574 MAYBE_USED
2575 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2576 {
2577 PROF_EVENT(121, "die_mem_stack_8");
2578 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2579 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2580 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2581 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2582 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2583 } else {
2584 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2585 }
2586 }
2587
2588 /*--------------- adjustment by 12 bytes ---------------*/
2589
2590 MAYBE_USED
2591 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2592 {
2593 UInt otag = ecu | MC_OKIND_STACK;
2594 PROF_EVENT(112, "new_mem_stack_12");
2595 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2596 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2597 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2598 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2599 /* from previous test we don't have 8-alignment at offset +0,
2600 hence must have 8 alignment at offsets +4/-4. Hence safe to
2601       do 4 at +0 and then 8 at +4. */
2602 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2603 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2604 } else {
2605 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2606 }
2607 }
2608
2609 MAYBE_USED
2610 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2611 {
2612 PROF_EVENT(112, "new_mem_stack_12");
2613 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2614 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2615 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2616 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2617 /* from previous test we don't have 8-alignment at offset +0,
2618 hence must have 8 alignment at offsets +4/-4. Hence safe to
2619       do 4 at +0 and then 8 at +4. */
2620 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2621 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2622 } else {
2623 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2624 }
2625 }
2626
2627 MAYBE_USED
2628 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2629 {
2630 PROF_EVENT(122, "die_mem_stack_12");
2631 /* Note the -12 in the test */
2632 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2633 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2634 -4. */
2635 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2636 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2637 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2638 /* We have 4-alignment at +0, but we don't have 8-alignment at
2639 -12. So we must have 8-alignment at -8. Hence do 4 at -12
2640 and then 8 at -8. */
2641 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2642 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2643 } else {
2644 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2645 }
2646 }
2647
2648 /*--------------- adjustment by 16 bytes ---------------*/
2649
2650 MAYBE_USED
2651 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2652 {
2653 UInt otag = ecu | MC_OKIND_STACK;
2654 PROF_EVENT(113, "new_mem_stack_16");
2655 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2656 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2657 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2658 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2659 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2660 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2661 Hence do 4 at +0, 8 at +4, 4 at +12. */
2662 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2663 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2664 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2665 } else {
2666 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2667 }
2668 }
2669
2670 MAYBE_USED
2671 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2672 {
2673 PROF_EVENT(113, "new_mem_stack_16");
2674 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2675 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2676 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2677 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2678 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2679 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2680 Hence do 4 at +0, 8 at +4, 4 at +12. */
2681 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2682 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2683 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2684 } else {
2685 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2686 }
2687 }
2688
2689 MAYBE_USED
2690 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2691 {
2692 PROF_EVENT(123, "die_mem_stack_16");
2693 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2694 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2695 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2696 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2697 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2698 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
2699 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2700 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2701 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2702 } else {
2703 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2704 }
2705 }
2706
2707 /*--------------- adjustment by 32 bytes ---------------*/
2708
2709 MAYBE_USED
2710 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
2711 {
2712 UInt otag = ecu | MC_OKIND_STACK;
2713 PROF_EVENT(114, "new_mem_stack_32");
2714 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2715 /* Straightforward */
2716 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2717 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2718 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2719 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2720 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2721 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
2722 +0,+28. */
2723 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2724 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2725 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2726 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
2727 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
2728 } else {
2729 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
2730 }
2731 }
2732
2733 MAYBE_USED
2734 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
2735 {
2736 PROF_EVENT(114, "new_mem_stack_32");
2737 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2738 /* Straightforward */
2739 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2740 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2741 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2742 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2743 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2744 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
2745 +0,+28. */
2746 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2747 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2748 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2749 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
2750 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
2751 } else {
2752 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
2753 }
2754 }
2755
2756 MAYBE_USED
2757 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
2758 {
2759 PROF_EVENT(124, "die_mem_stack_32");
2760 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2761 /* Straightforward */
2762 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2763 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2764 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2765 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2766 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2767 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
2768 4 at -32,-4. */
2769 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2770 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
2771 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
2772 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2773 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2774 } else {
2775 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
2776 }
2777 }
2778
2779 /*--------------- adjustment by 112 bytes ---------------*/
2780
2781 MAYBE_USED
2782 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
2783 {
2784 UInt otag = ecu | MC_OKIND_STACK;
2785 PROF_EVENT(115, "new_mem_stack_112");
2786 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2787 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2788 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2789 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2790 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2791 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2792 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2793 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2794 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2795 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2796 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2797 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2798 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2799 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
2800 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
2801 } else {
2802 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
2803 }
2804 }
2805
2806 MAYBE_USED
2807 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
2808 {
2809 PROF_EVENT(115, "new_mem_stack_112");
2810 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2811 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2812 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2813 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2814 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2815 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
2816 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
2817 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
2818 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
2819 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
2820 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
2821 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
2822 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
2823 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
2824 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
2825 } else {
2826 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
2827 }
2828 }
2829
2830 MAYBE_USED
2831 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
2832 {
2833 PROF_EVENT(125, "die_mem_stack_112");
2834 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2835 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
2836 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
2837 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
2838 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
2839 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
2840 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
2841 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
2842 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
2843 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
2844 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
2845 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2846 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2847 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2848 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2849 } else {
2850 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
2851 }
2852 }
2853
2854 /*--------------- adjustment by 128 bytes ---------------*/
2855
2856 MAYBE_USED
2857 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
2858 {
2859 UInt otag = ecu | MC_OKIND_STACK;
2860 PROF_EVENT(116, "new_mem_stack_128");
2861 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2862 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2863 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2864 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2865 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2866 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2867 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2868 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2869 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2870 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2871 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2872 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2873 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2874 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
2875 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
2876 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
2877 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
2878 } else {
2879 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
2880 }
2881 }
2882
2883 MAYBE_USED
2884 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
2885 {
2886 PROF_EVENT(116, "new_mem_stack_128");
2887 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2888 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2889 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2890 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2891 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2892 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
2893 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
2894 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
2895 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
2896 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
2897 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
2898 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
2899 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
2900 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
2901 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
2902 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
2903 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
2904 } else {
2905 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
2906 }
2907 }
2908
2909 MAYBE_USED
2910 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
2911 {
2912 PROF_EVENT(126, "die_mem_stack_128");
2913 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2914 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
2915 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
2916 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
2917 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
2918 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
2919 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
2920 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
2921 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
2922 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
2923 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
2924 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
2925 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
2926 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2927 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2928 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2929 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2930 } else {
2931 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
2932 }
2933 }
2934
2935 /*--------------- adjustment by 144 bytes ---------------*/
2936
2937 MAYBE_USED
2938 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
2939 {
2940 UInt otag = ecu | MC_OKIND_STACK;
2941 PROF_EVENT(117, "new_mem_stack_144");
2942 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2943 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2944 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2945 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2946 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2947 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2948 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2949 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2950 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2951 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2952 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2953 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2954 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2955 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
2956 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
2957 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
2958 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
2959 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
2960 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
2961 } else {
2962 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
2963 }
2964 }
2965
2966 MAYBE_USED
2967 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
2968 {
2969 PROF_EVENT(117, "new_mem_stack_144");
2970 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2971 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2972 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2973 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2974 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2975 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
2976 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
2977 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
2978 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
2979 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
2980 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
2981 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
2982 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
2983 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
2984 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
2985 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
2986 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
2987 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
2988 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
2989 } else {
2990 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
2991 }
2992 }
2993
2994 MAYBE_USED
2995 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
2996 {
2997 PROF_EVENT(127, "die_mem_stack_144");
2998 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2999 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3000 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3001 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3002 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3003 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3004 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3005 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3006 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3007 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3008 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3009 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3010 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3011 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3012 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3013 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3014 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3015 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3016 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3017 } else {
3018 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3019 }
3020 }
3021
3022 /*--------------- adjustment by 160 bytes ---------------*/
3023
3024 MAYBE_USED
3025 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3026 {
3027 UInt otag = ecu | MC_OKIND_STACK;
3028 PROF_EVENT(118, "new_mem_stack_160");
3029 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3030 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3031 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3032 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3033 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3034 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3035 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3036 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3037 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3038 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3039 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3040 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3041 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3042 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3043 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3044 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3045 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3046 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3047 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3048 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3049 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3050 } else {
3051 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3052 }
3053 }
3054
3055 MAYBE_USED
3056 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3057 {
3058 PROF_EVENT(118, "new_mem_stack_160");
3059 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3060 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3061 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3062 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3063 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3064 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3065 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3066 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3067 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3068 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3069 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3070 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3071 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3072 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3073 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3074 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3075 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3076 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3077 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3078 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3079 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3080 } else {
3081 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3082 }
3083 }
3084
3085 MAYBE_USED
3086 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3087 {
3088 PROF_EVENT(128, "die_mem_stack_160");
3089 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3090 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3091 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3092 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3093 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3094 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3095 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3096 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3097 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3098 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3099 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3100 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3101 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3102 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3103 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3104 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3105 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3106 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3107 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3108 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3109 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3110 } else {
3111 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3112 }
3113 }
3114
3115 /*--------------- adjustment by N bytes ---------------*/
3116
3117 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3118 {
3119 UInt otag = ecu | MC_OKIND_STACK;
3120 PROF_EVENT(115, "new_mem_stack_w_otag");
3121 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3122 }
3123
3124 static void mc_new_mem_stack ( Addr a, SizeT len )
3125 {
3126 PROF_EVENT(115, "new_mem_stack");
3127 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3128 }
3129
3130 static void mc_die_mem_stack ( Addr a, SizeT len )
3131 {
3132 PROF_EVENT(125, "die_mem_stack");
3133 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3134 }
3135
3136
3137 /* The AMD64 ABI says:
3138
3139 "The 128-byte area beyond the location pointed to by %rsp is considered
3140 to be reserved and shall not be modified by signal or interrupt
3141 handlers. Therefore, functions may use this area for temporary data
3142 that is not needed across function calls. In particular, leaf functions
3143 may use this area for their entire stack frame, rather than adjusting
3144 the stack pointer in the prologue and epilogue. This area is known as
3145 red zone [sic]."
3146
3147 So after any call or return we need to mark this redzone as containing
3148 undefined values.
3149
3150 Consider this: we're in function f. f calls g. g moves rsp down
3151 modestly (say 16 bytes) and writes stuff all over the red zone, making it
3152 defined. g returns. f is buggy and reads from parts of the red zone
3153 that it didn't write on. But because g filled that area in, f is going
3154 to be picking up defined V bits and so any errors from reading bits of
3155 the red zone it didn't write, will be missed. The only solution I could
3156 think of was to make the red zone undefined when g returns to f.
3157
3158 This is in accordance with the ABI, which makes it clear the redzone
3159 is volatile across function calls.
3160
3161 The problem occurs the other way round too: f could fill the RZ up
3162 with defined values and g could mistakenly read them. So the RZ
3163 also needs to be nuked on function calls.
3164 */
3165
3166
3167 /* Here's a simple cache to hold nia -> ECU mappings. It could be
3168 improved so as to have a lower miss rate. */
3169
3170 static UWord stats__nia_cache_queries = 0;
3171 static UWord stats__nia_cache_misses = 0;
3172
3173 typedef
3174 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */
3175 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3176 WCacheEnt;
3177
3178 #define N_NIA_TO_ECU_CACHE 511
3179
3180 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
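/* Illustrative note (not from the original source): the cache is indexed
   by (nia % N_NIA_TO_ECU_CACHE), with two entries per slot kept in
   most-recently-used order -- a hit on the second entry is swapped to the
   front by convert_nia_to_ecu() below.  So, for example, any two nia
   values that differ by a multiple of N_NIA_TO_ECU_CACHE (511) land in
   the same slot and compete for its two ways. */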
3181
3182 static void init_nia_to_ecu_cache ( void )
3183 {
3184 UWord i;
3185 Addr zero_addr = 0;
3186 ExeContext* zero_ec;
3187 UInt zero_ecu;
3188 /* Fill all the slots with an entry for address zero, and the
3189 relevant otags accordingly. Hence the cache is initially filled
3190 with valid data. */
3191 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3192 tl_assert(zero_ec);
3193 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3194 tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3195 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3196 nia_to_ecu_cache[i].nia0 = zero_addr;
3197 nia_to_ecu_cache[i].ecu0 = zero_ecu;
3198 nia_to_ecu_cache[i].nia1 = zero_addr;
3199 nia_to_ecu_cache[i].ecu1 = zero_ecu;
3200 }
3201 }
3202
3203 static inline UInt convert_nia_to_ecu ( Addr nia )
3204 {
3205 UWord i;
3206 UInt ecu;
3207 ExeContext* ec;
3208
3209 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3210
3211 stats__nia_cache_queries++;
3212 i = nia % N_NIA_TO_ECU_CACHE;
3213 tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3214
3215 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3216 return nia_to_ecu_cache[i].ecu0;
3217
3218 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3219 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3220 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3221 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3222 # undef SWAP
3223 return nia_to_ecu_cache[i].ecu0;
3224 }
3225
3226 stats__nia_cache_misses++;
3227 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3228 tl_assert(ec);
3229 ecu = VG_(get_ECU_from_ExeContext)(ec);
3230 tl_assert(VG_(is_plausible_ECU)(ecu));
3231
3232 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3233 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3234
3235 nia_to_ecu_cache[i].nia0 = nia;
3236 nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3237 return ecu;
3238 }
3239
3240
3241 /* Note that this serves both the origin-tracking and
3242 no-origin-tracking modes. We assume that calls to it are
3243 sufficiently infrequent that it isn't worth specialising for the
3244 with/without origin-tracking cases. */
3245 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
3246 {
3247 UInt otag;
3248 tl_assert(sizeof(UWord) == sizeof(SizeT));
3249 if (0)
3250 VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
3251 base, len, nia );
3252
3253 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3254 UInt ecu = convert_nia_to_ecu ( nia );
3255 tl_assert(VG_(is_plausible_ECU)(ecu));
3256 otag = ecu | MC_OKIND_STACK;
3257 } else {
3258 tl_assert(nia == 0);
3259 otag = 0;
3260 }
3261
3262 # if 0
3263 /* Really slow version */
3264 MC_(make_mem_undefined)(base, len, otag);
3265 # endif
3266
3267 # if 0
3268 /* Slow(ish) version, which is fairly easily seen to be correct.
3269 */
3270 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3271 make_aligned_word64_undefined(base + 0, otag);
3272 make_aligned_word64_undefined(base + 8, otag);
3273 make_aligned_word64_undefined(base + 16, otag);
3274 make_aligned_word64_undefined(base + 24, otag);
3275
3276 make_aligned_word64_undefined(base + 32, otag);
3277 make_aligned_word64_undefined(base + 40, otag);
3278 make_aligned_word64_undefined(base + 48, otag);
3279 make_aligned_word64_undefined(base + 56, otag);
3280
3281 make_aligned_word64_undefined(base + 64, otag);
3282 make_aligned_word64_undefined(base + 72, otag);
3283 make_aligned_word64_undefined(base + 80, otag);
3284 make_aligned_word64_undefined(base + 88, otag);
3285
3286 make_aligned_word64_undefined(base + 96, otag);
3287 make_aligned_word64_undefined(base + 104, otag);
3288 make_aligned_word64_undefined(base + 112, otag);
3289 make_aligned_word64_undefined(base + 120, otag);
3290 } else {
3291 MC_(make_mem_undefined)(base, len, otag);
3292 }
3293 # endif
3294
3295 /* Idea is: go fast when
3296 * 8-aligned and length is 128
3297 * the sm is available in the main primary map
3298          * the address range falls entirely within a single secondary map
3299 If all those conditions hold, just update the V+A bits by writing
3300 directly into the vabits array. (If the sm was distinguished, this
3301 will make a copy and then write to it.)
3302 */
3303
3304 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3305 /* Now we know the address range is suitably sized and aligned. */
3306 UWord a_lo = (UWord)(base);
3307 UWord a_hi = (UWord)(base + 128 - 1);
3308 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3309 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3310 // Now we know the entire range is within the main primary map.
3311 SecMap* sm = get_secmap_for_writing_low(a_lo);
3312 SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3313 /* Now we know that the entire address range falls within a
3314 single secondary map, and that that secondary 'lives' in
3315 the main primary map. */
3316 if (LIKELY(sm == sm_hi)) {
3317 // Finally, we know that the range is entirely within one secmap.
3318 UWord v_off = SM_OFF(a_lo);
3319 UShort* p = (UShort*)(&sm->vabits8[v_off]);
3320 p[ 0] = VA_BITS16_UNDEFINED;
3321 p[ 1] = VA_BITS16_UNDEFINED;
3322 p[ 2] = VA_BITS16_UNDEFINED;
3323 p[ 3] = VA_BITS16_UNDEFINED;
3324 p[ 4] = VA_BITS16_UNDEFINED;
3325 p[ 5] = VA_BITS16_UNDEFINED;
3326 p[ 6] = VA_BITS16_UNDEFINED;
3327 p[ 7] = VA_BITS16_UNDEFINED;
3328 p[ 8] = VA_BITS16_UNDEFINED;
3329 p[ 9] = VA_BITS16_UNDEFINED;
3330 p[10] = VA_BITS16_UNDEFINED;
3331 p[11] = VA_BITS16_UNDEFINED;
3332 p[12] = VA_BITS16_UNDEFINED;
3333 p[13] = VA_BITS16_UNDEFINED;
3334 p[14] = VA_BITS16_UNDEFINED;
3335 p[15] = VA_BITS16_UNDEFINED;
3336 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3337 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3338 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3339 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3340 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3341 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3342 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3343 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3344 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3345 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3346 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3347 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3348 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3349 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3350 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3351 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3352 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3353 }
3354 return;
3355 }
3356 }
3357 }
3358
3359 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3360 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3361 /* Now we know the address range is suitably sized and aligned. */
3362 UWord a_lo = (UWord)(base);
3363 UWord a_hi = (UWord)(base + 288 - 1);
3364 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3365 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3366 // Now we know the entire range is within the main primary map.
3367 SecMap* sm = get_secmap_for_writing_low(a_lo);
3368 SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3369 /* Now we know that the entire address range falls within a
3370 single secondary map, and that that secondary 'lives' in
3371 the main primary map. */
3372 if (LIKELY(sm == sm_hi)) {
3373 // Finally, we know that the range is entirely within one secmap.
3374 UWord v_off = SM_OFF(a_lo);
3375 UShort* p = (UShort*)(&sm->vabits8[v_off]);
3376 p[ 0] = VA_BITS16_UNDEFINED;
3377 p[ 1] = VA_BITS16_UNDEFINED;
3378 p[ 2] = VA_BITS16_UNDEFINED;
3379 p[ 3] = VA_BITS16_UNDEFINED;
3380 p[ 4] = VA_BITS16_UNDEFINED;
3381 p[ 5] = VA_BITS16_UNDEFINED;
3382 p[ 6] = VA_BITS16_UNDEFINED;
3383 p[ 7] = VA_BITS16_UNDEFINED;
3384 p[ 8] = VA_BITS16_UNDEFINED;
3385 p[ 9] = VA_BITS16_UNDEFINED;
3386 p[10] = VA_BITS16_UNDEFINED;
3387 p[11] = VA_BITS16_UNDEFINED;
3388 p[12] = VA_BITS16_UNDEFINED;
3389 p[13] = VA_BITS16_UNDEFINED;
3390 p[14] = VA_BITS16_UNDEFINED;
3391 p[15] = VA_BITS16_UNDEFINED;
3392 p[16] = VA_BITS16_UNDEFINED;
3393 p[17] = VA_BITS16_UNDEFINED;
3394 p[18] = VA_BITS16_UNDEFINED;
3395 p[19] = VA_BITS16_UNDEFINED;
3396 p[20] = VA_BITS16_UNDEFINED;
3397 p[21] = VA_BITS16_UNDEFINED;
3398 p[22] = VA_BITS16_UNDEFINED;
3399 p[23] = VA_BITS16_UNDEFINED;
3400 p[24] = VA_BITS16_UNDEFINED;
3401 p[25] = VA_BITS16_UNDEFINED;
3402 p[26] = VA_BITS16_UNDEFINED;
3403 p[27] = VA_BITS16_UNDEFINED;
3404 p[28] = VA_BITS16_UNDEFINED;
3405 p[29] = VA_BITS16_UNDEFINED;
3406 p[30] = VA_BITS16_UNDEFINED;
3407 p[31] = VA_BITS16_UNDEFINED;
3408 p[32] = VA_BITS16_UNDEFINED;
3409 p[33] = VA_BITS16_UNDEFINED;
3410 p[34] = VA_BITS16_UNDEFINED;
3411 p[35] = VA_BITS16_UNDEFINED;
3412 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3413 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3414 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3415 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3416 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3417 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3418 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3419 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3420 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3421 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3422 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3423 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3424 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3425 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3426 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3427 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3428 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3429 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3430 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3431 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3432 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3433 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3434 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3435 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3436 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3437 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3438 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3439 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3440 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3441 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3442 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3443 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3444 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3445 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3446 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3447 set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3448 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3449 }
3450 return;
3451 }
3452 }
3453 }
3454
3455 /* else fall into slow case */
3456 MC_(make_mem_undefined_w_otag)(base, len, otag);
3457 }
3458
3459
3460 /*------------------------------------------------------------*/
3461 /*--- Checking memory ---*/
3462 /*------------------------------------------------------------*/
3463
3464 typedef
3465 enum {
3466 MC_Ok = 5,
3467 MC_AddrErr = 6,
3468 MC_ValueErr = 7
3469 }
3470 MC_ReadResult;
3471
3472
3473 /* Check permissions for address range. If inadequate permissions
3474 exist, *bad_addr is set to the offending address, so the caller can
3475 know what it is. */
3476
3477 /* Returns True if [a .. a+len) is not addressible. Otherwise,
3478 returns False, and if bad_addr is non-NULL, sets *bad_addr to
3479 indicate the lowest failing address. Functions below are
3480 similar. */
3481 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
3482 {
3483 SizeT i;
3484 UWord vabits2;
3485
3486 PROF_EVENT(60, "check_mem_is_noaccess");
3487 for (i = 0; i < len; i++) {
3488 PROF_EVENT(61, "check_mem_is_noaccess(loop)");
3489 vabits2 = get_vabits2(a);
3490 if (VA_BITS2_NOACCESS != vabits2) {
3491 if (bad_addr != NULL) *bad_addr = a;
3492 return False;
3493 }
3494 a++;
3495 }
3496 return True;
3497 }
3498
3499 static Bool is_mem_addressable ( Addr a, SizeT len,
3500 /*OUT*/Addr* bad_addr )
3501 {
3502 SizeT i;
3503 UWord vabits2;
3504
3505 PROF_EVENT(62, "is_mem_addressable");
3506 for (i = 0; i < len; i++) {
3507 PROF_EVENT(63, "is_mem_addressable(loop)");
3508 vabits2 = get_vabits2(a);
3509 if (VA_BITS2_NOACCESS == vabits2) {
3510 if (bad_addr != NULL) *bad_addr = a;
3511 return False;
3512 }
3513 a++;
3514 }
3515 return True;
3516 }
3517
3518 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
3519 /*OUT*/Addr* bad_addr,
3520 /*OUT*/UInt* otag )
3521 {
3522 SizeT i;
3523 UWord vabits2;
3524
3525 PROF_EVENT(64, "is_mem_defined");
3526 DEBUG("is_mem_defined\n");
3527
3528 if (otag) *otag = 0;
3529 if (bad_addr) *bad_addr = 0;
3530 for (i = 0; i < len; i++) {
3531 PROF_EVENT(65, "is_mem_defined(loop)");
3532 vabits2 = get_vabits2(a);
3533 if (VA_BITS2_DEFINED != vabits2) {
3534 // Error! Nb: Report addressability errors in preference to
3535          // definedness errors.  And don't report definedness errors unless
3536 // --undef-value-errors=yes.
3537 if (bad_addr) {
3538 *bad_addr = a;
3539 }
3540 if (VA_BITS2_NOACCESS == vabits2) {
3541 return MC_AddrErr;
3542 }
3543 if (MC_(clo_mc_level) >= 2) {
3544 if (otag && MC_(clo_mc_level) == 3) {
3545 *otag = MC_(helperc_b_load1)( a );
3546 }
3547 return MC_ValueErr;
3548 }
3549 }
3550 a++;
3551 }
3552 return MC_Ok;
3553 }
3554
3555
3556 /* Like is_mem_defined but doesn't give up at the first uninitialised
3557 byte -- the entire range is always checked. This is important for
3558 detecting errors in the case where a checked range strays into
3559 invalid memory, but that fact is not detected by the ordinary
3560 is_mem_defined(), because of an undefined section that precedes the
3561 out of range section, possibly as a result of an alignment hole in
3562 the checked data. This version always checks the entire range and
3563    can report both a definedness and an accessibility error, if
3564 necessary. */
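/* Worked example (illustrative only; the addresses are hypothetical):
   suppose a syscall wrapper checks the 16-byte range [a, a+16) where

      a+0  .. a+3    defined
      a+4  .. a+7    undefined     (e.g. an alignment hole in a struct)
      a+8  .. a+11   defined
      a+12 .. a+15   unaddressable (the range runs off the end of a buffer)

   Plain is_mem_defined() gives up at a+4 and reports only the definedness
   error, so the stray into unaddressable memory at a+12 goes unnoticed.
   This routine keeps scanning and can report both errors. */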
3565 static void is_mem_defined_comprehensive (
3566 Addr a, SizeT len,
3567 /*OUT*/Bool* errorV, /* is there a definedness err? */
3568 /*OUT*/Addr* bad_addrV, /* if so where? */
3569 /*OUT*/UInt* otagV, /* and what's its otag? */
3570 /*OUT*/Bool* errorA, /* is there an addressability err? */
3571 /*OUT*/Addr* bad_addrA /* if so where? */
3572 )
3573 {
3574 SizeT i;
3575 UWord vabits2;
3576 Bool already_saw_errV = False;
3577
3578 PROF_EVENT(64, "is_mem_defined"); // fixme
3579 DEBUG("is_mem_defined_comprehensive\n");
3580
3581 tl_assert(!(*errorV || *errorA));
3582
3583 for (i = 0; i < len; i++) {
3584 PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
3585 vabits2 = get_vabits2(a);
3586 switch (vabits2) {
3587 case VA_BITS2_DEFINED:
3588 a++;
3589 break;
3590 case VA_BITS2_UNDEFINED:
3591 case VA_BITS2_PARTDEFINED:
3592 if (!already_saw_errV) {
3593 *errorV = True;
3594 *bad_addrV = a;
3595 if (MC_(clo_mc_level) == 3) {
3596 *otagV = MC_(helperc_b_load1)( a );
3597 } else {
3598 *otagV = 0;
3599 }
3600 already_saw_errV = True;
3601 }
3602 a++; /* keep going */
3603 break;
3604 case VA_BITS2_NOACCESS:
3605 *errorA = True;
3606 *bad_addrA = a;
3607 return; /* give up now. */
3608 default:
3609 tl_assert(0);
3610 }
3611 }
3612 }
3613
3614
3615 /* Check a zero-terminated ascii string. Tricky -- don't want to
3616 examine the actual bytes, to find the end, until we're sure it is
3617 safe to do so. */
3618
3619 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
3620 {
3621 UWord vabits2;
3622
3623 PROF_EVENT(66, "mc_is_defined_asciiz");
3624 DEBUG("mc_is_defined_asciiz\n");
3625
3626 if (otag) *otag = 0;
3627 if (bad_addr) *bad_addr = 0;
3628 while (True) {
3629 PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
3630 vabits2 = get_vabits2(a);
3631 if (VA_BITS2_DEFINED != vabits2) {
3632 // Error! Nb: Report addressability errors in preference to
3633          // definedness errors.  And don't report definedness errors unless
3634 // --undef-value-errors=yes.
3635 if (bad_addr) {
3636 *bad_addr = a;
3637 }
3638 if (VA_BITS2_NOACCESS == vabits2) {
3639 return MC_AddrErr;
3640 }
3641 if (MC_(clo_mc_level) >= 2) {
3642 if (otag && MC_(clo_mc_level) == 3) {
3643 *otag = MC_(helperc_b_load1)( a );
3644 }
3645 return MC_ValueErr;
3646 }
3647 }
3648 /* Ok, a is safe to read. */
3649 if (* ((UChar*)a) == 0) {
3650 return MC_Ok;
3651 }
3652 a++;
3653 }
3654 }
3655
3656
3657 /*------------------------------------------------------------*/
3658 /*--- Memory event handlers ---*/
3659 /*------------------------------------------------------------*/
3660
3661 static
3662 void check_mem_is_addressable ( CorePart part, ThreadId tid, Char* s,
3663 Addr base, SizeT size )
3664 {
3665 Addr bad_addr;
3666 Bool ok = is_mem_addressable ( base, size, &bad_addr );
3667
3668 if (!ok) {
3669 switch (part) {
3670 case Vg_CoreSysCall:
3671 MC_(record_memparam_error) ( tid, bad_addr,
3672 /*isAddrErr*/True, s, 0/*otag*/ );
3673 break;
3674
3675 case Vg_CoreSignal:
3676 MC_(record_core_mem_error)( tid, s );
3677 break;
3678
3679 default:
3680 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
3681 }
3682 }
3683 }
3684
3685 static
3686 void check_mem_is_defined ( CorePart part, ThreadId tid, Char* s,
3687 Addr base, SizeT size )
3688 {
3689 UInt otag = 0;
3690 Addr bad_addr;
3691 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
3692
3693 if (MC_Ok != res) {
3694 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3695
3696 switch (part) {
3697 case Vg_CoreSysCall:
3698 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3699 isAddrErr ? 0 : otag );
3700 break;
3701
3702 case Vg_CoreSysCallArgInMem:
3703 MC_(record_regparam_error) ( tid, s, otag );
3704 break;
3705
3706 /* If we're being asked to jump to a silly address, record an error
3707 message before potentially crashing the entire system. */
3708 case Vg_CoreTranslate:
3709 MC_(record_jump_error)( tid, bad_addr );
3710 break;
3711
3712 default:
3713 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
3714 }
3715 }
3716 }
3717
3718 static
3719 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
3720 Char* s, Addr str )
3721 {
3722 MC_ReadResult res;
3723 Addr bad_addr = 0; // shut GCC up
3724 UInt otag = 0;
3725
3726 tl_assert(part == Vg_CoreSysCall);
3727 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
3728 if (MC_Ok != res) {
3729 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3730 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3731 isAddrErr ? 0 : otag );
3732 }
3733 }
3734
3735 /* Handling of mmap and mprotect is not as simple as it seems.
3736
3737 The underlying semantics are that memory obtained from mmap is
3738 always initialised, but may be inaccessible. And changes to the
3739 protection of memory do not change its contents and hence not its
3740 definedness state. Problem is we can't model
3741 inaccessible-but-with-some-definedness state; once we mark memory
3742 as inaccessible we lose all info about definedness, and so can't
3743 restore that if it is later made accessible again.
3744
3745 One obvious thing to do is this:
3746
3747 mmap/mprotect NONE -> noaccess
3748 mmap/mprotect other -> defined
3749
3750 The problem case here is: taking accessible memory, writing
3751 uninitialised data to it, mprotecting it NONE and later mprotecting
3752 it back to some accessible state causes the undefinedness to be
3753 lost.
3754
3755 A better proposal is:
3756
3757 (1) mmap NONE -> make noaccess
3758 (2) mmap other -> make defined
3759
3760 (3) mprotect NONE -> # no change
3761 (4) mprotect other -> change any "noaccess" to "defined"
3762
3763 (2) is OK because memory newly obtained from mmap really is defined
3764 (zeroed out by the kernel -- doing anything else would
3765 constitute a massive security hole.)
3766
3767 (1) is OK because the only way to make the memory usable is via
3768 (4), in which case we also wind up correctly marking it all as
3769 defined.
3770
3771 (3) is the weak case. We choose not to change memory state.
3772 (presumably the range is in some mixture of "defined" and
3773 "undefined", viz, accessible but with arbitrary V bits). Doing
3774 nothing means we retain the V bits, so that if the memory is
3775 later mprotected "other", the V bits remain unchanged, so there
3776 can be no false negatives. The bad effect is that if there's
3777 an access in the area, then MC cannot warn; but at least we'll
3778 get a SEGV to show, so it's better than nothing.
3779
3780 Consider the sequence (3) followed by (4). Any memory that was
3781 "defined" or "undefined" previously retains its state (as
3782 required). Any memory that was "noaccess" before can only have
3783 been made that way by (1), and so it's OK to change it to
3784 "defined".
3785
3786 See https://bugs.kde.org/show_bug.cgi?id=205541
3787 and https://bugs.kde.org/show_bug.cgi?id=210268
3788 */
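/* Worked example of rules (1)-(4) above (illustrative only; 'p' and 'len'
   are hypothetical):

      p = mmap(.., PROT_NONE, ..)             -> (1) whole range noaccess
      mprotect(p, len, PROT_READ|PROT_WRITE)  -> (4) noaccess becomes defined
      client copies uninitialised data in     ->     those bytes become undefined
      mprotect(p, len, PROT_NONE)             -> (3) no change; V bits retained
      mprotect(p, len, PROT_READ)             -> (4) only noaccess bytes would
                                                     change, so the undefined
                                                     bytes stay undefined and no
                                                     false negative is introduced. */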
3789 static
3790 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
3791 ULong di_handle )
3792 {
3793 if (rr || ww || xx) {
3794 /* (2) mmap/mprotect other -> defined */
3795 MC_(make_mem_defined)(a, len);
3796 } else {
3797 /* (1) mmap/mprotect NONE -> noaccess */
3798 MC_(make_mem_noaccess)(a, len);
3799 }
3800 }
3801
3802 static
3803 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
3804 {
3805 if (rr || ww || xx) {
3806 /* (4) mprotect other -> change any "noaccess" to "defined" */
3807 make_mem_defined_if_noaccess(a, len);
3808 } else {
3809 /* (3) mprotect NONE -> # no change */
3810 /* do nothing */
3811 }
3812 }
3813
3814
3815 static
3816 void mc_new_mem_startup( Addr a, SizeT len,
3817 Bool rr, Bool ww, Bool xx, ULong di_handle )
3818 {
3819 // Because code is defined, initialised variables get put in the data
3820 // segment and are defined, and uninitialised variables get put in the
3821 // bss segment and are auto-zeroed (and so defined).
3822 //
3823 // It's possible that there will be padding between global variables.
3824 // This will also be auto-zeroed, and marked as defined by Memcheck. If
3825 // a program uses it, Memcheck will not complain. This is arguably a
3826 // false negative, but it's a grey area -- the behaviour is defined (the
3827 // padding is zeroed) but it's probably not what the user intended. And
3828 // we can't avoid it.
3829 //
3830 // Note: we generally ignore RWX permissions, because we can't track them
3831 // without requiring more than one A bit which would slow things down a
3832 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
3833 // So we mark any such pages as "unaddressable".
3834 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
3835 a, (ULong)len, rr, ww, xx);
3836 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
3837 }
3838
3839 static
3840 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
3841 {
3842 MC_(make_mem_defined)(a, len);
3843 }
3844
3845
3846 /*------------------------------------------------------------*/
3847 /*--- Register event handlers ---*/
3848 /*------------------------------------------------------------*/
3849
3850 /* Try and get a nonzero origin for the guest state section of thread
3851 tid characterised by (offset,size). Return 0 if nothing to show
3852 for it. */
3853 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
3854 Int offset, SizeT size )
3855 {
3856 Int sh2off;
3857 UChar area[6];
3858 UInt otag;
3859 sh2off = MC_(get_otrack_shadow_offset)( offset, size );
3860 if (sh2off == -1)
3861 return 0; /* This piece of guest state is not tracked */
3862 tl_assert(sh2off >= 0);
3863 tl_assert(0 == (sh2off % 4));
3864 area[0] = 0x31;
3865 area[5] = 0x27;
3866 VG_(get_shadow_regs_area)( tid, &area[1], 2/*shadowno*/,sh2off,4 );
3867 tl_assert(area[0] == 0x31);
3868 tl_assert(area[5] == 0x27);
3869 otag = *(UInt*)&area[1];
3870 return otag;
3871 }
3872
3873
3874 /* When some chunk of guest state is written, mark the corresponding
3875 shadow area as valid. This is used to initialise arbitrarily large
3876 chunks of guest state, hence the _SIZE value, which has to be as
3877 big as the biggest guest state.
3878 */
3879 static void mc_post_reg_write ( CorePart part, ThreadId tid,
3880 PtrdiffT offset, SizeT size)
3881 {
3882 # define MAX_REG_WRITE_SIZE 1664
3883 UChar area[MAX_REG_WRITE_SIZE];
3884 tl_assert(size <= MAX_REG_WRITE_SIZE);
3885 VG_(memset)(area, V_BITS8_DEFINED, size);
3886 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
3887 # undef MAX_REG_WRITE_SIZE
3888 }
3889
3890 static
3891 void mc_post_reg_write_clientcall ( ThreadId tid,
3892 PtrdiffT offset, SizeT size, Addr f)
3893 {
3894 mc_post_reg_write(/*dummy*/0, tid, offset, size);
3895 }
3896
3897 /* Look at the definedness of the guest's shadow state for
3898 [offset, offset+len). If any part of that is undefined, record
3899 a parameter error.
3900 */
3901 static void mc_pre_reg_read ( CorePart part, ThreadId tid, Char* s,
3902 PtrdiffT offset, SizeT size)
3903 {
3904 Int i;
3905 Bool bad;
3906 UInt otag;
3907
3908 UChar area[16];
3909 tl_assert(size <= 16);
3910
3911 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
3912
3913 bad = False;
3914 for (i = 0; i < size; i++) {
3915 if (area[i] != V_BITS8_DEFINED) {
3916 bad = True;
3917 break;
3918 }
3919 }
3920
3921 if (!bad)
3922 return;
3923
3924 /* We've found some undefinedness. See if we can also find an
3925 origin for it. */
3926 otag = mb_get_origin_for_guest_offset( tid, offset, size );
3927 MC_(record_regparam_error) ( tid, s, otag );
3928 }
3929
3930
3931 /*------------------------------------------------------------*/
3932 /*--- Functions called directly from generated code: ---*/
3933 /*--- Load/store handlers. ---*/
3934 /*------------------------------------------------------------*/
3935
3936 /* Types: LOADV32, LOADV16, LOADV8 are:
3937 UWord fn ( Addr a )
3938 so they return 32-bits on 32-bit machines and 64-bits on
3939 64-bit machines. Addr has the same size as a host word.
3940
3941 LOADV64 is always ULong fn ( Addr a )
3942
3943 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
3944 are a UWord, and for STOREV64 they are a ULong.
3945 */
3946
3947 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
3948 naturally '_sz/8'-aligned, or it exceeds the range covered by the
3949 primary map. This is all very tricky (and important!), so let's
3950 work through the maths by hand (below), *and* assert for these
3951 values at startup. */
3952 #define MASK(_szInBytes) \
3953 ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
3954
3955 /* MASK only exists so as to define this macro. */
3956 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
3957 ((_a) & MASK((_szInBits>>3)))
3958
3959 /* On a 32-bit machine:
3960
3961 N_PRIMARY_BITS == 16, so
3962 N_PRIMARY_MAP == 0x10000, so
3963 N_PRIMARY_MAP-1 == 0xFFFF, so
3964 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
3965
3966 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
3967 = ~ ( 0xFFFF | 0xFFFF0000 )
3968 = ~ 0xFFFF'FFFF
3969 = 0
3970
3971 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
3972 = ~ ( 0xFFFE | 0xFFFF0000 )
3973 = ~ 0xFFFF'FFFE
3974 = 1
3975
3976 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
3977 = ~ ( 0xFFFC | 0xFFFF0000 )
3978 = ~ 0xFFFF'FFFC
3979 = 3
3980
3981 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
3982 = ~ ( 0xFFF8 | 0xFFFF0000 )
3983 = ~ 0xFFFF'FFF8
3984 = 7
3985
3986 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
3987 precisely when a is not 1/2/4/8-bytes aligned. And obviously, for
3988 the 1-byte alignment case, it is always a zero value, since MASK(1)
3989 is zero. All as expected.
3990
3991 On a 64-bit machine, it's more complex, since we're testing
3992 simultaneously for misalignment and for the address being at or
3993 above 32G:
3994
3995 N_PRIMARY_BITS == 19, so
3996 N_PRIMARY_MAP == 0x80000, so
3997 N_PRIMARY_MAP-1 == 0x7FFFF, so
3998 (N_PRIMARY_MAP-1) << 16 == 0x7FFFF'0000, and so
3999
4000 MASK(1) = ~ ( (0x10000 - 1) | 0x7FFFF'0000 )
4001 = ~ ( 0xFFFF | 0x7FFFF'0000 )
4002 = ~ 0x7FFFF'FFFF
4003 = 0xFFFF'FFF8'0000'0000
4004
4005 MASK(2) = ~ ( (0x10000 - 2) | 0x7FFFF'0000 )
4006 = ~ ( 0xFFFE | 0x7FFFF'0000 )
4007 = ~ 0x7FFFF'FFFE
4008 = 0xFFFF'FFF8'0000'0001
4009
4010 MASK(4) = ~ ( (0x10000 - 4) | 0x7FFFF'0000 )
4011 = ~ ( 0xFFFC | 0x7FFFF'0000 )
4012 = ~ 0x7FFFF'FFFC
4013 = 0xFFFF'FFF8'0000'0003
4014
4015 MASK(8) = ~ ( (0x10000 - 8) | 0x7FFFF'0000 )
4016 = ~ ( 0xFFF8 | 0x7FFFF'0000 )
4017 = ~ 0x7FFFF'FFF8
4018 = 0xFFFF'FFF8'0000'0007
4019 */
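/* A minimal sketch (not part of the original code) of the startup
   assertions alluded to above, written out for a 64-bit host with
   N_PRIMARY_BITS == 19; the constants are exactly the values derived by
   hand in the comment.  Memcheck's real sanity checks live in its
   initialisation code, not here. */
#if 0
   tl_assert(MASK(1) == 0xFFFFFFF800000000ULL);
   tl_assert(MASK(2) == 0xFFFFFFF800000001ULL);
   tl_assert(MASK(4) == 0xFFFFFFF800000003ULL);
   tl_assert(MASK(8) == 0xFFFFFFF800000007ULL);
#endif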
4020
4021
4022 /* ------------------------ Size = 8 ------------------------ */
4023
4024 static INLINE
4025 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4026 {
4027 PROF_EVENT(200, "mc_LOADV64");
4028
4029 #ifndef PERF_FAST_LOADV
4030 return mc_LOADVn_slow( a, 64, isBigEndian );
4031 #else
4032 {
4033 UWord sm_off16, vabits16;
4034 SecMap* sm;
4035
4036 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4037 PROF_EVENT(201, "mc_LOADV64-slow1");
4038 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4039 }
4040
4041 sm = get_secmap_for_reading_low(a);
4042 sm_off16 = SM_OFF_16(a);
4043 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4044
4045 // Handle common case quickly: a is suitably aligned, is mapped, and
4046 // addressible.
4047 // Convert V bits from compact memory form to expanded register form.
4048 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4049 return V_BITS64_DEFINED;
4050 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4051 return V_BITS64_UNDEFINED;
4052 } else {
4053 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4054 PROF_EVENT(202, "mc_LOADV64-slow2");
4055 return mc_LOADVn_slow( a, 64, isBigEndian );
4056 }
4057 }
4058 #endif
4059 }
4060
4061 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4062 {
4063 return mc_LOADV64(a, True);
4064 }
4065 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4066 {
4067 return mc_LOADV64(a, False);
4068 }
4069
4070
4071 static INLINE
4072 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4073 {
4074 PROF_EVENT(210, "mc_STOREV64");
4075
4076 #ifndef PERF_FAST_STOREV
4077 // XXX: this slow case seems to be marginally faster than the fast case!
4078 // Investigate further.
4079 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4080 #else
4081 {
4082 UWord sm_off16, vabits16;
4083 SecMap* sm;
4084
4085 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4086 PROF_EVENT(211, "mc_STOREV64-slow1");
4087 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4088 return;
4089 }
4090
4091 sm = get_secmap_for_reading_low(a);
4092 sm_off16 = SM_OFF_16(a);
4093 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4094
4095 if (LIKELY( !is_distinguished_sm(sm) &&
4096 (VA_BITS16_DEFINED == vabits16 ||
4097 VA_BITS16_UNDEFINED == vabits16) ))
4098 {
4099 /* Handle common case quickly: a is suitably aligned, */
4100 /* is mapped, and is addressible. */
4101 // Convert full V-bits in register to compact 2-bit form.
4102 if (V_BITS64_DEFINED == vbits64) {
4103 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
4104 } else if (V_BITS64_UNDEFINED == vbits64) {
4105 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
4106 } else {
4107 /* Slow but general case -- writing partially defined bytes. */
4108 PROF_EVENT(212, "mc_STOREV64-slow2");
4109 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4110 }
4111 } else {
4112 /* Slow but general case. */
4113 PROF_EVENT(213, "mc_STOREV64-slow3");
4114 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4115 }
4116 }
4117 #endif
4118 }
4119
4120 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4121 {
4122 mc_STOREV64(a, vbits64, True);
4123 }
4124 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
4125 {
4126 mc_STOREV64(a, vbits64, False);
4127 }
4128
4129
4130 /* ------------------------ Size = 4 ------------------------ */
4131
4132 static INLINE
4133 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
4134 {
4135 PROF_EVENT(220, "mc_LOADV32");
4136
4137 #ifndef PERF_FAST_LOADV
4138 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4139 #else
4140 {
4141 UWord sm_off, vabits8;
4142 SecMap* sm;
4143
4144 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4145 PROF_EVENT(221, "mc_LOADV32-slow1");
4146 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4147 }
4148
4149 sm = get_secmap_for_reading_low(a);
4150 sm_off = SM_OFF(a);
4151 vabits8 = sm->vabits8[sm_off];
4152
4153 // Handle common case quickly: a is suitably aligned, is mapped, and the
4154 // entire word32 it lives in is addressible.
4155 // Convert V bits from compact memory form to expanded register form.
4156 // For 64-bit platforms, set the high 32 bits of retval to 1 (undefined).
4157 // Almost certainly not necessary, but be paranoid.
4158 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4159 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
4160 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
4161 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
4162 } else {
4163 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
4164 PROF_EVENT(222, "mc_LOADV32-slow2");
4165 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4166 }
4167 }
4168 #endif
4169 }
4170
4171 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
4172 {
4173 return mc_LOADV32(a, True);
4174 }
4175 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
4176 {
4177 return mc_LOADV32(a, False);
4178 }
4179
4180
4181 static INLINE
4182 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
4183 {
4184 PROF_EVENT(230, "mc_STOREV32");
4185
4186 #ifndef PERF_FAST_STOREV
4187 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4188 #else
4189 {
4190 UWord sm_off, vabits8;
4191 SecMap* sm;
4192
4193 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4194 PROF_EVENT(231, "mc_STOREV32-slow1");
4195 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4196 return;
4197 }
4198
4199 sm = get_secmap_for_reading_low(a);
4200 sm_off = SM_OFF(a);
4201 vabits8 = sm->vabits8[sm_off];
4202
4203 // Cleverness: sometimes we don't have to write the shadow memory at
4204 // all, if we can tell that what we want to write is the same as what is
4205 // already there. The 64/16/8 bit cases also have cleverness at this
4206 // point, but it works a little differently to the code below.
4207 if (V_BITS32_DEFINED == vbits32) {
4208 if (vabits8 == (UInt)VA_BITS8_DEFINED) {
4209 return;
4210 } else if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
4211 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
4212 } else {
4213 // not defined/undefined, or distinguished and changing state
4214 PROF_EVENT(232, "mc_STOREV32-slow2");
4215 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4216 }
4217 } else if (V_BITS32_UNDEFINED == vbits32) {
4218 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
4219 return;
4220 } else if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
4221 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
4222 } else {
4223 // not defined/undefined, or distinguished and changing state
4224 PROF_EVENT(233, "mc_STOREV32-slow3");
4225 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4226 }
4227 } else {
4228 // Partially defined word
4229 PROF_EVENT(234, "mc_STOREV32-slow4");
4230 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4231 }
4232 }
4233 #endif
4234 }
4235
4236 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
4237 {
4238 mc_STOREV32(a, vbits32, True);
4239 }
4240 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
4241 {
4242 mc_STOREV32(a, vbits32, False);
4243 }
4244
4245
4246 /* ------------------------ Size = 2 ------------------------ */
4247
4248 static INLINE
4249 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
4250 {
4251 PROF_EVENT(240, "mc_LOADV16");
4252
4253 #ifndef PERF_FAST_LOADV
4254 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4255 #else
4256 {
4257 UWord sm_off, vabits8;
4258 SecMap* sm;
4259
4260 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4261 PROF_EVENT(241, "mc_LOADV16-slow1");
4262 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4263 }
4264
4265 sm = get_secmap_for_reading_low(a);
4266 sm_off = SM_OFF(a);
4267 vabits8 = sm->vabits8[sm_off];
4268 // Handle common case quickly: a is suitably aligned, is mapped, and is
4269 // addressible.
4270 // Convert V bits from compact memory form to expanded register form
4271 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS16_DEFINED; }
4272 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
4273 else {
4274 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4275 // the two sub-bytes.
4276 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
4277 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; }
4278 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
4279 else {
4280 /* Slow case: the two bytes are not all-defined or all-undefined. */
4281 PROF_EVENT(242, "mc_LOADV16-slow2");
4282 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4283 }
4284 }
4285 }
4286 #endif
4287 }
4288
4289 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
4290 {
4291 return mc_LOADV16(a, True);
4292 }
4293 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
4294 {
4295 return mc_LOADV16(a, False);
4296 }
4297
4298
4299 static INLINE
4300 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
4301 {
4302 PROF_EVENT(250, "mc_STOREV16");
4303
4304 #ifndef PERF_FAST_STOREV
4305 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4306 #else
4307 {
4308 UWord sm_off, vabits8;
4309 SecMap* sm;
4310
4311 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4312 PROF_EVENT(251, "mc_STOREV16-slow1");
4313 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4314 return;
4315 }
4316
4317 sm = get_secmap_for_reading_low(a);
4318 sm_off = SM_OFF(a);
4319 vabits8 = sm->vabits8[sm_off];
4320 if (LIKELY( !is_distinguished_sm(sm) &&
4321 (VA_BITS8_DEFINED == vabits8 ||
4322 VA_BITS8_UNDEFINED == vabits8) ))
4323 {
4324 /* Handle common case quickly: a is suitably aligned, */
4325 /* is mapped, and is addressible. */
4326 // Convert full V-bits in register to compact 2-bit form.
4327 if (V_BITS16_DEFINED == vbits16) {
4328 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED ,
4329 &(sm->vabits8[sm_off]) );
4330 } else if (V_BITS16_UNDEFINED == vbits16) {
4331 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
4332 &(sm->vabits8[sm_off]) );
4333 } else {
4334 /* Slow but general case -- writing partially defined bytes. */
4335 PROF_EVENT(252, "mc_STOREV16-slow2");
4336 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4337 }
4338 } else {
4339 /* Slow but general case. */
4340 PROF_EVENT(253, "mc_STOREV16-slow3");
4341 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4342 }
4343 }
4344 #endif
4345 }
4346
4347 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
4348 {
4349 mc_STOREV16(a, vbits16, True);
4350 }
4351 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
4352 {
4353 mc_STOREV16(a, vbits16, False);
4354 }
4355
4356
4357 /* ------------------------ Size = 1 ------------------------ */
4358 /* Note: endianness is irrelevant for size == 1 */
4359
4360 VG_REGPARM(1)
4361 UWord MC_(helperc_LOADV8) ( Addr a )
4362 {
4363 PROF_EVENT(260, "mc_LOADV8");
4364
4365 #ifndef PERF_FAST_LOADV
4366 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4367 #else
4368 {
4369 UWord sm_off, vabits8;
4370 SecMap* sm;
4371
4372 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4373 PROF_EVENT(261, "mc_LOADV8-slow1");
4374 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4375 }
4376
4377 sm = get_secmap_for_reading_low(a);
4378 sm_off = SM_OFF(a);
4379 vabits8 = sm->vabits8[sm_off];
4380 // Convert V bits from compact memory form to expanded register form
4381 // Handle common case quickly: a is mapped, and the entire
4382 // word32 it lives in is addressable.
4383 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS8_DEFINED; }
4384 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
4385 else {
4386 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4387 // the single byte.
4388 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
4389 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; }
4390 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
4391 else {
4392 /* Slow case: the byte is not all-defined or all-undefined. */
4393 PROF_EVENT(262, "mc_LOADV8-slow2");
4394 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4395 }
4396 }
4397 }
4398 #endif
4399 }
4400
4401
4402 VG_REGPARM(2)
4403 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
4404 {
4405 PROF_EVENT(270, "mc_STOREV8");
4406
4407 #ifndef PERF_FAST_STOREV
4408 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4409 #else
4410 {
4411 UWord sm_off, vabits8;
4412 SecMap* sm;
4413
4414 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4415 PROF_EVENT(271, "mc_STOREV8-slow1");
4416 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4417 return;
4418 }
4419
4420 sm = get_secmap_for_reading_low(a);
4421 sm_off = SM_OFF(a);
4422 vabits8 = sm->vabits8[sm_off];
4423 if (LIKELY
4424 ( !is_distinguished_sm(sm) &&
4425 ( (VA_BITS8_DEFINED == vabits8 || VA_BITS8_UNDEFINED == vabits8)
4426 || (VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8))
4427 )
4428 )
4429 )
4430 {
4431 /* Handle common case quickly: a is mapped, the entire word32 it
4432 lives in is addressable. */
4433 // Convert full V-bits in register to compact 2-bit form.
4434 if (V_BITS8_DEFINED == vbits8) {
4435 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
4436 &(sm->vabits8[sm_off]) );
4437 } else if (V_BITS8_UNDEFINED == vbits8) {
4438 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
4439 &(sm->vabits8[sm_off]) );
4440 } else {
4441 /* Slow but general case -- writing partially defined bytes. */
4442 PROF_EVENT(272, "mc_STOREV8-slow2");
4443 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4444 }
4445 } else {
4446 /* Slow but general case. */
4447 PROF_EVENT(273, "mc_STOREV8-slow3");
4448 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4449 }
4450 }
4451 #endif
4452 }
4453
4454
4455 /*------------------------------------------------------------*/
4456 /*--- Functions called directly from generated code: ---*/
4457 /*--- Value-check failure handlers. ---*/
4458 /*------------------------------------------------------------*/
4459
4460 /* Call these ones when an origin is available ... */
4461 VG_REGPARM(1)
4462 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
4463 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
4464 }
4465
4466 VG_REGPARM(1)
4467 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
4468 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
4469 }
4470
4471 VG_REGPARM(1)
4472 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
4473 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
4474 }
4475
4476 VG_REGPARM(1)
4477 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
4478 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
4479 }
4480
4481 VG_REGPARM(2)
4482 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
4483 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
4484 }
4485
4486 /* ... and these when an origin isn't available. */
4487
4488 VG_REGPARM(0)
4489 void MC_(helperc_value_check0_fail_no_o) ( void ) {
4490 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
4491 }
4492
4493 VG_REGPARM(0)
4494 void MC_(helperc_value_check1_fail_no_o) ( void ) {
4495 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
4496 }
4497
4498 VG_REGPARM(0)
4499 void MC_(helperc_value_check4_fail_no_o) ( void ) {
4500 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
4501 }
4502
4503 VG_REGPARM(0)
4504 void MC_(helperc_value_check8_fail_no_o) ( void ) {
4505 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
4506 }
4507
4508 VG_REGPARM(1)
4509 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
4510 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
4511 }
4512
4513
4514 /*------------------------------------------------------------*/
4515 /*--- Metadata get/set functions, for client requests. ---*/
4516 /*------------------------------------------------------------*/
4517
4518 // Nb: this expands the V+A bits out into register-form V bits, even though
4519 // they're in memory. This is for backward compatibility, and because it's
4520 // probably what the user wants.
4521
4522 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
4523 error [no longer used], 3 == addressing error. */
4524 /* Nb: We used to issue various definedness/addressability errors from here,
4525 but we took them out because they ranged from not-very-helpful to
4526 downright annoying, and they complicated the error data structures. */
4527 static Int mc_get_or_set_vbits_for_client (
4528 Addr a,
4529 Addr vbits,
4530 SizeT szB,
4531 Bool setting, /* True <=> set vbits, False <=> get vbits */
4532 Bool is_client_request /* True <=> real user request
4533 False <=> internal call from gdbserver */
4534 )
4535 {
4536 SizeT i;
4537 Bool ok;
4538 UChar vbits8;
4539
4540 /* Check that the arrays are addressable before doing any getting/setting.
4541 The vbits array is only checked for a real user request. */
4542 for (i = 0; i < szB; i++) {
4543 if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
4544 (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
4545 return 3;
4546 }
4547 }
4548
4549 /* Do the copy */
4550 if (setting) {
4551 /* setting */
4552 for (i = 0; i < szB; i++) {
4553 ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
4554 tl_assert(ok);
4555 }
4556 } else {
4557 /* getting */
4558 for (i = 0; i < szB; i++) {
4559 ok = get_vbits8(a + i, &vbits8);
4560 tl_assert(ok);
4561 ((UChar*)vbits)[i] = vbits8;
4562 }
4563 if (is_client_request)
4564 // The bytes in vbits[] have now been set, so mark them as such.
4565 MC_(make_mem_defined)(vbits, szB);
4566 }
4567
4568 return 1;
4569 }
4570
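/* Illustrative sketch (not part of MemCheck itself): a client normally
   reaches the function above via the VALGRIND_GET_VBITS / VALGRIND_SET_VBITS
   macros from memcheck.h, roughly like so:

      unsigned char buf[8];      // partially initialised by the program
      unsigned char vbits[8];    // receives one V-bit byte per data byte
      int r = VALGRIND_GET_VBITS(buf, vbits, sizeof buf);
      // r == 1 on success; r == 3 if some byte of buf or vbits is
      // unaddressable.  A vbits[] byte of 0x00 means "fully defined",
      // 0xFF means "fully undefined"; other values mark partially
      // defined bytes.

   The buffer names here are made up for the example; only the return codes
   and bit conventions are taken from the code above. */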
4571
4572 /*------------------------------------------------------------*/
4573 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
4574 /*------------------------------------------------------------*/
4575
4576 /* For the memory leak detector, say whether an entire 64k chunk of
4577 address space is possibly in use, or not. If in doubt return
4578 True.
4579 */
4580 Bool MC_(is_within_valid_secondary) ( Addr a )
4581 {
4582 SecMap* sm = maybe_get_secmap_for ( a );
4583 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]
4584 || MC_(in_ignored_range)(a)) {
4585 /* Definitely not in use. */
4586 return False;
4587 } else {
4588 return True;
4589 }
4590 }
4591
4592
4593 /* For the memory leak detector, say whether or not a given word
4594 address is to be regarded as valid. */
4595 Bool MC_(is_valid_aligned_word) ( Addr a )
4596 {
4597 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
4598 tl_assert(VG_IS_WORD_ALIGNED(a));
4599 if (is_mem_defined( a, sizeof(UWord), NULL, NULL) == MC_Ok
4600 && !MC_(in_ignored_range)(a)) {
4601 return True;
4602 } else {
4603 return False;
4604 }
4605 }
4606
4607
4608 /*------------------------------------------------------------*/
4609 /*--- Initialisation ---*/
4610 /*------------------------------------------------------------*/
4611
4612 static void init_shadow_memory ( void )
4613 {
4614 Int i;
4615 SecMap* sm;
4616
4617 tl_assert(V_BIT_UNDEFINED == 1);
4618 tl_assert(V_BIT_DEFINED == 0);
4619 tl_assert(V_BITS8_UNDEFINED == 0xFF);
4620 tl_assert(V_BITS8_DEFINED == 0);
4621
4622 /* Build the 3 distinguished secondaries */
4623 sm = &sm_distinguished[SM_DIST_NOACCESS];
4624 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
4625
4626 sm = &sm_distinguished[SM_DIST_UNDEFINED];
4627 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
4628
4629 sm = &sm_distinguished[SM_DIST_DEFINED];
4630 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
4631
4632 /* Set up the primary map. */
4633 /* These entries gradually get overwritten as the used address
4634 space expands. */
4635 for (i = 0; i < N_PRIMARY_MAP; i++)
4636 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
4637
4638 /* Auxiliary primary maps */
4639 init_auxmap_L1_L2();
4640
4641 /* auxmap_size = auxmap_used = 0;
4642 not needed ... these are statically initialised */
4643
4644 /* Secondary V bit table */
4645 secVBitTable = createSecVBitTable();
4646 }
4647
4648
4649 /*------------------------------------------------------------*/
4650 /*--- Sanity check machinery (permanently engaged) ---*/
4651 /*------------------------------------------------------------*/
4652
4653 static Bool mc_cheap_sanity_check ( void )
4654 {
4655 n_sanity_cheap++;
4656 PROF_EVENT(490, "cheap_sanity_check");
4657 /* Check for sane operating level */
4658 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
4659 return False;
4660 /* nothing else useful we can rapidly check */
4661 return True;
4662 }
4663
4664 static Bool mc_expensive_sanity_check ( void )
4665 {
4666 Int i;
4667 Word n_secmaps_found;
4668 SecMap* sm;
4669 HChar* errmsg;
4670 Bool bad = False;
4671
4672 if (0) VG_(printf)("expensive sanity check\n");
4673 if (0) return True;
4674
4675 n_sanity_expensive++;
4676 PROF_EVENT(491, "expensive_sanity_check");
4677
4678 /* Check for sane operating level */
4679 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
4680 return False;
4681
4682 /* Check that the 3 distinguished SMs are still as they should be. */
4683
4684 /* Check noaccess DSM. */
4685 sm = &sm_distinguished[SM_DIST_NOACCESS];
4686 for (i = 0; i < SM_CHUNKS; i++)
4687 if (sm->vabits8[i] != VA_BITS8_NOACCESS)
4688 bad = True;
4689
4690 /* Check undefined DSM. */
4691 sm = &sm_distinguished[SM_DIST_UNDEFINED];
4692 for (i = 0; i < SM_CHUNKS; i++)
4693 if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
4694 bad = True;
4695
4696 /* Check defined DSM. */
4697 sm = &sm_distinguished[SM_DIST_DEFINED];
4698 for (i = 0; i < SM_CHUNKS; i++)
4699 if (sm->vabits8[i] != VA_BITS8_DEFINED)
4700 bad = True;
4701
4702 if (bad) {
4703 VG_(printf)("memcheck expensive sanity: "
4704 "distinguished_secondaries have changed\n");
4705 return False;
4706 }
4707
4708 /* If we're not checking for undefined value errors, the secondary V bit
4709 * table should be empty. */
4710 if (MC_(clo_mc_level) == 1) {
4711 if (0 != VG_(OSetGen_Size)(secVBitTable))
4712 return False;
4713 }
4714
4715 /* check the auxiliary maps, very thoroughly */
4716 n_secmaps_found = 0;
4717 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
4718 if (errmsg) {
4719 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
4720 return False;
4721 }
4722
4723 /* n_secmaps_found is now the number referred to by the auxiliary
4724 primary map. Now add on the ones referred to by the main
4725 primary map. */
4726 for (i = 0; i < N_PRIMARY_MAP; i++) {
4727 if (primary_map[i] == NULL) {
4728 bad = True;
4729 } else {
4730 if (!is_distinguished_sm(primary_map[i]))
4731 n_secmaps_found++;
4732 }
4733 }
4734
4735 /* check that the number of secmaps issued matches the number that
4736 are reachable (iow, no secmap leaks) */
4737 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
4738 bad = True;
4739
4740 if (bad) {
4741 VG_(printf)("memcheck expensive sanity: "
4742 "apparent secmap leakage\n");
4743 return False;
4744 }
4745
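   /* Note: as the code stands, 'bad' cannot be True here -- the identical
      check just above already returned in that case -- so the message below
      is currently unreachable, presumably a leftover from an earlier check
      that has since been removed. */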
4746 if (bad) {
4747 VG_(printf)("memcheck expensive sanity: "
4748 "auxmap covers wrong address space\n");
4749 return False;
4750 }
4751
4752 /* there is only one pointer to each secmap (expensive) */
4753
4754 return True;
4755 }
4756
4757 /*------------------------------------------------------------*/
4758 /*--- Command line args ---*/
4759 /*------------------------------------------------------------*/
4760
4761 Bool MC_(clo_partial_loads_ok) = False;
4762 Long MC_(clo_freelist_vol) = 20*1000*1000LL;
4763 Long MC_(clo_freelist_big_blocks) = 1*1000*1000LL;
4764 LeakCheckMode MC_(clo_leak_check) = LC_Summary;
4765 VgRes MC_(clo_leak_resolution) = Vg_HighRes;
4766 Bool MC_(clo_show_reachable) = False;
4767 Bool MC_(clo_show_possibly_lost) = True;
4768 Bool MC_(clo_workaround_gcc296_bugs) = False;
4769 Int MC_(clo_malloc_fill) = -1;
4770 Int MC_(clo_free_fill) = -1;
4771 Int MC_(clo_mc_level) = 2;
4772 const char* MC_(clo_summary_file) = NULL;
4773
4774
4775 static Bool mc_process_cmd_line_options(Char* arg)
4776 {
4777 Char* tmp_str;
4778
4779 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
4780
4781 /* Set MC_(clo_mc_level):
4782 1 = A bit tracking only
4783 2 = A and V bit tracking, but no V bit origins
4784 3 = A and V bit tracking, and V bit origins
4785
4786 Do this by inspecting --undef-value-errors= and
4787 --track-origins=. Reject the case --undef-value-errors=no
4788 --track-origins=yes as meaningless.
4789 */
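   /* For example (illustrative mapping, derived from the checks below):
         (no relevant flags)                          -> level 2 (the default)
         --undef-value-errors=no                      -> level 1
         --track-origins=yes                          -> level 3
         --undef-value-errors=no --track-origins=yes  -> rejected via bad_level
   */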
4790 if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
4791 if (MC_(clo_mc_level) == 3) {
4792 goto bad_level;
4793 } else {
4794 MC_(clo_mc_level) = 1;
4795 return True;
4796 }
4797 }
4798 if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
4799 if (MC_(clo_mc_level) == 1)
4800 MC_(clo_mc_level) = 2;
4801 return True;
4802 }
4803 if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
4804 if (MC_(clo_mc_level) == 3)
4805 MC_(clo_mc_level) = 2;
4806 return True;
4807 }
4808 if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
4809 if (MC_(clo_mc_level) == 1) {
4810 goto bad_level;
4811 } else {
4812 MC_(clo_mc_level) = 3;
4813 return True;
4814 }
4815 }
4816
4817 if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
4818 else if VG_BOOL_CLO(arg, "--show-reachable", MC_(clo_show_reachable)) {}
4819 else if VG_BOOL_CLO(arg, "--show-possibly-lost",
4820 MC_(clo_show_possibly_lost)) {}
4821 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
4822 MC_(clo_workaround_gcc296_bugs)) {}
4823
4824 else if VG_BINT_CLO(arg, "--freelist-vol", MC_(clo_freelist_vol),
4825 0, 10*1000*1000*1000LL) {}
4826
4827 else if VG_BINT_CLO(arg, "--freelist-big-blocks",
4828 MC_(clo_freelist_big_blocks),
4829 0, 10*1000*1000*1000LL) {}
4830
4831 else if VG_XACT_CLO(arg, "--leak-check=no",
4832 MC_(clo_leak_check), LC_Off) {}
4833 else if VG_XACT_CLO(arg, "--leak-check=summary",
4834 MC_(clo_leak_check), LC_Summary) {}
4835 else if VG_XACT_CLO(arg, "--leak-check=yes",
4836 MC_(clo_leak_check), LC_Full) {}
4837 else if VG_XACT_CLO(arg, "--leak-check=full",
4838 MC_(clo_leak_check), LC_Full) {}
4839
4840 else if VG_XACT_CLO(arg, "--leak-resolution=low",
4841 MC_(clo_leak_resolution), Vg_LowRes) {}
4842 else if VG_XACT_CLO(arg, "--leak-resolution=med",
4843 MC_(clo_leak_resolution), Vg_MedRes) {}
4844 else if VG_XACT_CLO(arg, "--leak-resolution=high",
4845 MC_(clo_leak_resolution), Vg_HighRes) {}
4846
4847 else if VG_STR_CLO(arg, "--summary-file", tmp_str) {
4848 MC_(clo_summary_file) = VG_(strdup)("clo_summary_file", tmp_str);
4849 }
4850 else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
4851 Int i;
4852 Bool ok = parse_ignore_ranges(tmp_str);
4853 if (!ok)
4854 return False;
4855 tl_assert(ignoreRanges.used >= 0);
4856 tl_assert(ignoreRanges.used < M_IGNORE_RANGES);
4857 for (i = 0; i < ignoreRanges.used; i++) {
4858 Addr s = ignoreRanges.start[i];
4859 Addr e = ignoreRanges.end[i];
4860 Addr limit = 0x4000000; /* 64M - entirely arbitrary limit */
4861 if (e <= s) {
4862 VG_(message)(Vg_DebugMsg,
4863 "ERROR: --ignore-ranges: end <= start in range:\n");
4864 VG_(message)(Vg_DebugMsg,
4865 " 0x%lx-0x%lx\n", s, e);
4866 return False;
4867 }
4868 if (e - s > limit) {
4869 VG_(message)(Vg_DebugMsg,
4870 "ERROR: --ignore-ranges: suspiciously large range:\n");
4871 VG_(message)(Vg_DebugMsg,
4872 " 0x%lx-0x%lx (size %ld)\n", s, e, (UWord)(e-s));
4873 return False;
4874 }
4875 }
4876 }
4877
4878 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
4879 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {}
4880
4881 else
4882 return VG_(replacement_malloc_process_cmd_line_option)(arg);
4883
4884 return True;
4885
4886
4887 bad_level:
4888 VG_(fmsg_bad_option)(arg,
4889 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
4890 }
4891
4892 static void mc_print_usage(void)
4893 {
4894 VG_(printf)(
4895 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
4896 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
4897 " --show-reachable=no|yes show reachable blocks in leak check? [no]\n"
4898 " --show-possibly-lost=no|yes show possibly lost blocks in leak check?\n"
4899 " [yes]\n"
4900 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
4901 " --track-origins=no|yes show origins of undefined values? [no]\n"
4902 " --partial-loads-ok=no|yes too hard to explain here; see manual [no]\n"
4903 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
4904 " --freelist-big-blocks=<number> releases first blocks with size >= [1000000]\n"
4905 " --workaround-gcc296-bugs=no|yes self explanatory [no]\n"
4906 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
4907 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
4908 " --free-fill=<hexnumber> fill free'd areas with given value\n"
4909 );
4910 }
4911
4912 static void mc_print_debug_usage(void)
4913 {
4914 VG_(printf)(
4915 " (none)\n"
4916 );
4917 }
4918
4919
4920 /*------------------------------------------------------------*/
4921 /*--- Client blocks ---*/
4922 /*------------------------------------------------------------*/
4923
4924 /* Client block management:
4925
4926 This is managed as an expanding array of client block descriptors.
4927 Indices of live descriptors are issued to the client, so it can ask
4928 to free them later. Therefore we cannot slide live entries down
4929 over dead ones. Instead we must use free/inuse flags and scan for
4930 an empty slot at allocation time. This in turn means allocation is
4931 relatively expensive, so we hope this does not happen too often.
4932
4933 An unused block has start == size == 0
4934 */
4935
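/* Illustrative client-side view (a sketch, not part of this file): the
   VG_USERREQ__CREATE_BLOCK and VG_USERREQ__DISCARD cases further down are
   normally reached via the memcheck.h macros, roughly:

      int id = VALGRIND_CREATE_BLOCK(addr, len, "my buffer");
      ...
      VALGRIND_DISCARD(id);   // resets cgbs[id].start == cgbs[id].size == 0

   Because 'id' is an index handed back to the client, live slots are never
   compacted; a discarded slot is simply found and reused by the next
   alloc_client_block() scan. */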
4936 /* type CGenBlock is defined in mc_include.h */
4937
4938 /* This subsystem is self-initialising. */
4939 static UWord cgb_size = 0;
4940 static UWord cgb_used = 0;
4941 static CGenBlock* cgbs = NULL;
4942
4943 /* Stats for this subsystem. */
4944 static ULong cgb_used_MAX = 0; /* Max in use. */
4945 static ULong cgb_allocs = 0; /* Number of allocs. */
4946 static ULong cgb_discards = 0; /* Number of discards. */
4947 static ULong cgb_search = 0; /* Number of searches. */
4948
4949
4950 /* Get access to the client block array. */
4951 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
4952 /*OUT*/UWord* nBlocks )
4953 {
4954 *blocks = cgbs;
4955 *nBlocks = cgb_used;
4956 }
4957
4958
4959 static
4960 Int alloc_client_block ( void )
4961 {
4962 UWord i, sz_new;
4963 CGenBlock* cgbs_new;
4964
4965 cgb_allocs++;
4966
4967 for (i = 0; i < cgb_used; i++) {
4968 cgb_search++;
4969 if (cgbs[i].start == 0 && cgbs[i].size == 0)
4970 return i;
4971 }
4972
4973 /* Not found. Try to allocate one at the end. */
4974 if (cgb_used < cgb_size) {
4975 cgb_used++;
4976 return cgb_used-1;
4977 }
4978
4979 /* Ok, we have to allocate a new one. */
4980 tl_assert(cgb_used == cgb_size);
4981 sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
4982
4983 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
4984 for (i = 0; i < cgb_used; i++)
4985 cgbs_new[i] = cgbs[i];
4986
4987 if (cgbs != NULL)
4988 VG_(free)( cgbs );
4989 cgbs = cgbs_new;
4990
4991 cgb_size = sz_new;
4992 cgb_used++;
4993 if (cgb_used > cgb_used_MAX)
4994 cgb_used_MAX = cgb_used;
4995 return cgb_used-1;
4996 }
4997
4998
4999 static void show_client_block_stats ( void )
5000 {
5001 VG_(message)(Vg_DebugMsg,
5002 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
5003 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
5004 );
5005 }
5006 static void print_monitor_help ( void )
5007 {
5008 VG_(gdb_printf)
5009 (
5010 "\n"
5011 "memcheck monitor commands:\n"
5012 " get_vbits <addr> [<len>]\n"
5013 " returns validity bits for <len> (or 1) bytes at <addr>\n"
5014 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
5015 " Example: get_vbits 0x8049c78 10\n"
5016 " make_memory [noaccess|undefined\n"
5017 " |defined|Definedifaddressable] <addr> [<len>]\n"
5018 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
5019 " check_memory [addressable|defined] <addr> [<len>]\n"
5020 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
5021 " and outputs a description of <addr>\n"
5022 " leak_check [full*|summary] [reachable|possibleleak*|definiteleak]\n"
5023 " [increased*|changed|any]\n"
5024 " * = defaults\n"
5025 " Examples: leak_check\n"
5026 " leak_check summary any\n"
5027 "\n");
5028 }
5029
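/* Example of how these commands are typically issued (illustrative only;
   the exact gdb/vgdb invocation depends on the setup):

      (gdb) monitor get_vbits 0x8049c78 10
      (gdb) monitor make_memory defined 0x8049c78 10
      (gdb) monitor leak_check summary any

   Each request string ends up in handle_gdb_monitor_command() below. */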
5030 /* return True if request recognised, False otherwise */
5031 static Bool handle_gdb_monitor_command (ThreadId tid, Char *req)
5032 {
5033 Char* wcmd;
5034 Char s[VG_(strlen(req)) + 1]; /* copy for strtok_r; +1 for the trailing NUL */
5035 Char *ssaveptr;
5036
5037 VG_(strcpy) (s, req);
5038
5039 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
5040 /* NB: if possible, avoid introducing a new command below which
5041 starts with the same first letter(s) as an already existing
5042 command. This ensures a shorter abbreviation for the user. */
5043 switch (VG_(keyword_id)
5044 ("help get_vbits leak_check make_memory check_memory",
5045 wcmd, kwd_report_duplicated_matches)) {
5046 case -2: /* multiple matches */
5047 return True;
5048 case -1: /* not found */
5049 return False;
5050 case 0: /* help */
5051 print_monitor_help();
5052 return True;
5053 case 1: { /* get_vbits */
5054 Addr address;
5055 SizeT szB = 1;
5056 VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5057 if (szB != 0) {
5058 UChar vbits;
5059 Int i;
5060 Int unaddressable = 0;
5061 for (i = 0; i < szB; i++) {
5062 Int res = mc_get_or_set_vbits_for_client
5063 (address+i, (Addr) &vbits, 1,
5064 False, /* get them */
5065 False /* is client request */ );
5066 if ((i % 32) == 0 && i != 0)
5067 VG_(gdb_printf) ("\n");
5068 else if ((i % 4) == 0 && i != 0)
5069 VG_(gdb_printf) (" ");
5070 if (res == 1) {
5071 VG_(gdb_printf) ("%02x", vbits);
5072 } else {
5073 tl_assert(3 == res);
5074 unaddressable++;
5075 VG_(gdb_printf) ("__");
5076 }
5077 }
5078 if ((i % 80) != 0)
5079 VG_(gdb_printf) ("\n");
5080 if (unaddressable) {
5081 VG_(gdb_printf)
5082 ("Address %p len %ld has %d bytes unaddressable\n",
5083 (void *)address, szB, unaddressable);
5084 }
5085 }
5086 return True;
5087 }
5088 case 2: { /* leak_check */
5089 Int err = 0;
5090 LeakCheckParams lcp;
5091 Char* kw;
5092
5093 lcp.mode = LC_Full;
5094 lcp.show_reachable = False;
5095 lcp.show_possibly_lost = True;
5096 lcp.deltamode = LCD_Increased;
5097 lcp.requested_by_monitor_command = True;
5098
5099 for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
5100 kw != NULL;
5101 kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
5102 switch (VG_(keyword_id)
5103 ("full summary "
5104 "reachable possibleleak definiteleak "
5105 "increased changed any",
5106 kw, kwd_report_all)) {
5107 case -2: err++; break;
5108 case -1: err++; break;
5109 case 0: /* full */
5110 lcp.mode = LC_Full; break;
5111 case 1: /* summary */
5112 lcp.mode = LC_Summary; break;
5113 case 2: /* reachable */
5114 lcp.show_reachable = True;
5115 lcp.show_possibly_lost = True; break;
5116 case 3: /* possibleleak */
5117 lcp.show_reachable = False;
5118 lcp.show_possibly_lost = True; break;
5119 case 4: /* definiteleak */
5120 lcp.show_reachable = False;
5121 lcp.show_possibly_lost = False; break;
5122 case 5: /* increased */
5123 lcp.deltamode = LCD_Increased; break;
5124 case 6: /* changed */
5125 lcp.deltamode = LCD_Changed; break;
5126 case 7: /* any */
5127 lcp.deltamode = LCD_Any; break;
5128 default:
5129 tl_assert (0);
5130 }
5131 }
5132 if (!err)
5133 MC_(detect_memory_leaks)(tid, lcp);
5134 return True;
5135 }
5136
5137 case 3: { /* make_memory */
5138 Addr address;
5139 SizeT szB = 1;
5140 int kwdid = VG_(keyword_id)
5141 ("noaccess undefined defined Definedifaddressable",
5142 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5143 VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5144 if (address == (Addr) 0 && szB == 0) return True;
5145 switch (kwdid) {
5146 case -2: break;
5147 case -1: break;
5148 case 0: MC_(make_mem_noaccess) (address, szB); break;
5149 case 1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
5150 MC_OKIND_USER ); break;
5151 case 2: MC_(make_mem_defined) ( address, szB ); break;
5152 case 3: make_mem_defined_if_addressable ( address, szB ); break;
5153 default: tl_assert(0);
5154 }
5155 return True;
5156 }
5157
5158 case 4: { /* check_memory */
5159 Addr address;
5160 SizeT szB = 1;
5161 Addr bad_addr;
5162 UInt okind;
5163 char* src;
5164 UInt otag;
5165 UInt ecu;
5166 ExeContext* origin_ec;
5167 MC_ReadResult res;
5168
5169 int kwdid = VG_(keyword_id)
5170 ("addressable defined",
5171 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5172 VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5173 if (address == (Addr) 0 && szB == 0) return True;
5174 switch (kwdid) {
5175 case -2: break;
5176 case -1: break;
5177 case 0:
5178 if (is_mem_addressable ( address, szB, &bad_addr ))
5179 VG_(gdb_printf) ("Address %p len %ld addressable\n",
5180 (void *)address, szB);
5181 else
5182 VG_(gdb_printf)
5183 ("Address %p len %ld not addressable:\nbad address %p\n",
5184 (void *)address, szB, (void *) bad_addr);
5185 MC_(pp_describe_addr) (address);
5186 break;
5187 case 1: res = is_mem_defined ( address, szB, &bad_addr, &otag );
5188 if (MC_AddrErr == res)
5189 VG_(gdb_printf)
5190 ("Address %p len %ld not addressable:\nbad address %p\n",
5191 (void *)address, szB, (void *) bad_addr);
5192 else if (MC_ValueErr == res) {
5193 okind = otag & 3;
5194 switch (okind) {
5195 case MC_OKIND_STACK:
5196 src = " was created by a stack allocation"; break;
5197 case MC_OKIND_HEAP:
5198 src = " was created by a heap allocation"; break;
5199 case MC_OKIND_USER:
5200 src = " was created by a client request"; break;
5201 case MC_OKIND_UNKNOWN:
5202 src = ""; break;
5203 default: tl_assert(0);
5204 }
5205 VG_(gdb_printf)
5206 ("Address %p len %ld not defined:\n"
5207 "Uninitialised value at %p%s\n",
5208 (void *)address, szB, (void *) bad_addr, src);
5209 ecu = otag & ~3;
5210 if (VG_(is_plausible_ECU)(ecu)) {
5211 origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
5212 VG_(pp_ExeContext)( origin_ec );
5213 }
5214 }
5215 else
5216 VG_(gdb_printf) ("Address %p len %ld defined\n",
5217 (void *)address, szB);
5218 MC_(pp_describe_addr) (address);
5219 break;
5220 default: tl_assert(0);
5221 }
5222 return True;
5223 }
5224
5225 default:
5226 tl_assert(0);
5227 return False;
5228 }
5229 }
5230
5231 /*------------------------------------------------------------*/
5232 /*--- Client requests ---*/
5233 /*------------------------------------------------------------*/
5234
5235 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
5236 {
5237 Int i;
5238 Bool ok;
5239 Addr bad_addr;
5240
5241 if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
5242 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
5243 && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
5244 && VG_USERREQ__FREELIKE_BLOCK != arg[0]
5245 && VG_USERREQ__CREATE_MEMPOOL != arg[0]
5246 && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
5247 && VG_USERREQ__MEMPOOL_ALLOC != arg[0]
5248 && VG_USERREQ__MEMPOOL_FREE != arg[0]
5249 && VG_USERREQ__MEMPOOL_TRIM != arg[0]
5250 && VG_USERREQ__MOVE_MEMPOOL != arg[0]
5251 && VG_USERREQ__MEMPOOL_CHANGE != arg[0]
5252 && VG_USERREQ__MEMPOOL_EXISTS != arg[0]
5253 && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0])
5254 return False;
5255
5256 switch (arg[0]) {
5257 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE:
5258 ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
5259 if (!ok)
5260 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
5261 *ret = ok ? (UWord)NULL : bad_addr;
5262 break;
5263
5264 case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
5265 Bool errorV = False;
5266 Addr bad_addrV = 0;
5267 UInt otagV = 0;
5268 Bool errorA = False;
5269 Addr bad_addrA = 0;
5270 is_mem_defined_comprehensive(
5271 arg[1], arg[2],
5272 &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
5273 );
5274 if (errorV) {
5275 MC_(record_user_error) ( tid, bad_addrV,
5276 /*isAddrErr*/False, otagV );
5277 }
5278 if (errorA) {
5279 MC_(record_user_error) ( tid, bad_addrA,
5280 /*isAddrErr*/True, 0 );
5281 }
5282 /* Return the lower of the two erring addresses, if any. */
5283 *ret = 0;
5284 if (errorV && !errorA) {
5285 *ret = bad_addrV;
5286 }
5287 if (!errorV && errorA) {
5288 *ret = bad_addrA;
5289 }
5290 if (errorV && errorA) {
5291 *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
5292 }
5293 break;
5294 }
5295
5296 case VG_USERREQ__DO_LEAK_CHECK: {
5297 LeakCheckParams lcp;
5298
5299 if (arg[1] == 0)
5300 lcp.mode = LC_Full;
5301 else if (arg[1] == 1)
5302 lcp.mode = LC_Summary;
5303 else {
5304 VG_(message)(Vg_UserMsg,
5305 "Warning: unknown memcheck leak search mode\n");
5306 lcp.mode = LC_Full;
5307 }
5308
5309 lcp.show_reachable = MC_(clo_show_reachable);
5310 lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
5311
5312 if (arg[2] == 0)
5313 lcp.deltamode = LCD_Any;
5314 else if (arg[2] == 1)
5315 lcp.deltamode = LCD_Increased;
5316 else if (arg[2] == 2)
5317 lcp.deltamode = LCD_Changed;
5318 else {
5319 VG_(message)
5320 (Vg_UserMsg,
5321 "Warning: unknown memcheck leak search deltamode\n");
5322 lcp.deltamode = LCD_Any;
5323 }
5324 lcp.requested_by_monitor_command = False;
5325
5326 MC_(detect_memory_leaks)(tid, lcp);
5327 *ret = 0; /* return value is meaningless */
5328 break;
5329 }
5330
5331 case VG_USERREQ__MAKE_MEM_NOACCESS:
5332 MC_(make_mem_noaccess) ( arg[1], arg[2] );
5333 *ret = -1;
5334 break;
5335
5336 case VG_USERREQ__MAKE_MEM_UNDEFINED:
5337 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
5338 MC_OKIND_USER );
5339 *ret = -1;
5340 break;
5341
5342 case VG_USERREQ__MAKE_MEM_DEFINED:
5343 MC_(make_mem_defined) ( arg[1], arg[2] );
5344 *ret = -1;
5345 break;
5346
5347 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
5348 make_mem_defined_if_addressable ( arg[1], arg[2] );
5349 *ret = -1;
5350 break;
5351
5352 case VG_USERREQ__CREATE_BLOCK: /* describe a block */
5353 if (arg[1] != 0 && arg[2] != 0) {
5354 i = alloc_client_block();
5355 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
5356 cgbs[i].start = arg[1];
5357 cgbs[i].size = arg[2];
5358 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (Char *)arg[3]);
5359 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
5360 *ret = i;
5361 } else
5362 *ret = -1;
5363 break;
5364
5365 case VG_USERREQ__DISCARD: /* discard */
5366 if (cgbs == NULL
5367 || arg[2] >= cgb_used ||
5368 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
5369 *ret = 1;
5370 } else {
5371 tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
5372 cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
5373 VG_(free)(cgbs[arg[2]].desc);
5374 cgb_discards++;
5375 *ret = 0;
5376 }
5377 break;
5378
5379 case VG_USERREQ__GET_VBITS:
5380 *ret = mc_get_or_set_vbits_for_client
5381 ( arg[1], arg[2], arg[3],
5382 False /* get them */,
5383 True /* is client request */ );
5384 break;
5385
5386 case VG_USERREQ__SET_VBITS:
5387 *ret = mc_get_or_set_vbits_for_client
5388 ( arg[1], arg[2], arg[3],
5389 True /* set them */,
5390 True /* is client request */ );
5391 break;
5392
5393 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
5394 UWord** argp = (UWord**)arg;
5395 // MC_(bytes_leaked) et al were set by the last leak check (or zero
5396 // if no prior leak checks performed).
5397 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
5398 *argp[2] = MC_(bytes_dubious);
5399 *argp[3] = MC_(bytes_reachable);
5400 *argp[4] = MC_(bytes_suppressed);
5401 // there is no argp[5]
5402 //*argp[5] = MC_(bytes_indirect);
5403 // XXX need to make *argp[1-4] defined; currently done in the
5404 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
5405 *ret = 0;
5406 return True;
5407 }
5408 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
5409 UWord** argp = (UWord**)arg;
5410 // MC_(blocks_leaked) et al were set by the last leak check (or zero
5411 // if no prior leak checks performed).
5412 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
5413 *argp[2] = MC_(blocks_dubious);
5414 *argp[3] = MC_(blocks_reachable);
5415 *argp[4] = MC_(blocks_suppressed);
5416 // there is no argp[5]
5417 //*argp[5] = MC_(blocks_indirect);
5418 // XXX need to make *argp[1-4] defined; currently done in the
5419 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
5420 *ret = 0;
5421 return True;
5422 }
5423 case VG_USERREQ__MALLOCLIKE_BLOCK: {
5424 Addr p = (Addr)arg[1];
5425 SizeT sizeB = arg[2];
5426 //UInt rzB = arg[3]; XXX: unused!
5427 Bool is_zeroed = (Bool)arg[4];
5428
5429 MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
5430 MC_AllocCustom, MC_(malloc_list) );
5431 return True;
5432 }
5433 case VG_USERREQ__RESIZEINPLACE_BLOCK: {
5434 Addr p = (Addr)arg[1];
5435 SizeT oldSizeB = arg[2];
5436 SizeT newSizeB = arg[3];
5437 UInt rzB = arg[4];
5438
5439 MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
5440 return True;
5441 }
5442 case VG_USERREQ__FREELIKE_BLOCK: {
5443 Addr p = (Addr)arg[1];
5444 UInt rzB = arg[2];
5445
5446 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
5447 return True;
5448 }
5449
5450 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
5451 Char* s = (Char*)arg[1];
5452 Addr dst = (Addr) arg[2];
5453 Addr src = (Addr) arg[3];
5454 SizeT len = (SizeT)arg[4];
5455 MC_(record_overlap_error)(tid, s, src, dst, len);
5456 return True;
5457 }
5458
5459 case VG_USERREQ__CREATE_MEMPOOL: {
5460 Addr pool = (Addr)arg[1];
5461 UInt rzB = arg[2];
5462 Bool is_zeroed = (Bool)arg[3];
5463
5464 MC_(create_mempool) ( pool, rzB, is_zeroed );
5465 return True;
5466 }
5467
5468 case VG_USERREQ__DESTROY_MEMPOOL: {
5469 Addr pool = (Addr)arg[1];
5470
5471 MC_(destroy_mempool) ( pool );
5472 return True;
5473 }
5474
5475 case VG_USERREQ__MEMPOOL_ALLOC: {
5476 Addr pool = (Addr)arg[1];
5477 Addr addr = (Addr)arg[2];
5478 UInt size = arg[3];
5479
5480 MC_(mempool_alloc) ( tid, pool, addr, size );
5481 return True;
5482 }
5483
5484 case VG_USERREQ__MEMPOOL_FREE: {
5485 Addr pool = (Addr)arg[1];
5486 Addr addr = (Addr)arg[2];
5487
5488 MC_(mempool_free) ( pool, addr );
5489 return True;
5490 }
5491
5492 case VG_USERREQ__MEMPOOL_TRIM: {
5493 Addr pool = (Addr)arg[1];
5494 Addr addr = (Addr)arg[2];
5495 UInt size = arg[3];
5496
5497 MC_(mempool_trim) ( pool, addr, size );
5498 return True;
5499 }
5500
5501 case VG_USERREQ__MOVE_MEMPOOL: {
5502 Addr poolA = (Addr)arg[1];
5503 Addr poolB = (Addr)arg[2];
5504
5505 MC_(move_mempool) ( poolA, poolB );
5506 return True;
5507 }
5508
5509 case VG_USERREQ__MEMPOOL_CHANGE: {
5510 Addr pool = (Addr)arg[1];
5511 Addr addrA = (Addr)arg[2];
5512 Addr addrB = (Addr)arg[3];
5513 UInt size = arg[4];
5514
5515 MC_(mempool_change) ( pool, addrA, addrB, size );
5516 return True;
5517 }
5518
5519 case VG_USERREQ__MEMPOOL_EXISTS: {
5520 Addr pool = (Addr)arg[1];
5521
5522 *ret = (UWord) MC_(mempool_exists) ( pool );
5523 return True;
5524 }
5525
5526 case VG_USERREQ__GDB_MONITOR_COMMAND: {
5527 Bool handled = handle_gdb_monitor_command (tid, (Char*)arg[1]);
5528 if (handled)
5529 *ret = 1;
5530 else
5531 *ret = 0;
5532 return handled;
5533 }
5534
5535 default:
5536 VG_(message)(
5537 Vg_UserMsg,
5538 "Warning: unknown memcheck client request code %llx\n",
5539 (ULong)arg[0]
5540 );
5541 return False;
5542 }
5543 return True;
5544 }
5545
5546
5547 /*------------------------------------------------------------*/
5548 /*--- Crude profiling machinery. ---*/
5549 /*------------------------------------------------------------*/
5550
5551 // We track a number of interesting events (using PROF_EVENT)
5552 // if MC_PROFILE_MEMORY is defined.
5553
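/* A sketch of what a PROF_EVENT hit amounts to when MC_PROFILE_MEMORY is
   defined (the real macro lives in mc_include.h; this is only an
   approximation for orientation):

      // PROF_EVENT(ev, name) roughly does:
      //    tl_assert(ev >= 0 && ev < N_PROF_EVENTS);
      //    MC_(event_ctr)[ev]++;
      //    MC_(event_ctr_name)[ev] = name;

   done_prof_mem() below then prints every counter that is non-zero. */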
5554 #ifdef MC_PROFILE_MEMORY
5555
5556 UInt MC_(event_ctr)[N_PROF_EVENTS];
5557 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
5558
5559 static void init_prof_mem ( void )
5560 {
5561 Int i;
5562 for (i = 0; i < N_PROF_EVENTS; i++) {
5563 MC_(event_ctr)[i] = 0;
5564 MC_(event_ctr_name)[i] = NULL;
5565 }
5566 }
5567
5568 static void done_prof_mem ( void )
5569 {
5570 Int i;
5571 Bool spaced = False;
5572 for (i = 0; i < N_PROF_EVENTS; i++) {
5573 if (!spaced && (i % 10) == 0) {
5574 VG_(printf)("\n");
5575 spaced = True;
5576 }
5577 if (MC_(event_ctr)[i] > 0) {
5578 spaced = False;
5579 VG_(printf)( "prof mem event %3d: %9d %s\n",
5580 i, MC_(event_ctr)[i],
5581 MC_(event_ctr_name)[i]
5582 ? MC_(event_ctr_name)[i] : "unnamed");
5583 }
5584 }
5585 }
5586
5587 #else
5588
5589 static void init_prof_mem ( void ) { }
5590 static void done_prof_mem ( void ) { }
5591
5592 #endif
5593
5594
5595 /*------------------------------------------------------------*/
5596 /*--- Origin tracking stuff ---*/
5597 /*------------------------------------------------------------*/
5598
5599 /*--------------------------------------------*/
5600 /*--- Origin tracking: load handlers ---*/
5601 /*--------------------------------------------*/
5602
5603 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
5604 return or1 > or2 ? or1 : or2;
5605 }
5606
5607 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
5608 OCacheLine* line;
5609 UChar descr;
5610 UWord lineoff = oc_line_offset(a);
5611 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5612
5613 if (OC_ENABLE_ASSERTIONS) {
5614 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5615 }
5616
5617 line = find_OCacheLine( a );
5618
5619 descr = line->descr[lineoff];
5620 if (OC_ENABLE_ASSERTIONS) {
5621 tl_assert(descr < 0x10);
5622 }
5623
5624 if (LIKELY(0 == (descr & (1 << byteoff)))) {
5625 return 0;
5626 } else {
5627 return line->w32[lineoff];
5628 }
5629 }
5630
5631 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
5632 OCacheLine* line;
5633 UChar descr;
5634 UWord lineoff, byteoff;
5635
5636 if (UNLIKELY(a & 1)) {
5637 /* Handle misaligned case, slowly. */
5638 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
5639 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
5640 return merge_origins(oLo, oHi);
5641 }
5642
5643 lineoff = oc_line_offset(a);
5644 byteoff = a & 3; /* 0 or 2 */
5645
5646 if (OC_ENABLE_ASSERTIONS) {
5647 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5648 }
5649 line = find_OCacheLine( a );
5650
5651 descr = line->descr[lineoff];
5652 if (OC_ENABLE_ASSERTIONS) {
5653 tl_assert(descr < 0x10);
5654 }
5655
5656 if (LIKELY(0 == (descr & (3 << byteoff)))) {
5657 return 0;
5658 } else {
5659 return line->w32[lineoff];
5660 }
5661 }
5662
5663 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
5664 OCacheLine* line;
5665 UChar descr;
5666 UWord lineoff;
5667
5668 if (UNLIKELY(a & 3)) {
5669 /* Handle misaligned case, slowly. */
5670 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
5671 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
5672 return merge_origins(oLo, oHi);
5673 }
5674
5675 lineoff = oc_line_offset(a);
5676 if (OC_ENABLE_ASSERTIONS) {
5677 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5678 }
5679
5680 line = find_OCacheLine( a );
5681
5682 descr = line->descr[lineoff];
5683 if (OC_ENABLE_ASSERTIONS) {
5684 tl_assert(descr < 0x10);
5685 }
5686
5687 if (LIKELY(0 == descr)) {
5688 return 0;
5689 } else {
5690 return line->w32[lineoff];
5691 }
5692 }
5693
5694 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
5695 OCacheLine* line;
5696 UChar descrLo, descrHi, descr;
5697 UWord lineoff;
5698
5699 if (UNLIKELY(a & 7)) {
5700 /* Handle misaligned case, slowly. */
5701 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
5702 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
5703 return merge_origins(oLo, oHi);
5704 }
5705
5706 lineoff = oc_line_offset(a);
5707 if (OC_ENABLE_ASSERTIONS) {
5708 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5709 }
5710
5711 line = find_OCacheLine( a );
5712
5713 descrLo = line->descr[lineoff + 0];
5714 descrHi = line->descr[lineoff + 1];
5715 descr = descrLo | descrHi;
5716 if (OC_ENABLE_ASSERTIONS) {
5717 tl_assert(descr < 0x10);
5718 }
5719
5720 if (LIKELY(0 == descr)) {
5721 return 0; /* both 32-bit chunks are defined */
5722 } else {
5723 UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
5724 UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
5725 return merge_origins(oLo, oHi);
5726 }
5727 }
5728
5729 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
5730 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 );
5731 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 );
5732 UInt oBoth = merge_origins(oLo, oHi);
5733 return (UWord)oBoth;
5734 }
5735
5736
5737 /*--------------------------------------------*/
5738 /*--- Origin tracking: store handlers ---*/
5739 /*--------------------------------------------*/
5740
5741 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
5742 OCacheLine* line;
5743 UWord lineoff = oc_line_offset(a);
5744 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5745
5746 if (OC_ENABLE_ASSERTIONS) {
5747 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5748 }
5749
5750 line = find_OCacheLine( a );
5751
5752 if (d32 == 0) {
5753 line->descr[lineoff] &= ~(1 << byteoff);
5754 } else {
5755 line->descr[lineoff] |= (1 << byteoff);
5756 line->w32[lineoff] = d32;
5757 }
5758 }
5759
5760 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
5761 OCacheLine* line;
5762 UWord lineoff, byteoff;
5763
5764 if (UNLIKELY(a & 1)) {
5765 /* Handle misaligned case, slowly. */
5766 MC_(helperc_b_store1)( a + 0, d32 );
5767 MC_(helperc_b_store1)( a + 1, d32 );
5768 return;
5769 }
5770
5771 lineoff = oc_line_offset(a);
5772 byteoff = a & 3; /* 0 or 2 */
5773
5774 if (OC_ENABLE_ASSERTIONS) {
5775 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5776 }
5777
5778 line = find_OCacheLine( a );
5779
5780 if (d32 == 0) {
5781 line->descr[lineoff] &= ~(3 << byteoff);
5782 } else {
5783 line->descr[lineoff] |= (3 << byteoff);
5784 line->w32[lineoff] = d32;
5785 }
5786 }
5787
5788 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
5789 OCacheLine* line;
5790 UWord lineoff;
5791
5792 if (UNLIKELY(a & 3)) {
5793 /* Handle misaligned case, slowly. */
5794 MC_(helperc_b_store2)( a + 0, d32 );
5795 MC_(helperc_b_store2)( a + 2, d32 );
5796 return;
5797 }
5798
5799 lineoff = oc_line_offset(a);
5800 if (OC_ENABLE_ASSERTIONS) {
5801 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5802 }
5803
5804 line = find_OCacheLine( a );
5805
5806 if (d32 == 0) {
5807 line->descr[lineoff] = 0;
5808 } else {
5809 line->descr[lineoff] = 0xF;
5810 line->w32[lineoff] = d32;
5811 }
5812 }
5813
5814 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
5815 OCacheLine* line;
5816 UWord lineoff;
5817
5818 if (UNLIKELY(a & 7)) {
5819 /* Handle misaligned case, slowly. */
5820 MC_(helperc_b_store4)( a + 0, d32 );
5821 MC_(helperc_b_store4)( a + 4, d32 );
5822 return;
5823 }
5824
5825 lineoff = oc_line_offset(a);
5826 if (OC_ENABLE_ASSERTIONS) {
5827 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5828 }
5829
5830 line = find_OCacheLine( a );
5831
5832 if (d32 == 0) {
5833 line->descr[lineoff + 0] = 0;
5834 line->descr[lineoff + 1] = 0;
5835 } else {
5836 line->descr[lineoff + 0] = 0xF;
5837 line->descr[lineoff + 1] = 0xF;
5838 line->w32[lineoff + 0] = d32;
5839 line->w32[lineoff + 1] = d32;
5840 }
5841 }
5842
5843 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
5844 MC_(helperc_b_store8)( a + 0, d32 );
5845 MC_(helperc_b_store8)( a + 8, d32 );
5846 }
5847
5848
5849 /*--------------------------------------------*/
5850 /*--- Origin tracking: sarp handlers ---*/
5851 /*--------------------------------------------*/
5852
5853 __attribute__((noinline))
5854 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
5855 if ((a & 1) && len >= 1) {
5856 MC_(helperc_b_store1)( a, otag );
5857 a++;
5858 len--;
5859 }
5860 if ((a & 2) && len >= 2) {
5861 MC_(helperc_b_store2)( a, otag );
5862 a += 2;
5863 len -= 2;
5864 }
5865 if (len >= 4)
5866 tl_assert(0 == (a & 3));
5867 while (len >= 4) {
5868 MC_(helperc_b_store4)( a, otag );
5869 a += 4;
5870 len -= 4;
5871 }
5872 if (len >= 2) {
5873 MC_(helperc_b_store2)( a, otag );
5874 a += 2;
5875 len -= 2;
5876 }
5877 if (len >= 1) {
5878 MC_(helperc_b_store1)( a, otag );
5879 //a++;
5880 len--;
5881 }
5882 tl_assert(len == 0);
5883 }
5884
5885 __attribute__((noinline))
5886 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
5887 if ((a & 1) && len >= 1) {
5888 MC_(helperc_b_store1)( a, 0 );
5889 a++;
5890 len--;
5891 }
5892 if ((a & 2) && len >= 2) {
5893 MC_(helperc_b_store2)( a, 0 );
5894 a += 2;
5895 len -= 2;
5896 }
5897 if (len >= 4)
5898 tl_assert(0 == (a & 3));
5899 while (len >= 4) {
5900 MC_(helperc_b_store4)( a, 0 );
5901 a += 4;
5902 len -= 4;
5903 }
5904 if (len >= 2) {
5905 MC_(helperc_b_store2)( a, 0 );
5906 a += 2;
5907 len -= 2;
5908 }
5909 if (len >= 1) {
5910 MC_(helperc_b_store1)( a, 0 );
5911 //a++;
5912 len--;
5913 }
5914 tl_assert(len == 0);
5915 }
5916
5917
5918 /*------------------------------------------------------------*/
5919 /*--- Setup and finalisation ---*/
5920 /*------------------------------------------------------------*/
5921
5922 static void mc_post_clo_init ( void )
5923 {
5924 // timurrrr: removed the check for VG_(clo_xml) here.
5925 if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
5926 VG_(message)(Vg_UserMsg,
5927 "Warning: --freelist-big-blocks value %lld has no effect\n"
5928 "as it is >= to --freelist-vol value %lld\n",
5929 MC_(clo_freelist_big_blocks),
5930 MC_(clo_freelist_vol));
5931
5932 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
5933
5934 if (MC_(clo_mc_level) == 3) {
5935 /* We're doing origin tracking. */
5936 # ifdef PERF_FAST_STACK
5937 VG_(track_new_mem_stack_4_w_ECU) ( mc_new_mem_stack_4_w_ECU );
5938 VG_(track_new_mem_stack_8_w_ECU) ( mc_new_mem_stack_8_w_ECU );
5939 VG_(track_new_mem_stack_12_w_ECU) ( mc_new_mem_stack_12_w_ECU );
5940 VG_(track_new_mem_stack_16_w_ECU) ( mc_new_mem_stack_16_w_ECU );
5941 VG_(track_new_mem_stack_32_w_ECU) ( mc_new_mem_stack_32_w_ECU );
5942 VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
5943 VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
5944 VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
5945 VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
5946 # endif
5947 VG_(track_new_mem_stack_w_ECU) ( mc_new_mem_stack_w_ECU );
5948 } else {
5949 /* Not doing origin tracking */
5950 # ifdef PERF_FAST_STACK
5951 VG_(track_new_mem_stack_4) ( mc_new_mem_stack_4 );
5952 VG_(track_new_mem_stack_8) ( mc_new_mem_stack_8 );
5953 VG_(track_new_mem_stack_12) ( mc_new_mem_stack_12 );
5954 VG_(track_new_mem_stack_16) ( mc_new_mem_stack_16 );
5955 VG_(track_new_mem_stack_32) ( mc_new_mem_stack_32 );
5956 VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
5957 VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
5958 VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
5959 VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
5960 # endif
5961 VG_(track_new_mem_stack) ( mc_new_mem_stack );
5962 }
5963
5964 /* This origin tracking cache is huge (~100M), so only initialise
5965 if we need it. */
5966 if (MC_(clo_mc_level) >= 3) {
5967 init_OCache();
5968 tl_assert(ocacheL1 != NULL);
5969 tl_assert(ocacheL2 != NULL);
5970 } else {
5971 tl_assert(ocacheL1 == NULL);
5972 tl_assert(ocacheL2 == NULL);
5973 }
5974 }
5975
5976 static void print_SM_info(char* type, int n_SMs)
5977 {
5978 VG_(message)(Vg_DebugMsg,
5979 " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
5980 type,
5981 n_SMs,
5982 n_SMs * sizeof(SecMap) / 1024UL,
5983 n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
5984 }
5985
5986 static void mc_fini ( Int exitcode )
5987 {
5988 MC_(print_malloc_stats)();
5989
5990 if (MC_(clo_leak_check) != LC_Off) {
5991 LeakCheckParams lcp;
5992 lcp.mode = MC_(clo_leak_check);
5993 lcp.show_reachable = MC_(clo_show_reachable);
5994 lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
5995 lcp.deltamode = LCD_Any;
5996 lcp.requested_by_monitor_command = False;
5997 MC_(detect_memory_leaks)(1/*bogus ThreadId*/, lcp);
5998 } else {
5999 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6000 VG_(umsg)(
6001 "For a detailed leak analysis, rerun with: --leak-check=full\n"
6002 "\n"
6003 );
6004 }
6005 }
6006
6007 if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
6008 VG_(message)(Vg_UserMsg,
6009 "For counts of detected and suppressed errors, rerun with: -v\n");
6010 }
6011
6012 if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
6013 && MC_(clo_mc_level) == 2) {
6014 VG_(message)(Vg_UserMsg,
6015 "Use --track-origins=yes to see where "
6016 "uninitialised values come from\n");
6017 }
6018
6019 done_prof_mem();
6020
6021 if (VG_(clo_stats)) {
6022 SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;
6023
6024 VG_(message)(Vg_DebugMsg,
6025 " memcheck: sanity checks: %d cheap, %d expensive\n",
6026 n_sanity_cheap, n_sanity_expensive );
6027 VG_(message)(Vg_DebugMsg,
6028 " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
6029 n_auxmap_L2_nodes,
6030 n_auxmap_L2_nodes * 64,
6031 n_auxmap_L2_nodes / 16 );
6032 VG_(message)(Vg_DebugMsg,
6033 " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
6034 n_auxmap_L1_searches, n_auxmap_L1_cmps,
6035 (10ULL * n_auxmap_L1_cmps)
6036 / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
6037 );
6038 VG_(message)(Vg_DebugMsg,
6039 " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
6040 n_auxmap_L2_searches, n_auxmap_L2_nodes
6041 );
6042
6043 print_SM_info("n_issued ", n_issued_SMs);
6044 print_SM_info("n_deissued ", n_deissued_SMs);
6045 print_SM_info("max_noaccess ", max_noaccess_SMs);
6046 print_SM_info("max_undefined", max_undefined_SMs);
6047 print_SM_info("max_defined ", max_defined_SMs);
6048 print_SM_info("max_non_DSM ", max_non_DSM_SMs);
6049
6050 // Three DSMs, plus the non-DSM ones
6051 max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
6052 // The 3*sizeof(Word) bytes is the AVL node metadata size.
6053 // The 4*sizeof(Word) bytes is the malloc metadata size.
6054 // Hardwiring these sizes in sucks, but I don't see how else to do it.
6055 max_secVBit_szB = max_secVBit_nodes *
6056 (sizeof(SecVBitNode) + 3*sizeof(Word) + 4*sizeof(Word));
6057 max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;
6058
6059 VG_(message)(Vg_DebugMsg,
6060 " memcheck: max sec V bit nodes: %d (%ldk, %ldM)\n",
6061 max_secVBit_nodes, max_secVBit_szB / 1024,
6062 max_secVBit_szB / (1024 * 1024));
6063 VG_(message)(Vg_DebugMsg,
6064 " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
6065 sec_vbits_new_nodes + sec_vbits_updates,
6066 sec_vbits_new_nodes, sec_vbits_updates );
6067 VG_(message)(Vg_DebugMsg,
6068 " memcheck: max shadow mem size: %ldk, %ldM\n",
6069 max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));
6070
6071 if (MC_(clo_mc_level) >= 3) {
6072 VG_(message)(Vg_DebugMsg,
6073 " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n",
6074 stats_ocacheL1_find,
6075 stats_ocacheL1_misses,
6076 stats_ocacheL1_lossage );
6077 VG_(message)(Vg_DebugMsg,
6078 " ocacheL1: %'12lu at 0 %'12lu at 1\n",
6079 stats_ocacheL1_find - stats_ocacheL1_misses
6080 - stats_ocacheL1_found_at_1
6081 - stats_ocacheL1_found_at_N,
6082 stats_ocacheL1_found_at_1 );
6083 VG_(message)(Vg_DebugMsg,
6084 " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n",
6085 stats_ocacheL1_found_at_N,
6086 stats_ocacheL1_movefwds );
6087 VG_(message)(Vg_DebugMsg,
6088 " ocacheL1: %'12lu sizeB %'12u useful\n",
6089 (UWord)sizeof(OCache),
6090 4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
6091 VG_(message)(Vg_DebugMsg,
6092 " ocacheL2: %'12lu refs %'12lu misses\n",
6093 stats__ocacheL2_refs,
6094 stats__ocacheL2_misses );
6095 VG_(message)(Vg_DebugMsg,
6096 " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
6097 stats__ocacheL2_n_nodes_max,
6098 stats__ocacheL2_n_nodes );
6099 VG_(message)(Vg_DebugMsg,
6100 " niacache: %'12lu refs %'12lu misses\n",
6101 stats__nia_cache_queries, stats__nia_cache_misses);
6102 } else {
6103 tl_assert(ocacheL1 == NULL);
6104 tl_assert(ocacheL2 == NULL);
6105 }
6106 }
6107
6108 if (0) {
6109 VG_(message)(Vg_DebugMsg,
6110 "------ Valgrind's client block stats follow ---------------\n" );
6111 show_client_block_stats();
6112 }
6113 }
6114
6115 /* Mark the given addr/len unaddressable for the watchpoint implementation.
6116 The PointKind will be handled at access time. */
6117 static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
6118 Addr addr, SizeT len)
6119 {
6120 /* GDBTD this is somewhat fishy. We might rather have to save the previous
6121 accessibility and definedness in gdbserver so as to allow restoring it
6122 properly. Currently, we assume that the user only watches things
6123 which are properly addressable and defined */
6124 if (insert)
6125 MC_(make_mem_noaccess) (addr, len);
6126 else
6127 MC_(make_mem_defined) (addr, len);
6128 return True;
6129 }
6130
6131 static void mc_pre_clo_init(void)
6132 {
6133 VG_(details_name) ("Memcheck");
6134 VG_(details_version) (NULL);
6135 VG_(details_description) ("a memory error detector");
6136 VG_(details_copyright_author)(
6137 "Copyright (C) 2002-2011, and GNU GPL'd, by Julian Seward et al.");
6138 VG_(details_bug_reports_to) (VG_BUGS_TO);
6139 VG_(details_avg_translation_sizeB) ( 640 );
6140
6141 VG_(basic_tool_funcs) (mc_post_clo_init,
6142 MC_(instrument),
6143 mc_fini);
6144
6145 VG_(needs_final_IR_tidy_pass) ( MC_(final_tidy) );
6146
6147
6148 VG_(needs_core_errors) ();
6149 VG_(needs_tool_errors) (MC_(eq_Error),
6150 MC_(before_pp_Error),
6151 MC_(pp_Error),
6152 True,/*show TIDs for errors*/
6153 MC_(update_Error_extra),
6154 MC_(is_recognised_suppression),
6155 MC_(read_extra_suppression_info),
6156 MC_(error_matches_suppression),
6157 MC_(get_error_name),
6158 MC_(get_extra_suppression_info));
6159 VG_(needs_libc_freeres) ();
6160 VG_(needs_command_line_options)(mc_process_cmd_line_options,
6161 mc_print_usage,
6162 mc_print_debug_usage);
6163 VG_(needs_client_requests) (mc_handle_client_request);
6164 VG_(needs_sanity_checks) (mc_cheap_sanity_check,
6165 mc_expensive_sanity_check);
6166 VG_(needs_malloc_replacement) (MC_(malloc),
6167 MC_(__builtin_new),
6168 MC_(__builtin_vec_new),
6169 MC_(memalign),
6170 MC_(calloc),
6171 MC_(free),
6172 MC_(__builtin_delete),
6173 MC_(__builtin_vec_delete),
6174 MC_(realloc),
6175 MC_(malloc_usable_size),
6176 MC_MALLOC_REDZONE_SZB );
6177
6178 VG_(needs_xml_output) ();
6179
6180 VG_(track_new_mem_startup) ( mc_new_mem_startup );
6181 VG_(track_new_mem_stack_signal)( make_mem_undefined_w_tid );
6182 // We assume that brk()/sbrk() does not initialise new memory. Is this
6183 // accurate? John Reiser says:
6184 //
6185 // 0) sbrk() can *decrease* process address space. No zero fill is done
6186 // for a decrease, not even the fragment on the high end of the last page
6187 // that is beyond the new highest address. For maximum safety and
6188 // portability, then the bytes in the last page that reside above [the
6189 // new] sbrk(0) should be considered to be uninitialized, but in practice
6190 // it is exceedingly likely that they will retain their previous
6191 // contents.
6192 //
6193 // 1) If an increase is large enough to require new whole pages, then
6194 // those new whole pages (like all new pages) are zero-filled by the
6195 // operating system. So if sbrk(0) already is page aligned, then
6196 // sbrk(PAGE_SIZE) *does* zero-fill the new memory.
6197 //
6198 // 2) Any increase that lies within an existing allocated page is not
6199 // changed. So if (x = sbrk(0)) is not page aligned, then
6200 // sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
6201 // existing contents, and an additional PAGE_SIZE bytes which are zeroed.
6202 // ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
6203 // of them come along for the ride because the operating system deals
6204 // only in whole pages. Again, for maximum safety and portability, then
6205 // anything that lives above [the new] sbrk(0) should be considered
6206 // uninitialized, but in practice will retain previous contents [zero in
6207 // this case.]"
6208 //
6209 // In short:
6210 //
6211 // A key property of sbrk/brk is that new whole pages that are supplied
6212 // by the operating system *do* get initialized to zero.
6213 //
6214 // As for the portability of all this:
6215 //
6216 // sbrk and brk are not POSIX. However, any system that is a derivative
6217 // of *nix has sbrk and brk because too much software (such as
6218 // the Bourne shell) relies on the traditional memory map (.text,
6219 // .data+.bss, stack) and the existence of sbrk/brk.
6220 //
6221 // So we should arguably observe all this. However:
6222 // - The current inaccuracy has caused maybe one complaint in seven years(?)
6223 // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
6224 // doubt most programmers know the above information.
6225 // So I'm not terribly unhappy with marking it as undefined. --njn.
6226 //
6227 // [More: I think most of what John said only applies to sbrk(). It seems
6228 // that brk() always deals in whole pages. And since this event deals
6229 // directly with brk(), not with sbrk(), perhaps it would be reasonable to
6230 // just mark all memory it allocates as defined.]
6231 //
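// As a concrete illustration of point (2) above (a sketch with assumed
// numbers: PAGE_SIZE == 0x1000 and an old break of x == 0x11234):
// sbrk(PAGE_SIZE) moves the break to 0x12234.  The ((PAGE_SIZE-1) & -x)
// == 0xdcc bytes from 0x11234 up to the page boundary at 0x12000 lie in
// the already-allocated page and keep their old contents; the kernel then
// supplies the whole page 0x12000..0x12fff zero-filled, of which
// ((PAGE_SIZE-1) & x) == 0x234 bytes fall below the new break and the
// remaining 0xdcc bytes "come along for the ride".
//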
6232 VG_(track_new_mem_brk) ( make_mem_undefined_w_tid );
6233
6234 // Handling of mmap and mprotect isn't simple (well, it is simple,
6235 // but the justification isn't).  See comments above, just prior to
6236 // mc_new_mem_mmap.
6237 VG_(track_new_mem_mmap) ( mc_new_mem_mmap );
6238 VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );
6239
6240 VG_(track_copy_mem_remap) ( MC_(copy_address_range_state) );
6241
6242 VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
6243 VG_(track_die_mem_brk) ( MC_(make_mem_noaccess) );
6244 VG_(track_die_mem_munmap) ( MC_(make_mem_noaccess) );
6245
6246 /* Defer the specification of the new_mem_stack functions to the
6247 post_clo_init function, since we need to first parse the command
6248 line before deciding which set to use. */
6249
6250 # ifdef PERF_FAST_STACK
6251 VG_(track_die_mem_stack_4) ( mc_die_mem_stack_4 );
6252 VG_(track_die_mem_stack_8) ( mc_die_mem_stack_8 );
6253 VG_(track_die_mem_stack_12) ( mc_die_mem_stack_12 );
6254 VG_(track_die_mem_stack_16) ( mc_die_mem_stack_16 );
6255 VG_(track_die_mem_stack_32) ( mc_die_mem_stack_32 );
6256 VG_(track_die_mem_stack_112) ( mc_die_mem_stack_112 );
6257 VG_(track_die_mem_stack_128) ( mc_die_mem_stack_128 );
6258 VG_(track_die_mem_stack_144) ( mc_die_mem_stack_144 );
6259 VG_(track_die_mem_stack_160) ( mc_die_mem_stack_160 );
6260 # endif
6261 VG_(track_die_mem_stack) ( mc_die_mem_stack );
6262
6263 VG_(track_ban_mem_stack) ( MC_(make_mem_noaccess) );
6264
6265 VG_(track_pre_mem_read) ( check_mem_is_defined );
6266 VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
6267 VG_(track_pre_mem_write) ( check_mem_is_addressable );
6268 VG_(track_post_mem_write) ( mc_post_mem_write );
6269
6270 if (MC_(clo_mc_level) >= 2)
6271 VG_(track_pre_reg_read) ( mc_pre_reg_read );
6272
6273 VG_(track_post_reg_write) ( mc_post_reg_write );
6274 VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );
6275
6276 VG_(needs_watchpoint) ( mc_mark_unaddressable_for_watchpoint );
6277
6278 init_shadow_memory();
6279 MC_(malloc_list) = VG_(HT_construct)( "MC_(malloc_list)" );
6280 MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
6281 init_prof_mem();
6282
6283 tl_assert( mc_expensive_sanity_check() );
6284
6285 // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
6286 tl_assert(sizeof(UWord) == sizeof(Addr));
6287 // Call me paranoid. I don't care.
6288 tl_assert(sizeof(void*) == sizeof(Addr));
6289
6290 // BYTES_PER_SEC_VBIT_NODE must be a power of two.
6291 tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));
6292
6293 /* This is small. Always initialise it. */
6294 init_nia_to_ecu_cache();
6295
6296 /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
6297 if we need to, since the command line args haven't been
6298 processed yet. Hence defer it to mc_post_clo_init. */
6299 tl_assert(ocacheL1 == NULL);
6300 tl_assert(ocacheL2 == NULL);
6301
6302 /* Check some important stuff. See extensive comments above
6303 re UNALIGNED_OR_HIGH for background. */
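   /* (The asserted constants below amount to
      MASK(n) == ~MAX_PRIMARY_ADDRESS | (n-1) on both word sizes: every bit
      above the primary-map address range plus the low log2(n) alignment
      bits.  So (a & MASK(n)) != 0 presumably flags an n-byte access that is
      either misaligned or above the directly-mapped range, i.e. the
      "unaligned or high" case referred to above.) */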
6304 # if VG_WORDSIZE == 4
6305 tl_assert(sizeof(void*) == 4);
6306 tl_assert(sizeof(Addr) == 4);
6307 tl_assert(sizeof(UWord) == 4);
6308 tl_assert(sizeof(Word) == 4);
6309 tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
6310 tl_assert(MASK(1) == 0UL);
6311 tl_assert(MASK(2) == 1UL);
6312 tl_assert(MASK(4) == 3UL);
6313 tl_assert(MASK(8) == 7UL);
6314 # else
6315 tl_assert(VG_WORDSIZE == 8);
6316 tl_assert(sizeof(void*) == 8);
6317 tl_assert(sizeof(Addr) == 8);
6318 tl_assert(sizeof(UWord) == 8);
6319 tl_assert(sizeof(Word) == 8);
6320 tl_assert(MAX_PRIMARY_ADDRESS == 0x3FFFFFFFFFULL);
6321 tl_assert(MASK(1) == 0xFFFFFFC000000000ULL);
6322 tl_assert(MASK(2) == 0xFFFFFFC000000001ULL);
6323 tl_assert(MASK(4) == 0xFFFFFFC000000003ULL);
6324 tl_assert(MASK(8) == 0xFFFFFFC000000007ULL);
6325 # endif
6326 }
6327
6328 VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)
6329
6330 /*--------------------------------------------------------------------*/
6331 /*--- end mc_main.c ---*/
6332 /*--------------------------------------------------------------------*/
6333