1
2 /*--------------------------------------------------------------------*/
3 /*--- MemCheck: Maintain bitmaps of memory, tracking the ---*/
4 /*--- accessibility (A) and validity (V) status of each byte. ---*/
5 /*--- mc_main.c ---*/
6 /*--------------------------------------------------------------------*/
7
8 /*
9 This file is part of MemCheck, a heavyweight Valgrind tool for
10 detecting memory errors.
11
12 Copyright (C) 2000-2012 Julian Seward
13 jseward@acm.org
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31 */
32
33 #include "pub_tool_basics.h"
34 #include "pub_tool_aspacemgr.h"
35 #include "pub_tool_gdbserver.h"
36 #include "pub_tool_poolalloc.h"
37 #include "pub_tool_hashtable.h" // For mc_include.h
38 #include "pub_tool_libcbase.h"
39 #include "pub_tool_libcassert.h"
40 #include "pub_tool_libcprint.h"
41 #include "pub_tool_machine.h"
42 #include "pub_tool_mallocfree.h"
43 #include "pub_tool_options.h"
44 #include "pub_tool_oset.h"
45 #include "pub_tool_replacemalloc.h"
46 #include "pub_tool_tooliface.h"
47 #include "pub_tool_threadstate.h"
48
49 #include "mc_include.h"
50 #include "memcheck.h" /* for client requests */
51
52
53 /* Set to 1 to do a little more sanity checking */
54 #define VG_DEBUG_MEMORY 0
55
56 #define DEBUG(fmt, args...) //VG_(printf)(fmt, ## args)
57
58 static void ocache_sarp_Set_Origins ( Addr, UWord, UInt ); /* fwds */
59 static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
60
61
62 /*------------------------------------------------------------*/
63 /*--- Fast-case knobs ---*/
64 /*------------------------------------------------------------*/
65
66 // Comment these out to disable the fast cases (don't just set them to zero).
67
68 #define PERF_FAST_LOADV 1
69 #define PERF_FAST_STOREV 1
70
71 #define PERF_FAST_SARP 1
72
73 #define PERF_FAST_STACK 1
74 #define PERF_FAST_STACK2 1
75
76 /* Change this to 1 to enable assertions on origin tracking cache fast
77 paths */
78 #define OC_ENABLE_ASSERTIONS 0
79
80
81 /*------------------------------------------------------------*/
82 /*--- Comments on the origin tracking implementation ---*/
83 /*------------------------------------------------------------*/
84
85 /* See detailed comment entitled
86 AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
87 which is contained further on in this file. */
88
89
90 /*------------------------------------------------------------*/
91 /*--- V bits and A bits ---*/
92 /*------------------------------------------------------------*/
93
94 /* Conceptually, every byte value has 8 V bits, which track whether Memcheck
95 thinks the corresponding value bit is defined. And every memory byte
96 has an A bit, which tracks whether Memcheck thinks the program can access
97 it safely (ie. it's mapped, and has at least one of the RWX permission bits
98 set). So every N-bit register is shadowed with N V bits, and every memory
99 byte is shadowed with 8 V bits and one A bit.
100
101 In the implementation, we use two forms of compression (compressed V bits
102 and distinguished secondary maps) to avoid the 9-bit-per-byte overhead
103 for memory.
104
105 Memcheck also tracks extra information about each heap block that is
106 allocated, for detecting memory leaks and other purposes.
107 */
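/* A quick cost comparison (illustrative arithmetic only): a naive scheme
   needs 8 V bits + 1 A bit = 9 shadow bits per byte of memory, ie. more
   than 100% overhead.  The compressed scheme described below needs roughly
   2 bits per byte (25%) for ordinary mappings, and effectively nothing for
   the large uniform regions handled by the distinguished secondaries, with
   the rare partially-defined bytes spilled into a separate table. */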
108
109 /*------------------------------------------------------------*/
110 /*--- Basic A/V bitmap representation. ---*/
111 /*------------------------------------------------------------*/
112
113 /* All reads and writes are checked against a memory map (a.k.a. shadow
114 memory), which records the state of all memory in the process.
115
116 On 32-bit machines the memory map is organised as follows.
117 The top 16 bits of an address are used to index into a top-level
118 map table, containing 65536 entries. Each entry is a pointer to a
119 second-level map, which records the accessibility and validity
120 permissions for the 65536 bytes indexed by the lower 16 bits of the
121 address. Each byte is represented by two bits (details are below). So
122 each second-level map contains 16384 bytes. This two-level arrangement
123 conveniently divides the 4G address space into 64k lumps, each of size 64k
124 bytes.
125
126 All entries in the primary (top-level) map must point to a valid
127 secondary (second-level) map. Since many of the 64kB chunks will
128 have the same status for every bit -- ie. noaccess (for unused
129 address space) or entirely addressable and defined (for code segments) --
130 there are three distinguished secondary maps, which indicate 'noaccess',
131 'undefined' and 'defined'. For these uniform 64kB chunks, the primary
132 map entry points to the relevant distinguished map. In practice,
133 typically more than half of the addressable memory is represented with
134 the 'undefined' or 'defined' distinguished secondary map, so it gives a
135 good saving. It also lets us set the V+A bits of large address regions
136 quickly in set_address_range_perms().
137
138 On 64-bit machines it's more complicated. If we followed the same basic
139 scheme we'd have a four-level table which would require too many memory
140 accesses. So instead the top-level map table has 2^19 entries (indexed
141 using bits 16..34 of the address); this covers the bottom 32GB. Any
142 accesses above 32GB are handled with a slow, sparse auxiliary table.
143 Valgrind's address space manager tries very hard to keep things below
144 this 32GB barrier so that performance doesn't suffer too much.
145
146 Note that this file has a lot of different functions for reading and
147 writing shadow memory. Only a couple are strictly necessary (eg.
148 get_vabits2 and set_vabits2), most are just specialised for specific
149 common cases to improve performance.
150
151 Aside: the V+A bits are less precise than they could be -- we have no way
152 of marking memory as read-only. It would be great if we could add an
153 extra state VA_BITSn_READONLY. But then we'd have 5 different states,
154 which requires 2.3 bits to hold, and there's no way to do that elegantly
155 -- we'd have to double up to 4 bits of metadata per byte, which doesn't
156 seem worth it.
157 */
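/* A worked example of the 32-bit lookup, using the macros defined below and
   a hypothetical address 0xBEEF1234: the primary index is
   0xBEEF1234 >> 16 == 0xBEEF, and the chosen secondary covers
   0xBEEF0000..0xBEEFFFFF.  Within that secondary, SM_OFF(0xBEEF1234)
   == 0x1234 >> 2 == 0x48D selects the vabits8 chunk, and since
   0x1234 & 3 == 0 the byte's two state bits live in bits [1..0] of that
   chunk. */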
158
159 /* --------------- Basic configuration --------------- */
160
161 /* Only change this. N_PRIMARY_MAP *must* be a power of 2. */
162
163 #if VG_WORDSIZE == 4
164
165 /* cover the entire address space */
166 # define N_PRIMARY_BITS 16
167
168 #else
169
170 /* Just handle the first 32G fast and the rest via auxiliary
171 primaries. If you change this, Memcheck will assert at startup.
172 See the definition of UNALIGNED_OR_HIGH for extensive comments. */
173 # define N_PRIMARY_BITS 19
174
175 #endif
176
177
178 /* Do not change this. */
179 #define N_PRIMARY_MAP ( ((UWord)1) << N_PRIMARY_BITS)
180
181 /* Do not change this. */
182 #define MAX_PRIMARY_ADDRESS (Addr)((((Addr)65536) * N_PRIMARY_MAP)-1)
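/* Illustrative arithmetic: on a 64-bit target N_PRIMARY_BITS is 19, so
   N_PRIMARY_MAP is 2^19 == 524288 and MAX_PRIMARY_ADDRESS is
   524288 * 65536 - 1 == 2^35 - 1 == 0x7FFFFFFFF, ie. the primary map covers
   exactly the bottom 32GB.  On a 32-bit target the same sums give
   2^16 * 65536 - 1 == 0xFFFFFFFF, the whole 4GB address space. */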
183
184
185 /* --------------- Secondary maps --------------- */
186
187 // Each byte of memory conceptually has an A bit, which indicates its
188 // addressability, and 8 V bits, which indicate its definedness.
189 //
190 // But because very few bytes are partially defined, we can use a nice
191 // compression scheme to reduce the size of shadow memory. Each byte of
192 // memory has 2 bits which indicate its state (ie. V+A bits):
193 //
194 // 00: noaccess (unaddressable but treated as fully defined)
195 // 01: undefined (addressable and fully undefined)
196 // 10: defined (addressable and fully defined)
197 // 11: partdefined (addressable and partially defined)
198 //
199 // In the "partdefined" case, we use a secondary table to store the V bits.
200 // Each entry in the secondary-V-bits table maps a byte address to its 8 V
201 // bits.
202 //
203 // We store the compressed V+A bits in 8-bit chunks, ie. the V+A bits for
204 // four bytes (32 bits) of memory are in each chunk. Hence the name
205 // "vabits8". This lets us get the V+A bits for four bytes at a time
206 // easily (without having to do any shifting and/or masking), and that is a
207 // very common operation. (Note that although each vabits8 chunk
208 // is 8 bits in size, it represents 32 bits of memory.)
209 //
210 // The representation is "inverse" little-endian... each 4 bytes of
211 // memory is represented by a 1 byte value, where:
212 //
213 // - the status of byte (a+0) is held in bits [1..0]
214 // - the status of byte (a+1) is held in bits [3..2]
215 // - the status of byte (a+2) is held in bits [5..4]
216 // - the status of byte (a+3) is held in bits [7..6]
217 //
218 // It's "inverse" because endianness normally describes a mapping from
219 // value bits to memory addresses; in this case the mapping is inverted.
220 // Ie. instead of particular value bits being held in certain addresses, in
221 // this case certain addresses are represented by particular value bits.
222 // See insert_vabits2_into_vabits8() for an example.
223 //
224 // But note that we don't compress the V bits stored in registers; they
225 // need to be explicit to make the shadow operations possible. Therefore
226 // when moving values between registers and memory we need to convert
227 // between the expanded in-register format and the compressed in-memory
228 // format. This isn't so difficult, it just requires careful attention in a
229 // few places.
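//
// For example: if the four bytes at a 4-aligned address a are respectively
// defined, defined, undefined and noaccess (for a+0, a+1, a+2, a+3), then,
// using the VA_BITS2_* values defined just below:
//
//    vabits8 = (VA_BITS2_NOACCESS  << 6)   // byte a+3
//            | (VA_BITS2_UNDEFINED << 4)   // byte a+2
//            | (VA_BITS2_DEFINED   << 2)   // byte a+1
//            | (VA_BITS2_DEFINED   << 0)   // byte a+0
//            = 0x1a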
230
231 // These represent eight bits of memory.
232 #define VA_BITS2_NOACCESS 0x0 // 00b
233 #define VA_BITS2_UNDEFINED 0x1 // 01b
234 #define VA_BITS2_DEFINED 0x2 // 10b
235 #define VA_BITS2_PARTDEFINED 0x3 // 11b
236
237 // These represent 16 bits of memory.
238 #define VA_BITS4_NOACCESS 0x0 // 00_00b
239 #define VA_BITS4_UNDEFINED 0x5 // 01_01b
240 #define VA_BITS4_DEFINED 0xa // 10_10b
241
242 // These represent 32 bits of memory.
243 #define VA_BITS8_NOACCESS 0x00 // 00_00_00_00b
244 #define VA_BITS8_UNDEFINED 0x55 // 01_01_01_01b
245 #define VA_BITS8_DEFINED 0xaa // 10_10_10_10b
246
247 // These represent 64 bits of memory.
248 #define VA_BITS16_NOACCESS 0x0000 // 00_00_00_00b x 2
249 #define VA_BITS16_UNDEFINED 0x5555 // 01_01_01_01b x 2
250 #define VA_BITS16_DEFINED 0xaaaa // 10_10_10_10b x 2
251
252
253 #define SM_CHUNKS 16384
254 #define SM_OFF(aaa) (((aaa) & 0xffff) >> 2)
255 #define SM_OFF_16(aaa) (((aaa) & 0xffff) >> 3)
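// For example, SM_OFF(a) picks out address bits [15..2]: the four bytes
// a & ~3 .. (a & ~3)+3 all share the same vabits8[] chunk.  Likewise
// SM_OFF_16(a) indexes the vabits8[] array viewed as UShorts, so one 16-bit
// chunk covers the eight bytes a & ~7 .. (a & ~7)+7.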
256
257 // Paranoia: it's critical for performance that the requested inlining
258 // occurs. So try extra hard.
259 #define INLINE inline __attribute__((always_inline))
260
261 static INLINE Addr start_of_this_sm ( Addr a ) {
262 return (a & (~SM_MASK));
263 }
264 static INLINE Bool is_start_of_sm ( Addr a ) {
265 return (start_of_this_sm(a) == a);
266 }
267
268 typedef
269 struct {
270 UChar vabits8[SM_CHUNKS];
271 }
272 SecMap;
273
274 // 3 distinguished secondary maps, one for no-access, one for
275 // accessible but undefined, and one for accessible and defined.
276 // Distinguished secondaries may never be modified.
277 #define SM_DIST_NOACCESS 0
278 #define SM_DIST_UNDEFINED 1
279 #define SM_DIST_DEFINED 2
280
281 static SecMap sm_distinguished[3];
282
283 static INLINE Bool is_distinguished_sm ( SecMap* sm ) {
284 return sm >= &sm_distinguished[0] && sm <= &sm_distinguished[2];
285 }
286
287 // Forward declaration
288 static void update_SM_counts(SecMap* oldSM, SecMap* newSM);
289
290 /* dist_sm points to one of our three distinguished secondaries. Make
291 a copy of it so that we can write to it.
292 */
293 static SecMap* copy_for_writing ( SecMap* dist_sm )
294 {
295 SecMap* new_sm;
296 tl_assert(dist_sm == &sm_distinguished[0]
297 || dist_sm == &sm_distinguished[1]
298 || dist_sm == &sm_distinguished[2]);
299
300 new_sm = VG_(am_shadow_alloc)(sizeof(SecMap));
301 if (new_sm == NULL)
302 VG_(out_of_memory_NORETURN)( "memcheck:allocate new SecMap",
303 sizeof(SecMap) );
304 VG_(memcpy)(new_sm, dist_sm, sizeof(SecMap));
305 update_SM_counts(dist_sm, new_sm);
306 return new_sm;
307 }
308
309 /* --------------- Stats --------------- */
310
311 static Int n_issued_SMs = 0;
312 static Int n_deissued_SMs = 0;
313 static Int n_noaccess_SMs = N_PRIMARY_MAP; // start with many noaccess DSMs
314 static Int n_undefined_SMs = 0;
315 static Int n_defined_SMs = 0;
316 static Int n_non_DSM_SMs = 0;
317 static Int max_noaccess_SMs = 0;
318 static Int max_undefined_SMs = 0;
319 static Int max_defined_SMs = 0;
320 static Int max_non_DSM_SMs = 0;
321
322 /* # searches initiated in auxmap_L1, and # base cmps required */
323 static ULong n_auxmap_L1_searches = 0;
324 static ULong n_auxmap_L1_cmps = 0;
325 /* # of searches that missed in auxmap_L1 and therefore had to
326 be handed to auxmap_L2. And the number of nodes inserted. */
327 static ULong n_auxmap_L2_searches = 0;
328 static ULong n_auxmap_L2_nodes = 0;
329
330 static Int n_sanity_cheap = 0;
331 static Int n_sanity_expensive = 0;
332
333 static Int n_secVBit_nodes = 0;
334 static Int max_secVBit_nodes = 0;
335
336 static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
337 {
338 if (oldSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs --;
339 else if (oldSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs--;
340 else if (oldSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs --;
341 else { n_non_DSM_SMs --;
342 n_deissued_SMs ++; }
343
344 if (newSM == &sm_distinguished[SM_DIST_NOACCESS ]) n_noaccess_SMs ++;
345 else if (newSM == &sm_distinguished[SM_DIST_UNDEFINED]) n_undefined_SMs++;
346 else if (newSM == &sm_distinguished[SM_DIST_DEFINED ]) n_defined_SMs ++;
347 else { n_non_DSM_SMs ++;
348 n_issued_SMs ++; }
349
350 if (n_noaccess_SMs > max_noaccess_SMs ) max_noaccess_SMs = n_noaccess_SMs;
351 if (n_undefined_SMs > max_undefined_SMs) max_undefined_SMs = n_undefined_SMs;
352 if (n_defined_SMs > max_defined_SMs ) max_defined_SMs = n_defined_SMs;
353 if (n_non_DSM_SMs > max_non_DSM_SMs ) max_non_DSM_SMs = n_non_DSM_SMs;
354 }
355
356 /* --------------- Primary maps --------------- */
357
358 /* The main primary map. This covers some initial part of the address
359 space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is
360 handled using the auxiliary primary map.
361 */
362 static SecMap* primary_map[N_PRIMARY_MAP];
363
364
365 /* An entry in the auxiliary primary map. base must be a 64k-aligned
366 value, and sm points at the relevant secondary map. As with the
367 main primary map, the secondary may be either a real secondary, or
368 one of the three distinguished secondaries. DO NOT CHANGE THIS
369 LAYOUT: the first word has to be the key for OSet fast lookups.
370 */
371 typedef
372 struct {
373 Addr base;
374 SecMap* sm;
375 }
376 AuxMapEnt;
377
378 /* Tunable parameter: How big is the L1 queue? */
379 #define N_AUXMAP_L1 24
380
381 /* Tunable parameter: How far along the L1 queue to insert
382 entries resulting from L2 lookups? */
383 #define AUXMAP_L1_INSERT_IX 12
384
385 static struct {
386 Addr base;
387 AuxMapEnt* ent; // pointer to the matching auxmap_L2 node
388 }
389 auxmap_L1[N_AUXMAP_L1];
390
391 static OSet* auxmap_L2 = NULL;
392
393 static void init_auxmap_L1_L2 ( void )
394 {
395 Int i;
396 for (i = 0; i < N_AUXMAP_L1; i++) {
397 auxmap_L1[i].base = 0;
398 auxmap_L1[i].ent = NULL;
399 }
400
401 tl_assert(0 == offsetof(AuxMapEnt,base));
402 tl_assert(sizeof(Addr) == sizeof(void*));
403 auxmap_L2 = VG_(OSetGen_Create)( /*keyOff*/ offsetof(AuxMapEnt,base),
404 /*fastCmp*/ NULL,
405 VG_(malloc), "mc.iaLL.1", VG_(free) );
406 }
407
408 /* Check representation invariants; if OK return NULL; else a
409 descriptive bit of text. Also return the number of
410 non-distinguished secondary maps referred to from the auxiliary
411 primary maps. */
412
413 static HChar* check_auxmap_L1_L2_sanity ( Word* n_secmaps_found )
414 {
415 Word i, j;
416 /* On a 32-bit platform, the L2 and L1 tables should
417 both remain empty forever.
418
419 On a 64-bit platform:
420 In the L2 table:
421 all .base & 0xFFFF == 0
422 all .base > MAX_PRIMARY_ADDRESS
423 In the L1 table:
424 all .base & 0xFFFF == 0
425 all (.base > MAX_PRIMARY_ADDRESS
426 .base & 0xFFFF == 0
427 and .ent points to an AuxMapEnt with the same .base)
428 or
429 (.base == 0 and .ent == NULL)
430 */
431 *n_secmaps_found = 0;
432 if (sizeof(void*) == 4) {
433 /* 32-bit platform */
434 if (VG_(OSetGen_Size)(auxmap_L2) != 0)
435 return "32-bit: auxmap_L2 is non-empty";
436 for (i = 0; i < N_AUXMAP_L1; i++)
437 if (auxmap_L1[i].base != 0 || auxmap_L1[i].ent != NULL)
438 return "32-bit: auxmap_L1 is non-empty";
439 } else {
440 /* 64-bit platform */
441 UWord elems_seen = 0;
442 AuxMapEnt *elem, *res;
443 AuxMapEnt key;
444 /* L2 table */
445 VG_(OSetGen_ResetIter)(auxmap_L2);
446 while ( (elem = VG_(OSetGen_Next)(auxmap_L2)) ) {
447 elems_seen++;
448 if (0 != (elem->base & (Addr)0xFFFF))
449 return "64-bit: nonzero .base & 0xFFFF in auxmap_L2";
450 if (elem->base <= MAX_PRIMARY_ADDRESS)
451 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L2";
452 if (elem->sm == NULL)
453 return "64-bit: .sm in _L2 is NULL";
454 if (!is_distinguished_sm(elem->sm))
455 (*n_secmaps_found)++;
456 }
457 if (elems_seen != n_auxmap_L2_nodes)
458 return "64-bit: disagreement on number of elems in _L2";
459 /* Check L1-L2 correspondence */
460 for (i = 0; i < N_AUXMAP_L1; i++) {
461 if (auxmap_L1[i].base == 0 && auxmap_L1[i].ent == NULL)
462 continue;
463 if (0 != (auxmap_L1[i].base & (Addr)0xFFFF))
464 return "64-bit: nonzero .base & 0xFFFF in auxmap_L1";
465 if (auxmap_L1[i].base <= MAX_PRIMARY_ADDRESS)
466 return "64-bit: .base <= MAX_PRIMARY_ADDRESS in auxmap_L1";
467 if (auxmap_L1[i].ent == NULL)
468 return "64-bit: .ent is NULL in auxmap_L1";
469 if (auxmap_L1[i].ent->base != auxmap_L1[i].base)
470 return "64-bit: _L1 and _L2 bases are inconsistent";
471 /* Look it up in auxmap_L2. */
472 key.base = auxmap_L1[i].base;
473 key.sm = 0;
474 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
475 if (res == NULL)
476 return "64-bit: _L1 .base not found in _L2";
477 if (res != auxmap_L1[i].ent)
478 return "64-bit: _L1 .ent disagrees with _L2 entry";
479 }
480 /* Check L1 contains no duplicates */
481 for (i = 0; i < N_AUXMAP_L1; i++) {
482 if (auxmap_L1[i].base == 0)
483 continue;
484 for (j = i+1; j < N_AUXMAP_L1; j++) {
485 if (auxmap_L1[j].base == 0)
486 continue;
487 if (auxmap_L1[j].base == auxmap_L1[i].base)
488 return "64-bit: duplicate _L1 .base entries";
489 }
490 }
491 }
492 return NULL; /* ok */
493 }
494
495 static void insert_into_auxmap_L1_at ( Word rank, AuxMapEnt* ent )
496 {
497 Word i;
498 tl_assert(ent);
499 tl_assert(rank >= 0 && rank < N_AUXMAP_L1);
500 for (i = N_AUXMAP_L1-1; i > rank; i--)
501 auxmap_L1[i] = auxmap_L1[i-1];
502 auxmap_L1[rank].base = ent->base;
503 auxmap_L1[rank].ent = ent;
504 }
505
506 static INLINE AuxMapEnt* maybe_find_in_auxmap ( Addr a )
507 {
508 AuxMapEnt key;
509 AuxMapEnt* res;
510 Word i;
511
512 tl_assert(a > MAX_PRIMARY_ADDRESS);
513 a &= ~(Addr)0xFFFF;
514
515 /* First search the front-cache, which is a self-organising
516 list containing the most popular entries. */
517
518 if (LIKELY(auxmap_L1[0].base == a))
519 return auxmap_L1[0].ent;
520 if (LIKELY(auxmap_L1[1].base == a)) {
521 Addr t_base = auxmap_L1[0].base;
522 AuxMapEnt* t_ent = auxmap_L1[0].ent;
523 auxmap_L1[0].base = auxmap_L1[1].base;
524 auxmap_L1[0].ent = auxmap_L1[1].ent;
525 auxmap_L1[1].base = t_base;
526 auxmap_L1[1].ent = t_ent;
527 return auxmap_L1[0].ent;
528 }
529
530 n_auxmap_L1_searches++;
531
532 for (i = 0; i < N_AUXMAP_L1; i++) {
533 if (auxmap_L1[i].base == a) {
534 break;
535 }
536 }
537 tl_assert(i >= 0 && i <= N_AUXMAP_L1);
538
539 n_auxmap_L1_cmps += (ULong)(i+1);
540
541 if (i < N_AUXMAP_L1) {
542 if (i > 0) {
543 Addr t_base = auxmap_L1[i-1].base;
544 AuxMapEnt* t_ent = auxmap_L1[i-1].ent;
545 auxmap_L1[i-1].base = auxmap_L1[i-0].base;
546 auxmap_L1[i-1].ent = auxmap_L1[i-0].ent;
547 auxmap_L1[i-0].base = t_base;
548 auxmap_L1[i-0].ent = t_ent;
549 i--;
550 }
551 return auxmap_L1[i].ent;
552 }
553
554 n_auxmap_L2_searches++;
555
556 /* First see if we already have it. */
557 key.base = a;
558 key.sm = 0;
559
560 res = VG_(OSetGen_Lookup)(auxmap_L2, &key);
561 if (res)
562 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, res );
563 return res;
564 }
565
566 static AuxMapEnt* find_or_alloc_in_auxmap ( Addr a )
567 {
568 AuxMapEnt *nyu, *res;
569
570 /* First see if we already have it. */
571 res = maybe_find_in_auxmap( a );
572 if (LIKELY(res))
573 return res;
574
575 /* Ok, there's no entry in the secondary map, so we'll have
576 to allocate one. */
577 a &= ~(Addr)0xFFFF;
578
579 nyu = (AuxMapEnt*) VG_(OSetGen_AllocNode)( auxmap_L2, sizeof(AuxMapEnt) );
580 tl_assert(nyu);
581 nyu->base = a;
582 nyu->sm = &sm_distinguished[SM_DIST_NOACCESS];
583 VG_(OSetGen_Insert)( auxmap_L2, nyu );
584 insert_into_auxmap_L1_at( AUXMAP_L1_INSERT_IX, nyu );
585 n_auxmap_L2_nodes++;
586 return nyu;
587 }
588
589 /* --------------- SecMap fundamentals --------------- */
590
591 // In all these, 'low' means it's definitely in the main primary map,
592 // 'high' means it's definitely in the auxiliary table.
593
594 static INLINE SecMap** get_secmap_low_ptr ( Addr a )
595 {
596 UWord pm_off = a >> 16;
597 # if VG_DEBUG_MEMORY >= 1
598 tl_assert(pm_off < N_PRIMARY_MAP);
599 # endif
600 return &primary_map[ pm_off ];
601 }
602
603 static INLINE SecMap** get_secmap_high_ptr ( Addr a )
604 {
605 AuxMapEnt* am = find_or_alloc_in_auxmap(a);
606 return &am->sm;
607 }
608
609 static SecMap** get_secmap_ptr ( Addr a )
610 {
611 return ( a <= MAX_PRIMARY_ADDRESS
612 ? get_secmap_low_ptr(a)
613 : get_secmap_high_ptr(a));
614 }
615
616 static INLINE SecMap* get_secmap_for_reading_low ( Addr a )
617 {
618 return *get_secmap_low_ptr(a);
619 }
620
621 static INLINE SecMap* get_secmap_for_reading_high ( Addr a )
622 {
623 return *get_secmap_high_ptr(a);
624 }
625
626 static INLINE SecMap* get_secmap_for_writing_low(Addr a)
627 {
628 SecMap** p = get_secmap_low_ptr(a);
629 if (UNLIKELY(is_distinguished_sm(*p)))
630 *p = copy_for_writing(*p);
631 return *p;
632 }
633
634 static INLINE SecMap* get_secmap_for_writing_high ( Addr a )
635 {
636 SecMap** p = get_secmap_high_ptr(a);
637 if (UNLIKELY(is_distinguished_sm(*p)))
638 *p = copy_for_writing(*p);
639 return *p;
640 }
641
642 /* Produce the secmap for 'a', either from the primary map or by
643 ensuring there is an entry for it in the aux primary map. The
644 secmap may be a distinguished one as the caller will only want to
645 be able to read it.
646 */
647 static INLINE SecMap* get_secmap_for_reading ( Addr a )
648 {
649 return ( a <= MAX_PRIMARY_ADDRESS
650 ? get_secmap_for_reading_low (a)
651 : get_secmap_for_reading_high(a) );
652 }
653
654 /* Produce the secmap for 'a', either from the primary map or by
655 ensuring there is an entry for it in the aux primary map. The
656 secmap may not be a distinguished one, since the caller will want
657 to be able to write it. If it is a distinguished secondary, make a
658 writable copy of it, install it, and return the copy instead. (COW
659 semantics).
660 */
661 static SecMap* get_secmap_for_writing ( Addr a )
662 {
663 return ( a <= MAX_PRIMARY_ADDRESS
664 ? get_secmap_for_writing_low (a)
665 : get_secmap_for_writing_high(a) );
666 }
667
668 /* If 'a' has a SecMap, produce it. Else produce NULL. But don't
669 allocate one if one doesn't already exist. This is used by the
670 leak checker.
671 */
672 static SecMap* maybe_get_secmap_for ( Addr a )
673 {
674 if (a <= MAX_PRIMARY_ADDRESS) {
675 return get_secmap_for_reading_low(a);
676 } else {
677 AuxMapEnt* am = maybe_find_in_auxmap(a);
678 return am ? am->sm : NULL;
679 }
680 }
681
682 /* --------------- Fundamental functions --------------- */
683
684 static INLINE
685 void insert_vabits2_into_vabits8 ( Addr a, UChar vabits2, UChar* vabits8 )
686 {
687 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
688 *vabits8 &= ~(0x3 << shift); // mask out the two old bits
689 *vabits8 |= (vabits2 << shift); // mask in the two new bits
690 }
691
692 static INLINE
693 void insert_vabits4_into_vabits8 ( Addr a, UChar vabits4, UChar* vabits8 )
694 {
695 UInt shift;
696 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
697 shift = (a & 2) << 1; // shift by 0 or 4
698 *vabits8 &= ~(0xf << shift); // mask out the four old bits
699 *vabits8 |= (vabits4 << shift); // mask in the four new bits
700 }
701
702 static INLINE
703 UChar extract_vabits2_from_vabits8 ( Addr a, UChar vabits8 )
704 {
705 UInt shift = (a & 3) << 1; // shift by 0, 2, 4, or 6
706 vabits8 >>= shift; // shift the two bits to the bottom
707 return 0x3 & vabits8; // mask out the rest
708 }
709
710 static INLINE
711 UChar extract_vabits4_from_vabits8 ( Addr a, UChar vabits8 )
712 {
713 UInt shift;
714 tl_assert(VG_IS_2_ALIGNED(a)); // Must be 2-aligned
715 shift = (a & 2) << 1; // shift by 0 or 4
716 vabits8 >>= shift; // shift the four bits to the bottom
717 return 0xf & vabits8; // mask out the rest
718 }
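// Quick worked example (assuming a is 4-aligned): with vabits8 == 0x1a,
// extract_vabits2_from_vabits8(a+2, 0x1a) computes shift = (2 & 3) << 1 == 4,
// so the result is (0x1a >> 4) & 0x3 == 0x1 == VA_BITS2_UNDEFINED.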
719
720 // Note that these four are only used in slow cases. The fast cases do
721 // clever things like combine the auxmap check (in
722 // get_secmap_{read,writ}able) with alignment checks.
723
724 // *** WARNING! ***
725 // Any time this function is called, if it is possible that vabits2
726 // is equal to VA_BITS2_PARTDEFINED, then the corresponding entry in the
727 // sec-V-bits table must also be set!
728 static INLINE
729 void set_vabits2 ( Addr a, UChar vabits2 )
730 {
731 SecMap* sm = get_secmap_for_writing(a);
732 UWord sm_off = SM_OFF(a);
733 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
734 }
735
736 static INLINE
737 UChar get_vabits2 ( Addr a )
738 {
739 SecMap* sm = get_secmap_for_reading(a);
740 UWord sm_off = SM_OFF(a);
741 UChar vabits8 = sm->vabits8[sm_off];
742 return extract_vabits2_from_vabits8(a, vabits8);
743 }
744
745 // *** WARNING! ***
746 // Any time this function is called, if it is possible that any of the
747 // 4 2-bit fields in vabits8 are equal to VA_BITS2_PARTDEFINED, then the
748 // corresponding entry(s) in the sec-V-bits table must also be set!
749 static INLINE
750 UChar get_vabits8_for_aligned_word32 ( Addr a )
751 {
752 SecMap* sm = get_secmap_for_reading(a);
753 UWord sm_off = SM_OFF(a);
754 UChar vabits8 = sm->vabits8[sm_off];
755 return vabits8;
756 }
757
758 static INLINE
759 void set_vabits8_for_aligned_word32 ( Addr a, UChar vabits8 )
760 {
761 SecMap* sm = get_secmap_for_writing(a);
762 UWord sm_off = SM_OFF(a);
763 sm->vabits8[sm_off] = vabits8;
764 }
765
766
767 // Forward declarations
768 static UWord get_sec_vbits8(Addr a);
769 static void set_sec_vbits8(Addr a, UWord vbits8);
770
771 // Returns False if there was an addressability error.
772 static INLINE
773 Bool set_vbits8 ( Addr a, UChar vbits8 )
774 {
775 Bool ok = True;
776 UChar vabits2 = get_vabits2(a);
777 if ( VA_BITS2_NOACCESS != vabits2 ) {
778 // Addressable. Convert in-register format to in-memory format.
779 // Also remove any existing sec V bit entry for the byte if no
780 // longer necessary.
781 if ( V_BITS8_DEFINED == vbits8 ) { vabits2 = VA_BITS2_DEFINED; }
782 else if ( V_BITS8_UNDEFINED == vbits8 ) { vabits2 = VA_BITS2_UNDEFINED; }
783 else { vabits2 = VA_BITS2_PARTDEFINED;
784 set_sec_vbits8(a, vbits8); }
785 set_vabits2(a, vabits2);
786
787 } else {
788 // Unaddressable! Do nothing -- when writing to unaddressable
789 // memory it acts as a black hole, and the V bits can never be seen
790 // again. So we don't have to write them at all.
791 ok = False;
792 }
793 return ok;
794 }
795
796 // Returns False if there was an addressability error. In that case, we put
797 // all defined bits into vbits8.
798 static INLINE
799 Bool get_vbits8 ( Addr a, UChar* vbits8 )
800 {
801 Bool ok = True;
802 UChar vabits2 = get_vabits2(a);
803
804 // Convert the in-memory format to in-register format.
805 if ( VA_BITS2_DEFINED == vabits2 ) { *vbits8 = V_BITS8_DEFINED; }
806 else if ( VA_BITS2_UNDEFINED == vabits2 ) { *vbits8 = V_BITS8_UNDEFINED; }
807 else if ( VA_BITS2_NOACCESS == vabits2 ) {
808 *vbits8 = V_BITS8_DEFINED; // Make V bits defined!
809 ok = False;
810 } else {
811 tl_assert( VA_BITS2_PARTDEFINED == vabits2 );
812 *vbits8 = get_sec_vbits8(a);
813 }
814 return ok;
815 }
816
817
818 /* --------------- Secondary V bit table ------------ */
819
820 // This table holds the full V bit pattern for partially-defined bytes
821 // (PDBs) that are represented by VA_BITS2_PARTDEFINED in the main shadow
822 // memory.
823 //
824 // Note: the nodes in this table can become stale. Eg. if you write a PDB,
825 // then overwrite the same address with a fully defined byte, the sec-V-bit
826 // node will not necessarily be removed. This is because checking for
827 // whether removal is necessary would slow down the fast paths.
828 //
829 // To avoid the stale nodes building up too much, we periodically (once the
830 // table reaches a certain size) garbage collect (GC) the table by
831 // traversing it and evicting any nodes that no longer hold any PDBs
832 // (ie. stale nodes). If more than a certain proportion of nodes survive a GC, we increase the
833 // table size so that GCs occur less often.
834 //
835 // This policy is designed to avoid bad table bloat in the worst case where
836 // a program creates huge numbers of stale PDBs -- we would get this bloat
837 // if we had no GC -- while handling well the case where a node becomes
838 // stale but shortly afterwards is rewritten with a PDB and so becomes
839 // non-stale again (which happens quite often, eg. in perf/bz2). If we just
840 // remove all stale nodes as soon as possible, we just end up re-adding a
841 // lot of them again later. The "sufficiently stale" approach avoids
842 // this. (If a program has many live PDBs, performance will just suck,
843 // there's no way around that.)
844 //
845 // Further comments, JRS 14 Feb 2012. It turns out that the policy of
846 // holding on to stale entries for 2 GCs before discarding them can lead
847 // to massive space leaks. So we're changing to an arrangement where
848 // lines are evicted as soon as they are observed to be stale during a
849 // GC. This also has a side benefit of allowing the sufficiently_stale
850 // field to be removed from the SecVBitNode struct, reducing its size by
851 // 8 bytes, which is a substantial space saving considering that the
852 // struct was previously 32 or so bytes, on a 64 bit target.
853 //
854 // In order to try and mitigate the problem that the "sufficiently stale"
855 // heuristic was designed to avoid, the table size is allowed to drift
856 // up ("DRIFTUP") slowly to 80000, even if the residency is low. This
857 // means that nodes will exist in the table longer on average, and hopefully
858 // will be deleted and re-added less frequently.
859 //
860 // The previous scaling up mechanism (now called STEPUP) is retained:
861 // if residency exceeds 50%, the table is scaled up, although by a
862 // factor sqrt(2) rather than 2 as before. This effectively doubles the
863 // frequency of GCs when there are many PDBs and reduces the tendency of
864 // stale PDBs to reside for long periods in the table.
865
866 static OSet* secVBitTable;
867
868 // Stats
869 static ULong sec_vbits_new_nodes = 0;
870 static ULong sec_vbits_updates = 0;
871
872 // This must be a power of two; this is checked in mc_pre_clo_init().
873 // The size chosen here is a trade-off: if the nodes are bigger (ie. cover
874 // a larger address range) they take more space but we can get multiple
875 // partially-defined bytes in one if they are close to each other, reducing
876 // the number of total nodes. In practice sometimes they are clustered (eg.
877 // perf/bz2 repeatedly writes then reads more than 20,000 in a contiguous
878 // row), but often not. So we choose something intermediate.
879 #define BYTES_PER_SEC_VBIT_NODE 16
880
881 // We make the table bigger by a factor of STEPUP_GROWTH_FACTOR if
882 // more than this many nodes survive a GC.
883 #define STEPUP_SURVIVOR_PROPORTION 0.5
884 #define STEPUP_GROWTH_FACTOR 1.414213562
885
886 // If the above heuristic doesn't apply, then we may make the table
887 // slightly bigger, by a factor of DRIFTUP_GROWTH_FACTOR, if more than
888 // this many nodes survive a GC, _and_ the total table size does
889 // not exceed a fixed limit. The numbers are somewhat arbitrary, but
890 // work tolerably well on long Firefox runs. The 1.5% scaleup ratio
891 // gradually but effectively reduces residency and increases the time
892 // between GCs for programs with small numbers of PDBs. The 80000 limit
893 // effectively limits the table size to around 2MB for programs with
894 // small numbers of PDBs, whilst giving a reasonably long lifetime to
895 // entries, to try and reduce the costs resulting from deleting and
896 // re-adding of entries.
897 #define DRIFTUP_SURVIVOR_PROPORTION 0.15
898 #define DRIFTUP_GROWTH_FACTOR 1.015
899 #define DRIFTUP_MAX_SIZE 80000
900
901 // We GC the table when it gets this many nodes in it, ie. it's effectively
902 // the table size. It can change.
903 static Int secVBitLimit = 1000;
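// Illustrative numbers: starting from the initial limit of 1000, a STEPUP
// rescale takes the limit to about 1414 (x sqrt(2)), whereas a DRIFTUP
// rescale only nudges it to 1015 (x 1.015); DRIFTUP stops altogether once
// the limit reaches 80000 nodes.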
904
905 // The number of GCs done, used to age sec-V-bit nodes for eviction.
906 // Because it's unsigned, wrapping doesn't matter -- the right answer will
907 // come out anyway.
908 static UInt GCs_done = 0;
909
910 typedef
911 struct {
912 Addr a;
913 UChar vbits8[BYTES_PER_SEC_VBIT_NODE];
914 }
915 SecVBitNode;
916
917 static OSet* createSecVBitTable(void)
918 {
919 OSet* newSecVBitTable;
920 newSecVBitTable = VG_(OSetGen_Create_With_Pool)
921 ( offsetof(SecVBitNode, a),
922 NULL, // use fast comparisons
923 VG_(malloc), "mc.cSVT.1 (sec VBit table)",
924 VG_(free),
925 1000,
926 sizeof(SecVBitNode));
927 return newSecVBitTable;
928 }
929
930 static void gcSecVBitTable(void)
931 {
932 OSet* secVBitTable2;
933 SecVBitNode* n;
934 Int i, n_nodes = 0, n_survivors = 0;
935
936 GCs_done++;
937
938 // Create the new table.
939 secVBitTable2 = createSecVBitTable();
940
941 // Traverse the table, moving fresh nodes into the new table.
942 VG_(OSetGen_ResetIter)(secVBitTable);
943 while ( (n = VG_(OSetGen_Next)(secVBitTable)) ) {
944 // Keep node if any of its bytes are non-stale. Using
945 // get_vabits2() for the lookup is not very efficient, but I don't
946 // think it matters.
947 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
948 if (VA_BITS2_PARTDEFINED == get_vabits2(n->a + i)) {
949 // Found a non-stale byte, so keep =>
950 // Insert a copy of the node into the new table.
951 SecVBitNode* n2 =
952 VG_(OSetGen_AllocNode)(secVBitTable2, sizeof(SecVBitNode));
953 *n2 = *n;
954 VG_(OSetGen_Insert)(secVBitTable2, n2);
955 break;
956 }
957 }
958 }
959
960 // Get the before and after sizes.
961 n_nodes = VG_(OSetGen_Size)(secVBitTable);
962 n_survivors = VG_(OSetGen_Size)(secVBitTable2);
963
964 // Destroy the old table, and put the new one in its place.
965 VG_(OSetGen_Destroy)(secVBitTable);
966 secVBitTable = secVBitTable2;
967
968 if (VG_(clo_verbosity) > 1) {
969 Char percbuf[7];
970 VG_(percentify)(n_survivors, n_nodes, 1, 6, percbuf);
971 VG_(message)(Vg_DebugMsg, "memcheck GC: %d nodes, %d survivors (%s)\n",
972 n_nodes, n_survivors, percbuf);
973 }
974
975 // Increase table size if necessary.
976 if ((Double)n_survivors
977 > ((Double)secVBitLimit * STEPUP_SURVIVOR_PROPORTION)) {
978 secVBitLimit = (Int)((Double)secVBitLimit * (Double)STEPUP_GROWTH_FACTOR);
979 if (VG_(clo_verbosity) > 1)
980 VG_(message)(Vg_DebugMsg,
981 "memcheck GC: %d new table size (stepup)\n",
982 secVBitLimit);
983 }
984 else
985 if (secVBitLimit < DRIFTUP_MAX_SIZE
986 && (Double)n_survivors
987 > ((Double)secVBitLimit * DRIFTUP_SURVIVOR_PROPORTION)) {
988 secVBitLimit = (Int)((Double)secVBitLimit * (Double)DRIFTUP_GROWTH_FACTOR);
989 if (VG_(clo_verbosity) > 1)
990 VG_(message)(Vg_DebugMsg,
991 "memcheck GC: %d new table size (driftup)\n",
992 secVBitLimit);
993 }
994 }
995
996 static UWord get_sec_vbits8(Addr a)
997 {
998 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
999 Int amod = a % BYTES_PER_SEC_VBIT_NODE;
1000 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1001 UChar vbits8;
1002 tl_assert2(n, "get_sec_vbits8: no node for address %p (%p)\n", aAligned, a);
1003 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1004 // make it to the secondary V bits table.
1005 vbits8 = n->vbits8[amod];
1006 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1007 return vbits8;
1008 }
1009
1010 static void set_sec_vbits8(Addr a, UWord vbits8)
1011 {
1012 Addr aAligned = VG_ROUNDDN(a, BYTES_PER_SEC_VBIT_NODE);
1013 Int i, amod = a % BYTES_PER_SEC_VBIT_NODE;
1014 SecVBitNode* n = VG_(OSetGen_Lookup)(secVBitTable, &aAligned);
1015 // Shouldn't be fully defined or fully undefined -- those cases shouldn't
1016 // make it to the secondary V bits table.
1017 tl_assert(V_BITS8_DEFINED != vbits8 && V_BITS8_UNDEFINED != vbits8);
1018 if (n) {
1019 n->vbits8[amod] = vbits8; // update
1020 sec_vbits_updates++;
1021 } else {
1022 // Do a table GC if necessary. Nb: do this before creating and
1023 // inserting the new node, to avoid erroneously GC'ing the new node.
1024 if (secVBitLimit == VG_(OSetGen_Size)(secVBitTable)) {
1025 gcSecVBitTable();
1026 }
1027
1028 // New node: assign the specific byte, make the rest invalid (they
1029 // should never be read as-is, but be cautious).
1030 n = VG_(OSetGen_AllocNode)(secVBitTable, sizeof(SecVBitNode));
1031 n->a = aAligned;
1032 for (i = 0; i < BYTES_PER_SEC_VBIT_NODE; i++) {
1033 n->vbits8[i] = V_BITS8_UNDEFINED;
1034 }
1035 n->vbits8[amod] = vbits8;
1036
1037 // Insert the new node.
1038 VG_(OSetGen_Insert)(secVBitTable, n);
1039 sec_vbits_new_nodes++;
1040
1041 n_secVBit_nodes = VG_(OSetGen_Size)(secVBitTable);
1042 if (n_secVBit_nodes > max_secVBit_nodes)
1043 max_secVBit_nodes = n_secVBit_nodes;
1044 }
1045 }
1046
1047 /* --------------- Endianness helpers --------------- */
1048
1049 /* Returns the offset in memory of the byte with significance 'byteno'
1050 (0 = least significant) in a wordszB-sized word, given the specified endianness. */
1051 static INLINE UWord byte_offset_w ( UWord wordszB, Bool bigendian,
1052 UWord byteno ) {
1053 return bigendian ? (wordszB-1-byteno) : byteno;
1054 }
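/* For example, byte_offset_w(4, True, 0) == 3 and byte_offset_w(4, False, 0)
   == 0: the least significant byte of a 4-byte word lives at the highest
   address on a big-endian target and at the lowest address on a
   little-endian one. */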
1055
1056
1057 /* --------------- Ignored address ranges --------------- */
1058
1059 #define M_IGNORE_RANGES 4
1060
1061 typedef
1062 struct {
1063 Int used;
1064 Addr start[M_IGNORE_RANGES];
1065 Addr end[M_IGNORE_RANGES];
1066 }
1067 IgnoreRanges;
1068
1069 static IgnoreRanges ignoreRanges;
1070
1071 INLINE Bool MC_(in_ignored_range) ( Addr a )
1072 {
1073 Int i;
1074 if (LIKELY(ignoreRanges.used == 0))
1075 return False;
1076 for (i = 0; i < ignoreRanges.used; i++) {
1077 if (a >= ignoreRanges.start[i] && a < ignoreRanges.end[i])
1078 return True;
1079 }
1080 return False;
1081 }
1082
1083 /* Parse two Addr separated by a dash, or fail. */
1084
1085 static Bool parse_range ( UChar** ppc, Addr* result1, Addr* result2 )
1086 {
1087 Bool ok = VG_(parse_Addr) (ppc, result1);
1088 if (!ok)
1089 return False;
1090 if (**ppc != '-')
1091 return False;
1092 (*ppc)++;
1093 ok = VG_(parse_Addr) (ppc, result2);
1094 if (!ok)
1095 return False;
1096 return True;
1097 }
1098
1099 /* Parse a set of ranges separated by commas into 'ignoreRanges', or
1100 fail. */
1101
1102 static Bool parse_ignore_ranges ( UChar* str0 )
1103 {
1104 Addr start, end;
1105 Bool ok;
1106 UChar* str = str0;
1107 UChar** ppc = &str;
1108 ignoreRanges.used = 0;
1109 while (1) {
1110 ok = parse_range(ppc, &start, &end);
1111 if (!ok)
1112 return False;
1113 if (ignoreRanges.used >= M_IGNORE_RANGES)
1114 return False;
1115 ignoreRanges.start[ignoreRanges.used] = start;
1116 ignoreRanges.end[ignoreRanges.used] = end;
1117 ignoreRanges.used++;
1118 if (**ppc == 0)
1119 return True;
1120 if (**ppc != ',')
1121 return False;
1122 (*ppc)++;
1123 }
1124 /*NOTREACHED*/
1125 return False;
1126 }
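/* For example, the (made-up) string "0x50000000-0x60000000,0x70000000-0x80000000"
   parses into two ignored ranges.  Note that MC_(in_ignored_range) above
   treats the start address as inclusive and the end address as exclusive. */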
1127
1128
1129 /* --------------- Load/store slow cases. --------------- */
1130
1131 static
1132 __attribute__((noinline))
1133 ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian )
1134 {
1135 PROF_EVENT(30, "mc_LOADVn_slow");
1136
1137 /* ------------ BEGIN semi-fast cases ------------ */
1138 /* These deal quickly-ish with the common auxiliary primary map
1139 cases on 64-bit platforms. Are merely a speedup hack; can be
1140 omitted without loss of correctness/functionality. Note that in
1141 both cases the "sizeof(void*) == 8" causes these cases to be
1142 folded out by compilers on 32-bit platforms. These are derived
1143 from LOADV64 and LOADV32.
1144 */
1145 if (LIKELY(sizeof(void*) == 8
1146 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1147 SecMap* sm = get_secmap_for_reading(a);
1148 UWord sm_off16 = SM_OFF_16(a);
1149 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1150 if (LIKELY(vabits16 == VA_BITS16_DEFINED))
1151 return V_BITS64_DEFINED;
1152 if (LIKELY(vabits16 == VA_BITS16_UNDEFINED))
1153 return V_BITS64_UNDEFINED;
1154 /* else fall into the slow case */
1155 }
1156 if (LIKELY(sizeof(void*) == 8
1157 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1158 SecMap* sm = get_secmap_for_reading(a);
1159 UWord sm_off = SM_OFF(a);
1160 UWord vabits8 = sm->vabits8[sm_off];
1161 if (LIKELY(vabits8 == VA_BITS8_DEFINED))
1162 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
1163 if (LIKELY(vabits8 == VA_BITS8_UNDEFINED))
1164 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
1165 /* else fall into slow case */
1166 }
1167 /* ------------ END semi-fast cases ------------ */
1168
1169 ULong vbits64 = V_BITS64_UNDEFINED; /* result */
1170 ULong pessim64 = V_BITS64_DEFINED; /* only used when p-l-ok=yes */
1171 SSizeT szB = nBits / 8;
1172 SSizeT i; /* Must be signed. */
1173 SizeT n_addrs_bad = 0;
1174 Addr ai;
1175 UChar vbits8;
1176 Bool ok;
1177
1178 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1179
1180 /* Make up a 64-bit result V word, which contains the loaded data
1181 for valid addresses and Defined for invalid addresses. Iterate
1182 over the bytes in the word, from the most significant down to
1183 the least. The vbits to return are calculated into vbits64.
1184 Also compute the pessimising value to be used when
1185 --partial-loads-ok=yes. n_addrs_bad is redundant (the relevant
1186 info can be gleaned from pessim64) but is used as a
1187 cross-check. */
1188 for (i = szB-1; i >= 0; i--) {
1189 PROF_EVENT(31, "mc_LOADVn_slow(loop)");
1190 ai = a + byte_offset_w(szB, bigendian, i);
1191 ok = get_vbits8(ai, &vbits8);
1192 vbits64 <<= 8;
1193 vbits64 |= vbits8;
1194 if (!ok) n_addrs_bad++;
1195 pessim64 <<= 8;
1196 pessim64 |= (ok ? V_BITS8_DEFINED : V_BITS8_UNDEFINED);
1197 }
1198
1199 /* In the common case, all the addresses involved are valid, so we
1200 just return the computed V bits and have done. */
1201 if (LIKELY(n_addrs_bad == 0))
1202 return vbits64;
1203
1204 /* If there's no possibility of getting a partial-loads-ok
1205 exemption, report the error and quit. */
1206 if (!MC_(clo_partial_loads_ok)) {
1207 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1208 return vbits64;
1209 }
1210
1211 /* The partial-loads-ok exemption might apply. Find out if it
1212 does. If so, don't report an addressing error, but do return
1213 Undefined for the bytes that are out of range, so as to avoid
1214 false negatives. If it doesn't apply, just report an addressing
1215 error in the usual way. */
1216
1217 /* Some code steps along byte strings in aligned word-sized chunks
1218 even when there is only a partially defined word at the end (eg,
1219 optimised strlen). This is allowed by the memory model of
1220 modern machines, since an aligned load cannot span two pages and
1221 thus cannot "partially fault", despite such behaviour being
1222 declared undefined by ANSI C/C++.
1223
1224 Therefore, a load from a partially-addressable place is allowed
1225 if all of the following hold:
1226 - the command-line flag is set [by default, it isn't]
1227 - it's a word-sized, word-aligned load
1228 - at least one of the addresses in the word *is* valid
1229
1230 Since this suppresses the addressing error, we avoid false
1231 negatives by marking bytes undefined when they come from an
1232 invalid address.
1233 */
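/* Worked example: on a 64-bit target, an 8-aligned 8-byte load that overlaps
the last 5 addressable bytes of a block.  With --partial-loads-ok=yes the 5
valid bytes return their real V bits and the 3 out-of-range bytes are forced
to Undefined (via pessim64 below), with no addressing error; with =no, the
addressing error is reported as usual. */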
1234
1235 /* "at least one of the addresses is invalid" */
1236 tl_assert(pessim64 != V_BITS64_DEFINED);
1237
1238 if (szB == VG_WORDSIZE && VG_IS_WORD_ALIGNED(a)
1239 && n_addrs_bad < VG_WORDSIZE) {
1240 /* Exemption applies. Use the previously computed pessimising
1241 value for vbits64 and return the combined result, but don't
1242 flag an addressing error. The pessimising value is Defined
1243 for valid addresses and Undefined for invalid addresses. */
1244 /* for assumption that doing bitwise or implements UifU */
1245 tl_assert(V_BIT_UNDEFINED == 1 && V_BIT_DEFINED == 0);
1246 /* (really need "UifU" here...)
1247 vbits64 UifU= pessim64 (is pessimised by it, iow) */
1248 vbits64 |= pessim64;
1249 return vbits64;
1250 }
1251
1252 /* Exemption doesn't apply. Flag an addressing error in the normal
1253 way. */
1254 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
1255
1256 return vbits64;
1257 }
1258
1259
1260 static
1261 __attribute__((noinline))
1262 void mc_STOREVn_slow ( Addr a, SizeT nBits, ULong vbytes, Bool bigendian )
1263 {
1264 SizeT szB = nBits / 8;
1265 SizeT i, n_addrs_bad = 0;
1266 UChar vbits8;
1267 Addr ai;
1268 Bool ok;
1269
1270 PROF_EVENT(35, "mc_STOREVn_slow");
1271
1272 /* ------------ BEGIN semi-fast cases ------------ */
1273 /* These deal quickly-ish with the common auxiliary primary map
1274 cases on 64-bit platforms. Are merely a speedup hack; can be
1275 omitted without loss of correctness/functionality. Note that in
1276 both cases the "sizeof(void*) == 8" causes these cases to be
1277 folded out by compilers on 32-bit platforms. These are derived
1278 from STOREV64 and STOREV32.
1279 */
1280 if (LIKELY(sizeof(void*) == 8
1281 && nBits == 64 && VG_IS_8_ALIGNED(a))) {
1282 SecMap* sm = get_secmap_for_reading(a);
1283 UWord sm_off16 = SM_OFF_16(a);
1284 UWord vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
1285 if (LIKELY( !is_distinguished_sm(sm) &&
1286 (VA_BITS16_DEFINED == vabits16 ||
1287 VA_BITS16_UNDEFINED == vabits16) )) {
1288 /* Handle common case quickly: a is suitably aligned, */
1289 /* is mapped, and is addressable. */
1290 // Convert full V-bits in register to compact 2-bit form.
1291 if (LIKELY(V_BITS64_DEFINED == vbytes)) {
1292 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
1293 return;
1294 } else if (V_BITS64_UNDEFINED == vbytes) {
1295 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
1296 return;
1297 }
1298 /* else fall into the slow case */
1299 }
1300 /* else fall into the slow case */
1301 }
1302 if (LIKELY(sizeof(void*) == 8
1303 && nBits == 32 && VG_IS_4_ALIGNED(a))) {
1304 SecMap* sm = get_secmap_for_reading(a);
1305 UWord sm_off = SM_OFF(a);
1306 UWord vabits8 = sm->vabits8[sm_off];
1307 if (LIKELY( !is_distinguished_sm(sm) &&
1308 (VA_BITS8_DEFINED == vabits8 ||
1309 VA_BITS8_UNDEFINED == vabits8) )) {
1310 /* Handle common case quickly: a is suitably aligned, */
1311 /* is mapped, and is addressable. */
1312 // Convert full V-bits in register to compact 2-bit form.
1313 if (LIKELY(V_BITS32_DEFINED == (vbytes & 0xFFFFFFFF))) {
1314 sm->vabits8[sm_off] = VA_BITS8_DEFINED;
1315 return;
1316 } else if (V_BITS32_UNDEFINED == (vbytes & 0xFFFFFFFF)) {
1317 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
1318 return;
1319 }
1320 /* else fall into the slow case */
1321 }
1322 /* else fall into the slow case */
1323 }
1324 /* ------------ END semi-fast cases ------------ */
1325
1326 tl_assert(nBits == 64 || nBits == 32 || nBits == 16 || nBits == 8);
1327
1328 /* Dump vbytes in memory, iterating from least to most significant
1329 byte. At the same time establish addressability of the location. */
1330 for (i = 0; i < szB; i++) {
1331 PROF_EVENT(36, "mc_STOREVn_slow(loop)");
1332 ai = a + byte_offset_w(szB, bigendian, i);
1333 vbits8 = vbytes & 0xff;
1334 ok = set_vbits8(ai, vbits8);
1335 if (!ok) n_addrs_bad++;
1336 vbytes >>= 8;
1337 }
1338
1339 /* If an address error has happened, report it. */
1340 if (n_addrs_bad > 0)
1341 MC_(record_address_error)( VG_(get_running_tid)(), a, szB, True );
1342 }
1343
1344
1345 /*------------------------------------------------------------*/
1346 /*--- Setting permissions over address ranges. ---*/
1347 /*------------------------------------------------------------*/
1348
1349 static void set_address_range_perms ( Addr a, SizeT lenT, UWord vabits16,
1350 UWord dsm_num )
1351 {
1352 UWord sm_off, sm_off16;
1353 UWord vabits2 = vabits16 & 0x3;
1354 SizeT lenA, lenB, len_to_next_secmap;
1355 Addr aNext;
1356 SecMap* sm;
1357 SecMap** sm_ptr;
1358 SecMap* example_dsm;
1359
1360 PROF_EVENT(150, "set_address_range_perms");
1361
1362 /* Check the V+A bits make sense. */
1363 tl_assert(VA_BITS16_NOACCESS == vabits16 ||
1364 VA_BITS16_UNDEFINED == vabits16 ||
1365 VA_BITS16_DEFINED == vabits16);
1366
1367 // This code should never write PDBs; ensure this. (See comment above
1368 // set_vabits2().)
1369 tl_assert(VA_BITS2_PARTDEFINED != vabits2);
1370
1371 if (lenT == 0)
1372 return;
1373
1374 if (lenT > 256 * 1024 * 1024) {
1375 if (VG_(clo_verbosity) > 0 && !VG_(clo_xml)) {
1376 Char* s = "unknown???";
1377 if (vabits16 == VA_BITS16_NOACCESS ) s = "noaccess";
1378 if (vabits16 == VA_BITS16_UNDEFINED) s = "undefined";
1379 if (vabits16 == VA_BITS16_DEFINED ) s = "defined";
1380 VG_(message)(Vg_UserMsg, "Warning: set address range perms: "
1381 "large range [0x%lx, 0x%lx) (%s)\n",
1382 a, a + lenT, s);
1383 }
1384 }
1385
1386 #ifndef PERF_FAST_SARP
1387 /*------------------ debug-only case ------------------ */
1388 {
1389 // Endianness doesn't matter here because all bytes are being set to
1390 // the same value.
1391 // Nb: We don't have to worry about updating the sec-V-bits table
1392 // after these set_vabits2() calls because this code never writes
1393 // VA_BITS2_PARTDEFINED values.
1394 SizeT i;
1395 for (i = 0; i < lenT; i++) {
1396 set_vabits2(a + i, vabits2);
1397 }
1398 return;
1399 }
1400 #endif
1401
1402 /*------------------ standard handling ------------------ */
1403
1404 /* Get the distinguished secondary that we might want
1405 to use (part of the space-compression scheme). */
1406 example_dsm = &sm_distinguished[dsm_num];
1407
1408 // We have to handle ranges covering various combinations of partial and
1409 // whole sec-maps. Here is how parts 1, 2 and 3 are used in each case.
1410 // Cases marked with a '*' are common.
1411 //
1412 // TYPE PARTS USED
1413 // ---- ----------
1414 // * one partial sec-map (p) 1
1415 // - one whole sec-map (P) 2
1416 //
1417 // * two partial sec-maps (pp) 1,3
1418 // - one partial, one whole sec-map (pP) 1,2
1419 // - one whole, one partial sec-map (Pp) 2,3
1420 // - two whole sec-maps (PP) 2,2
1421 //
1422 // * one partial, one whole, one partial (pPp) 1,2,3
1423 // - one partial, two whole (pPP) 1,2,2
1424 // - two whole, one partial (PPp) 2,2,3
1425 // - three whole (PPP) 2,2,2
1426 //
1427 // * one partial, N-2 whole, one partial (pP...Pp) 1,2...2,3
1428 // - one partial, N-1 whole (pP...PP) 1,2...2,2
1429 // - N-1 whole, one partial (PP...Pp) 2,2...2,3
1430 // - N whole (PP...PP) 2,2...2,2
1431
1432 // Break up total length (lenT) into two parts: length in the first
1433 // sec-map (lenA), and the rest (lenB); lenT == lenA + lenB.
1434 aNext = start_of_this_sm(a) + SM_SIZE;
1435 len_to_next_secmap = aNext - a;
1436 if ( lenT <= len_to_next_secmap ) {
1437 // Range entirely within one sec-map. Covers almost all cases.
1438 PROF_EVENT(151, "set_address_range_perms-single-secmap");
1439 lenA = lenT;
1440 lenB = 0;
1441 } else if (is_start_of_sm(a)) {
1442 // Range spans at least one whole sec-map, and starts at the beginning
1443 // of a sec-map; skip to Part 2.
1444 PROF_EVENT(152, "set_address_range_perms-startof-secmap");
1445 lenA = 0;
1446 lenB = lenT;
1447 goto part2;
1448 } else {
1449 // Range spans two or more sec-maps, first one is partial.
1450 PROF_EVENT(153, "set_address_range_perms-multiple-secmaps");
1451 lenA = len_to_next_secmap;
1452 lenB = lenT - lenA;
1453 }
1454
1455 //------------------------------------------------------------------------
1456 // Part 1: Deal with the first sec_map. Most of the time the range will be
1457 // entirely within a sec_map and this part alone will suffice. Also,
1458 // doing it this way lets us avoid repeatedly testing for the crossing of
1459 // a sec-map boundary within these loops.
1460 //------------------------------------------------------------------------
1461
1462 // If it's distinguished, make it undistinguished if necessary.
1463 sm_ptr = get_secmap_ptr(a);
1464 if (is_distinguished_sm(*sm_ptr)) {
1465 if (*sm_ptr == example_dsm) {
1466 // Sec-map already has the V+A bits that we want, so skip.
1467 PROF_EVENT(154, "set_address_range_perms-dist-sm1-quick");
1468 a = aNext;
1469 lenA = 0;
1470 } else {
1471 PROF_EVENT(155, "set_address_range_perms-dist-sm1");
1472 *sm_ptr = copy_for_writing(*sm_ptr);
1473 }
1474 }
1475 sm = *sm_ptr;
1476
1477 // 1 byte steps
1478 while (True) {
1479 if (VG_IS_8_ALIGNED(a)) break;
1480 if (lenA < 1) break;
1481 PROF_EVENT(156, "set_address_range_perms-loop1a");
1482 sm_off = SM_OFF(a);
1483 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1484 a += 1;
1485 lenA -= 1;
1486 }
1487 // 8-aligned, 8 byte steps
1488 while (True) {
1489 if (lenA < 8) break;
1490 PROF_EVENT(157, "set_address_range_perms-loop8a");
1491 sm_off16 = SM_OFF_16(a);
1492 ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1493 a += 8;
1494 lenA -= 8;
1495 }
1496 // 1 byte steps
1497 while (True) {
1498 if (lenA < 1) break;
1499 PROF_EVENT(158, "set_address_range_perms-loop1b");
1500 sm_off = SM_OFF(a);
1501 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1502 a += 1;
1503 lenA -= 1;
1504 }
1505
1506 // We've finished the first sec-map. Is that it?
1507 if (lenB == 0)
1508 return;
1509
1510 //------------------------------------------------------------------------
1511 // Part 2: Fast-set entire sec-maps at a time.
1512 //------------------------------------------------------------------------
1513 part2:
1514 // 64KB-aligned, 64KB steps.
1515 // Nb: we can reach here with lenB < SM_SIZE
1516 tl_assert(0 == lenA);
1517 while (True) {
1518 if (lenB < SM_SIZE) break;
1519 tl_assert(is_start_of_sm(a));
1520 PROF_EVENT(159, "set_address_range_perms-loop64K");
1521 sm_ptr = get_secmap_ptr(a);
1522 if (!is_distinguished_sm(*sm_ptr)) {
1523 PROF_EVENT(160, "set_address_range_perms-loop64K-free-dist-sm");
1524 // Free the non-distinguished sec-map that we're replacing. This
1525 // case happens moderately often, enough to be worthwhile.
1526 SysRes sres = VG_(am_munmap_valgrind)((Addr)*sm_ptr, sizeof(SecMap));
1527 tl_assert2(! sr_isError(sres), "SecMap valgrind munmap failure\n");
1528 }
1529 update_SM_counts(*sm_ptr, example_dsm);
1530 // Make the sec-map entry point to the example DSM
1531 *sm_ptr = example_dsm;
1532 lenB -= SM_SIZE;
1533 a += SM_SIZE;
1534 }
1535
1536 // We've finished the whole sec-maps. Is that it?
1537 if (lenB == 0)
1538 return;
1539
1540 //------------------------------------------------------------------------
1541 // Part 3: Finish off the final partial sec-map, if necessary.
1542 //------------------------------------------------------------------------
1543
1544 tl_assert(is_start_of_sm(a) && lenB < SM_SIZE);
1545
1546 // If it's distinguished, make it undistinguished if necessary.
1547 sm_ptr = get_secmap_ptr(a);
1548 if (is_distinguished_sm(*sm_ptr)) {
1549 if (*sm_ptr == example_dsm) {
1550 // Sec-map already has the V+A bits that we want, so stop.
1551 PROF_EVENT(161, "set_address_range_perms-dist-sm2-quick");
1552 return;
1553 } else {
1554 PROF_EVENT(162, "set_address_range_perms-dist-sm2");
1555 *sm_ptr = copy_for_writing(*sm_ptr);
1556 }
1557 }
1558 sm = *sm_ptr;
1559
1560 // 8-aligned, 8 byte steps
1561 while (True) {
1562 if (lenB < 8) break;
1563 PROF_EVENT(163, "set_address_range_perms-loop8b");
1564 sm_off16 = SM_OFF_16(a);
1565 ((UShort*)(sm->vabits8))[sm_off16] = vabits16;
1566 a += 8;
1567 lenB -= 8;
1568 }
1569 // 1 byte steps
1570 while (True) {
1571 if (lenB < 1) return;
1572 PROF_EVENT(164, "set_address_range_perms-loop1c");
1573 sm_off = SM_OFF(a);
1574 insert_vabits2_into_vabits8( a, vabits2, &(sm->vabits8[sm_off]) );
1575 a += 1;
1576 lenB -= 1;
1577 }
1578 }
1579
1580
1581 /* --- Set permissions for arbitrary address ranges --- */
1582
1583 void MC_(make_mem_noaccess) ( Addr a, SizeT len )
1584 {
1585 PROF_EVENT(40, "MC_(make_mem_noaccess)");
1586 DEBUG("MC_(make_mem_noaccess)(%p, %lu)\n", a, len);
1587 set_address_range_perms ( a, len, VA_BITS16_NOACCESS, SM_DIST_NOACCESS );
1588 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1589 ocache_sarp_Clear_Origins ( a, len );
1590 }
1591
1592 static void make_mem_undefined ( Addr a, SizeT len )
1593 {
1594 PROF_EVENT(41, "make_mem_undefined");
1595 DEBUG("make_mem_undefined(%p, %lu)\n", a, len);
1596 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1597 }
1598
1599 void MC_(make_mem_undefined_w_otag) ( Addr a, SizeT len, UInt otag )
1600 {
1601 PROF_EVENT(41, "MC_(make_mem_undefined)");
1602 DEBUG("MC_(make_mem_undefined)(%p, %lu)\n", a, len);
1603 set_address_range_perms ( a, len, VA_BITS16_UNDEFINED, SM_DIST_UNDEFINED );
1604 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1605 ocache_sarp_Set_Origins ( a, len, otag );
1606 }
1607
1608 static
1609 void make_mem_undefined_w_tid_and_okind ( Addr a, SizeT len,
1610 ThreadId tid, UInt okind )
1611 {
1612 UInt ecu;
1613 ExeContext* here;
1614 /* VG_(record_ExeContext) checks for validity of tid, and asserts
1615 if it is invalid. So no need to do it here. */
1616 tl_assert(okind <= 3);
1617 here = VG_(record_ExeContext)( tid, 0/*first_ip_delta*/ );
1618 tl_assert(here);
1619 ecu = VG_(get_ECU_from_ExeContext)(here);
1620 tl_assert(VG_(is_plausible_ECU)(ecu));
1621 MC_(make_mem_undefined_w_otag) ( a, len, ecu | okind );
1622 }
1623
1624 static
1625 void make_mem_undefined_w_tid ( Addr a, SizeT len, ThreadId tid ) {
1626 make_mem_undefined_w_tid_and_okind ( a, len, tid, MC_OKIND_UNKNOWN );
1627 }
1628
1629
1630 void MC_(make_mem_defined) ( Addr a, SizeT len )
1631 {
1632 PROF_EVENT(42, "MC_(make_mem_defined)");
1633 DEBUG("MC_(make_mem_defined)(%p, %lu)\n", a, len);
1634 set_address_range_perms ( a, len, VA_BITS16_DEFINED, SM_DIST_DEFINED );
1635 if (UNLIKELY( MC_(clo_mc_level) == 3 ))
1636 ocache_sarp_Clear_Origins ( a, len );
1637 }
1638
1639 /* For each byte in [a,a+len), if the byte is addressable, make it be
1640 defined, but if it isn't addressable, leave it alone. In other
1641 words a version of MC_(make_mem_defined) that doesn't mess with
1642 addressability. Low-performance implementation. */
1643 static void make_mem_defined_if_addressable ( Addr a, SizeT len )
1644 {
1645 SizeT i;
1646 UChar vabits2;
1647 DEBUG("make_mem_defined_if_addressable(%p, %llu)\n", a, (ULong)len);
1648 for (i = 0; i < len; i++) {
1649 vabits2 = get_vabits2( a+i );
1650 if (LIKELY(VA_BITS2_NOACCESS != vabits2)) {
1651 set_vabits2(a+i, VA_BITS2_DEFINED);
1652 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1653 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1654 }
1655 }
1656 }
1657 }
1658
1659 /* Similarly (needed for mprotect handling ..) */
1660 static void make_mem_defined_if_noaccess ( Addr a, SizeT len )
1661 {
1662 SizeT i;
1663 UChar vabits2;
1664 DEBUG("make_mem_defined_if_noaccess(%p, %llu)\n", a, (ULong)len);
1665 for (i = 0; i < len; i++) {
1666 vabits2 = get_vabits2( a+i );
1667 if (LIKELY(VA_BITS2_NOACCESS == vabits2)) {
1668 set_vabits2(a+i, VA_BITS2_DEFINED);
1669 if (UNLIKELY(MC_(clo_mc_level) >= 3)) {
1670 MC_(helperc_b_store1)( a+i, 0 ); /* clear the origin tag */
1671 }
1672 }
1673 }
1674 }
1675
1676 /* --- Block-copy permissions (needed for implementing realloc() and
1677 sys_mremap). --- */
1678
1679 void MC_(copy_address_range_state) ( Addr src, Addr dst, SizeT len )
1680 {
1681 SizeT i, j;
1682 UChar vabits2, vabits8;
1683 Bool aligned, nooverlap;
1684
1685 DEBUG("MC_(copy_address_range_state)\n");
1686 PROF_EVENT(50, "MC_(copy_address_range_state)");
1687
1688 if (len == 0 || src == dst)
1689 return;
1690
1691 aligned = VG_IS_4_ALIGNED(src) && VG_IS_4_ALIGNED(dst);
1692 nooverlap = src+len <= dst || dst+len <= src;
1693
1694 if (nooverlap && aligned) {
1695
1696 /* Vectorised fast case, when no overlap and suitably aligned */
1697 /* vector loop */
1698 i = 0;
1699 while (len >= 4) {
1700 vabits8 = get_vabits8_for_aligned_word32( src+i );
1701 set_vabits8_for_aligned_word32( dst+i, vabits8 );
1702 if (LIKELY(VA_BITS8_DEFINED == vabits8
1703 || VA_BITS8_UNDEFINED == vabits8
1704 || VA_BITS8_NOACCESS == vabits8)) {
1705 /* do nothing */
1706 } else {
1707 /* have to copy secondary map info */
1708 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+0 ))
1709 set_sec_vbits8( dst+i+0, get_sec_vbits8( src+i+0 ) );
1710 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+1 ))
1711 set_sec_vbits8( dst+i+1, get_sec_vbits8( src+i+1 ) );
1712 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+2 ))
1713 set_sec_vbits8( dst+i+2, get_sec_vbits8( src+i+2 ) );
1714 if (VA_BITS2_PARTDEFINED == get_vabits2( src+i+3 ))
1715 set_sec_vbits8( dst+i+3, get_sec_vbits8( src+i+3 ) );
1716 }
1717 i += 4;
1718 len -= 4;
1719 }
1720 /* fixup loop */
1721 while (len >= 1) {
1722 vabits2 = get_vabits2( src+i );
1723 set_vabits2( dst+i, vabits2 );
1724 if (VA_BITS2_PARTDEFINED == vabits2) {
1725 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1726 }
1727 i++;
1728 len--;
1729 }
1730
1731 } else {
1732
1733 /* We have to do things the slow way */
1734 if (src < dst) {
1735 for (i = 0, j = len-1; i < len; i++, j--) {
1736 PROF_EVENT(51, "MC_(copy_address_range_state)(loop)");
1737 vabits2 = get_vabits2( src+j );
1738 set_vabits2( dst+j, vabits2 );
1739 if (VA_BITS2_PARTDEFINED == vabits2) {
1740 set_sec_vbits8( dst+j, get_sec_vbits8( src+j ) );
1741 }
1742 }
1743 }
1744
1745 if (src > dst) {
1746 for (i = 0; i < len; i++) {
1747 PROF_EVENT(52, "MC_(copy_address_range_state)(loop)");
1748 vabits2 = get_vabits2( src+i );
1749 set_vabits2( dst+i, vabits2 );
1750 if (VA_BITS2_PARTDEFINED == vabits2) {
1751 set_sec_vbits8( dst+i, get_sec_vbits8( src+i ) );
1752 }
1753 }
1754 }
1755 }
1756
1757 }
1758
1759
1760 /*------------------------------------------------------------*/
1761 /*--- Origin tracking stuff - cache basics ---*/
1762 /*------------------------------------------------------------*/
1763
1764 /* AN OVERVIEW OF THE ORIGIN TRACKING IMPLEMENTATION
1765 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1766
1767 Note that this implementation draws inspiration from the "origin
1768 tracking by value piggybacking" scheme described in "Tracking Bad
1769 Apples: Reporting the Origin of Null and Undefined Value Errors"
1770 (Michael Bond, Nicholas Nethercote, Stephen Kent, Samuel Guyer,
1771 Kathryn McKinley, OOPSLA07, Montreal, Oct 2007) but in fact it is
1772 implemented completely differently.
1773
1774 Origin tags and ECUs -- about the shadow values
1775 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1776
1777 This implementation tracks the defining point of all uninitialised
1778 values using so called "origin tags", which are 32-bit integers,
1779 rather than using the values themselves to encode the origins. The
1780 latter, so-called "value piggybacking", is what the OOPSLA07 paper
1781 describes.
1782
1783 Origin tags, as tracked by the machinery below, are 32-bit unsigned
1784 ints (UInts), regardless of the machine's word size. Each tag
1785 comprises an upper 30-bit ECU field and a lower 2-bit
1786 'kind' field. The ECU field is a number given out by m_execontext
1787 and has a 1-1 mapping with ExeContext*s. An ECU can be used
1788 directly as an origin tag (otag), but in fact we want to put
1789 additional information in the 'kind' field to indicate roughly where the
1790 tag came from. This helps print more understandable error messages
1791 for the user -- it has no other purpose. In summary:
1792
1793 * Both ECUs and origin tags are represented as 32-bit words
1794
1795 * m_execontext and the core-tool interface deal purely in ECUs.
1796 They have no knowledge of origin tags - that is a purely
1797 Memcheck-internal matter.
1798
1799 * all valid ECUs have the lowest 2 bits zero and at least
1800 one of the upper 30 bits nonzero (see VG_(is_plausible_ECU))
1801
1802 * to convert from an ECU to an otag, OR in one of the MC_OKIND_
1803 constants defined in mc_include.h.
1804
1805 * to convert an otag back to an ECU, AND it with ~3
1806
1807 One important fact is that no valid otag is zero. A zero otag is
1808 used by the implementation to indicate "no origin", which could
1809 mean that either the value is defined, or it is undefined but the
1810 implementation somehow managed to lose the origin.
1811
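   As a small sketch (the helper names here are invented for
   illustration, not taken from the real code), the two conversions
   just described are simply:

      UInt ecu_to_otag ( UInt ecu, UInt okind ) {
         // assumes the low 2 bits of ecu are zero
         return ecu | okind;      // okind is one of the MC_OKIND_ values
      }
      UInt otag_to_ecu ( UInt otag ) {
         return otag & ~3u;       // strip the 2-bit 'kind' field
      }
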
1812 The ECU used for memory created by malloc etc is derived from the
1813 stack trace at the time the malloc etc happens. This means the
1814 mechanism can show the exact allocation point for heap-created
1815 uninitialised values.
1816
1817 In contrast, it is simply too expensive to create a complete
1818 backtrace for each stack allocation. Therefore we merely use a
1819 depth-1 backtrace for stack allocations, which can be done once at
1820 translation time, rather than N times at run time. The result of
1821 this is that, for stack created uninitialised values, Memcheck can
1822 only show the allocating function, and not what called it.
1823 Furthermore, compilers tend to move the stack pointer just once at
1824 the start of the function, to allocate all locals, and so in fact
1825 the stack origin almost always simply points to the opening brace
1826 of the function. Net result is, for stack origins, the mechanism
1827 can tell you in which function the undefined value was created, but
1828 that's all. Users will need to carefully check all locals in the
1829 specified function.
1830
1831 Shadowing registers and memory
1832 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1833
1834 Memory is shadowed using a two level cache structure (ocacheL1 and
1835 ocacheL2). Memory references are first directed to ocacheL1. This
1836 is a traditional 2-way set associative cache with 32-byte lines and
1837 approximate LRU replacement within each set.
1838
1839 A naive implementation would require storing one 32 bit otag for
1840 each byte of memory covered, a 4:1 space overhead. Instead, there
1841 is one otag for every 4 bytes of memory covered, plus a 4-bit mask
1842 that shows which of the 4 bytes have that shadow value and which
1843 have a shadow value of zero (indicating no origin). Hence a lot of
1844 space is saved, but the cost is that only one different origin per
1845 4 bytes of address space can be represented. This is a source of
1846 imprecision, but how much of a problem it really is remains to be
1847 seen.
1848
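   Concretely, each 32-bit group in a line is described by one otag
   (w32[]) plus a 4-bit mask (descr[]) saying which of the 4 bytes that
   otag applies to.  A simplified sketch of reading back the origin of
   a single byte at address 'a' (field names as in OCacheLine, defined
   further below):

      OCacheLine* line   = find_OCacheLine( a );
      UWord       woff   = oc_line_offset( a );  // which 32-bit group
      UWord       byteno = a & 3;                // which byte within it
      UInt        otag   = (line->descr[woff] & (1 << byteno))
                              ? line->w32[woff]  // byte has this origin
                              : 0;               // no origin known
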
1849 A cache line that contains all zeroes ("no origins") contains no
1850 useful information, and can be ejected from the L1 cache "for
1851 free", in the sense that a read miss on the L1 causes a line of
1852 zeroes to be installed. However, ejecting a line containing
1853 nonzeroes risks losing origin information permanently. In order to
1854 prevent such lossage, ejected nonzero lines are placed in a
1855 secondary cache (ocacheL2), which is an OSet (AVL tree) of cache
1856 lines. This can grow arbitrarily large, and so should ensure that
1857 Memcheck runs out of memory in preference to losing useful origin
1858 info due to cache size limitations.
1859
1860 Shadowing registers is a bit tricky, because the shadow values are
1861 32 bits, regardless of the size of the register. That gives a
1862 problem for registers smaller than 32 bits. The solution is to
1863 find spaces in the guest state that are unused, and use those to
1864 shadow guest state fragments smaller than 32 bits. For example, on
1865 ppc32/64, each vector register is 16 bytes long. If 4 bytes of the
1866 shadow are allocated for the register's otag, then there are still
1867 12 bytes left over which could be used to shadow 3 other values.
1868
1869 This implies there is some non-obvious mapping from guest state
1870 (start,length) pairs to the relevant shadow offset (for the origin
1871 tags). And it is unfortunately guest-architecture specific. The
1872 mapping is contained in mc_machine.c, which is quite lengthy but
1873 straightforward.
1874
1875 Instrumenting the IR
1876 ~~~~~~~~~~~~~~~~~~~~
1877
1878 Instrumentation is largely straightforward, and done by the
1879 functions schemeE and schemeS in mc_translate.c. These generate
1880 code for handling the origin tags of expressions (E) and statements
1881 (S) respectively. The rather strange names are a reference to the
1882 "compilation schemes" shown in Simon Peyton Jones' book "The
1883 Implementation of Functional Programming Languages" (Prentice Hall,
1884 1987, see
1885 http://research.microsoft.com/~simonpj/papers/slpj-book-1987/index.htm).
1886
1887 schemeS merely arranges to move shadow values around the guest
1888 state to track the incoming IR. schemeE is largely trivial too.
1889 The only significant point is how to compute the otag corresponding
1890 to binary (or ternary, quaternary, etc) operator applications. The
1891 rule is simple: just take whichever value is larger (32-bit
1892 unsigned max). Constants get the special value zero. Hence this
1893 rule always propagates a nonzero (known) otag in preference to a
1894 zero (unknown, or more likely, value-is-defined) tag, as we want.
1895 If two different undefined values are inputs to a binary operator
1896 application, then which is propagated is arbitrary, but that
1897 doesn't matter, since the program is erroneous in using either of
1898 the values, and so there's no point in attempting to propagate
1899 both.
1900
1901 Since constants are abstracted to (otag) zero, much of the
1902 instrumentation code can be folded out without difficulty by the
1903 generic post-instrumentation IR cleanup pass, using these rules:
1904 Max32U(0,x) -> x, Max32U(x,0) -> x, and Max32U(x,y) where x and y are
1905 constants is evaluated at JIT time, followed by the resulting dead
1906 code removal. In practice this causes surprisingly few Max32Us to
1907 survive through to backend code generation.
1908
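   In other words the shadow (otag) computation generated for an
   operator application is, in effect (a sketch of the scheme, not the
   actual generated IR):

      otag_of(op(x,y))  =  Max32U( otag_of(x), otag_of(y) )
      otag_of(constant) =  0

   so Max32U(0,t) folds to t, and applications whose arguments are all
   constants fold to zero and disappear altogether.
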
1909 Integration with the V-bits machinery
1910 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1911
1912 This is again largely straightforward. Mostly the otag and V bits
1913 stuff are independent. The only point of interaction is when the V
1914 bits instrumenter creates a call to a helper function to report an
1915 uninitialised value error -- in that case it must first use schemeE
1916 to get hold of the origin tag expression for the value, and pass
1917 that to the helper too.
1918
1919 There is the usual stuff to do with setting address range
1920 permissions. When memory is painted undefined, we must also know
1921 the origin tag to paint with, which involves some tedious plumbing,
1922 particularly to do with the fast case stack handlers. When memory
1923 is painted defined or noaccess then the origin tags must be forced
1924 to zero.
1925
1926 One of the goals of the implementation was to ensure that the
1927 non-origin tracking mode isn't slowed down at all. To do this,
1928 various functions to do with memory permissions setting (again,
1929 mostly pertaining to the stack) are duplicated for the with- and
1930 without-otag case.
1931
1932 Dealing with stack redzones, and the NIA cache
1933 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1934
1935 This is one of the few non-obvious parts of the implementation.
1936
1937 Some ABIs (amd64-ELF, ppc64-ELF, ppc32/64-XCOFF) define a small
1938 reserved area below the stack pointer, that can be used as scratch
1939 space by compiler generated code for functions. In the Memcheck
1940 sources this is referred to as the "stack redzone". The important
1941 thing here is that such redzones are considered volatile across
1942 function calls and returns. So Memcheck takes care to mark them as
1943 undefined for each call and return, on the afflicted platforms.
1944 Past experience shows this is essential in order to get reliable
1945 messages about uninitialised values that come from the stack.
1946
1947 So the question is, when we paint a redzone undefined, what origin
1948 tag should we use for it? Consider a function f() calling g(). If
1949 we paint the redzone using an otag derived from the ExeContext of
1950 the CALL/BL instruction in f, then any errors in g causing it to
1951 use uninitialised values that happen to lie in the redzone, will be
1952 reported as having their origin in f. Which is highly confusing.
1953
1954 The same applies for returns: if, on a return, we paint the redzone
1955 using an origin tag derived from the ExeContext of the RET/BLR
1956 instruction in g, then any later errors in f causing it to use
1957 uninitialised values in the redzone, will be reported as having
1958 their origin in g. Which is just as confusing.
1959
1960 To do it right, in both cases we need to use an origin tag which
1961 pertains to the instruction which dynamically follows the CALL/BL
1962 or RET/BLR. In short, one derived from the NIA - the "next
1963 instruction address".
1964
1965 To make this work, Memcheck's redzone-painting helper,
1966 MC_(helperc_MAKE_STACK_UNINIT), now takes a third argument, the
1967 NIA. It converts the NIA to a 1-element ExeContext, and uses that
1968 ExeContext's ECU as the basis for the otag used to paint the
1969 redzone. The expensive part of this is converting an NIA into an
1970 ECU, since this happens once for every call and every return. So
1971 we use a simple 511-line, 2-way set associative cache
1972 (nia_to_ecu_cache) to cache the mappings, and that knocks most of
1973 the cost out.
1974
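   In outline, such a cache needs to hold nothing more than (NIA, ECU)
   pairs, two per set.  A hypothetical sketch of one possible layout
   (names invented here; the real cache may differ in detail):

      typedef struct { UWord nia0; UWord ecu0;    // way 0
                       UWord nia1; UWord ecu1; }  // way 1
              NiaEcuCacheEnt;
      static NiaEcuCacheEnt nia_to_ecu_cache[511];

   Lookup hashes the NIA to a set and probes both ways; on a miss, a
   depth-1 ExeContext is created from the NIA, its ECU is installed in
   way 0, and the previous way-0 entry is demoted to way 1.
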
1975 Further background comments
1976 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
1977
1978 > Question: why is otag a UInt? Wouldn't a UWord be better? Isn't
1979 > it really just the address of the relevant ExeContext?
1980
1981 Well, it's not the address, but a value which has a 1-1 mapping
1982 with ExeContexts, and is guaranteed not to be zero, since zero
1983 denotes (to memcheck) "unknown origin or defined value". So these
1984 UInts are just numbers starting at 4 and incrementing by 4; each
1985 ExeContext is given a number when it is created. (*** NOTE this
1986 confuses otags and ECUs; see comments above ***).
1987
1988 Making these otags 32-bit regardless of the machine's word size
1989 makes the 64-bit implementation easier (next para). And it doesn't
1990 really limit us in any way, since for the tags to overflow would
1991 require that the program somehow caused 2^30-1 different
1992 ExeContexts to be created, in which case it is probably in deep
1993 trouble. Not to mention V will have soaked up many tens of
1994 gigabytes of memory merely to store them all.
1995
1996 So having 64-bit origins doesn't really buy you anything, and has
1997 the following downsides:
1998
1999 Suppose that instead, an otag is a UWord. This would mean that, on
2000 a 64-bit target,
2001
2002 1. It becomes hard to shadow any element of guest state which is
2003 smaller than 8 bytes. To do so means you'd need to find some
2004 8-byte-sized hole in the guest state which you don't want to
2005 shadow, and use that instead to hold the otag. On ppc64, the
2006 condition code register(s) are split into 20 UChar sized pieces,
2007 all of which need to be tracked (guest_XER_SO .. guest_CR7_0)
2008 and so that would entail finding 160 bytes somewhere else in the
2009 guest state.
2010
2011 Even on x86, I want to track origins for %AH .. %DH (bits 15:8
2012 of %EAX .. %EDX) that are separate from %AL .. %DL (bits 7:0 of
2013 same) and so I had to look for 4 untracked otag-sized areas in
2014 the guest state to make that possible.
2015
2016 The same problem exists of course when origin tags are only 32
2017 bits, but it's less extreme.
2018
2019 2. (More compelling) it doubles the size of the origin shadow
2020 memory. Given that the shadow memory is organised as a fixed
2021 size cache, and that accuracy of tracking is limited by origins
2022 falling out the cache due to space conflicts, this isn't good.
2023
2024 > Another question: is the origin tracking perfect, or are there
2025 > cases where it fails to determine an origin?
2026
2027 It is imperfect for at least the following reasons, and
2028 probably more:
2029
2030 * Insufficient capacity in the origin cache. When a line is
2031 evicted from the cache it is gone forever, and so subsequent
2032 queries for the line produce zero, indicating no origin
2033 information. Interestingly, a line containing all zeroes can be
2034 evicted "free" from the cache, since it contains no useful
2035 information, so there is scope perhaps for some cleverer cache
2036 management schemes. (*** NOTE, with the introduction of the
2037 second level origin tag cache, ocacheL2, this is no longer a
2038 problem. ***)
2039
2040 * The origin cache only stores one otag per 32-bits of address
2041 space, plus 4 bits indicating which of the 4 bytes has that tag
2042 and which are considered defined. The result is that if two
2043 undefined bytes in the same word are stored in memory, the first
2044 stored byte's origin will be lost and replaced by the origin for
2045 the second byte.
2046
2047 * Nonzero origin tags for defined values. Consider a binary
2048 operator application op(x,y). Suppose y is undefined (and so has
2049 a valid nonzero origin tag), and x is defined, but erroneously
2050 has a nonzero origin tag (defined values should have tag zero).
2051 If the erroneous tag has a numeric value greater than y's tag,
2052 then the rule for propagating origin tags though binary
2053 operations, which is simply to take the unsigned max of the two
2054 tags, will erroneously propagate x's tag rather than y's.
2055
2056 * Some obscure uses of x86/amd64 byte registers can cause lossage
2057 or confusion of origins. %AH .. %DH are treated as different
2058 from, and unrelated to, their parent registers, %EAX .. %EDX.
2059 So some weird sequences like
2060
2061 movb undefined-value, %AH
2062 movb defined-value, %AL
2063 .. use %AX or %EAX ..
2064
2065 will cause the origin attributed to %AH to be ignored, since %AL,
2066 %AX, %EAX are treated as the same register, and %AH as a
2067 completely separate one.
2068
2069 But having said all that, it actually seems to work fairly well in
2070 practice.
2071 */
2072
2073 static UWord stats_ocacheL1_find = 0;
2074 static UWord stats_ocacheL1_found_at_1 = 0;
2075 static UWord stats_ocacheL1_found_at_N = 0;
2076 static UWord stats_ocacheL1_misses = 0;
2077 static UWord stats_ocacheL1_lossage = 0;
2078 static UWord stats_ocacheL1_movefwds = 0;
2079
2080 static UWord stats__ocacheL2_refs = 0;
2081 static UWord stats__ocacheL2_misses = 0;
2082 static UWord stats__ocacheL2_n_nodes_max = 0;
2083
2084 /* Cache of 32-bit values, one every 32 bits of address space */
2085
2086 #define OC_BITS_PER_LINE 5
2087 #define OC_W32S_PER_LINE (1 << (OC_BITS_PER_LINE - 2))
2088
2089 static INLINE UWord oc_line_offset ( Addr a ) {
2090 return (a >> 2) & (OC_W32S_PER_LINE - 1);
2091 }
2092 static INLINE Bool is_valid_oc_tag ( Addr tag ) {
2093 return 0 == (tag & ((1 << OC_BITS_PER_LINE) - 1));
2094 }
2095
2096 #define OC_LINES_PER_SET 2
2097
2098 #define OC_N_SET_BITS 20
2099 #define OC_N_SETS (1 << OC_N_SET_BITS)
2100
2101 /* These settings give:
2102 64 bit host: ocache: 100,663,296 sizeB 67,108,864 useful
2103 32 bit host: ocache: 92,274,688 sizeB 67,108,864 useful
2104 */
2105
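/* Worked example of how an address decomposes under these settings
   (OC_BITS_PER_LINE == 5, OC_N_SET_BITS == 20), matching oc_line_offset
   above and find_OCacheLine below:

      tag     = a & ~((1 << OC_BITS_PER_LINE) - 1)    32-byte line base
      setno   = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1)
      lineoff = (a >> 2) & (OC_W32S_PER_LINE - 1)

   e.g. a == 0x4025012C gives tag == 0x40250120, setno == 0x12809,
   lineoff == 3. */
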
2106 #define OC_MOVE_FORWARDS_EVERY_BITS 7
2107
2108
2109 typedef
2110 struct {
2111 Addr tag;
2112 UInt w32[OC_W32S_PER_LINE];
2113 UChar descr[OC_W32S_PER_LINE];
2114 }
2115 OCacheLine;
2116
2117 /* Classify and also sanity-check 'line'. Return 'e' (empty) if not
2118 in use, 'n' (nonzero) if it contains at least one valid origin tag,
2119 and 'z' if all the represented tags are zero. */
2120 static UChar classify_OCacheLine ( OCacheLine* line )
2121 {
2122 UWord i;
2123 if (line->tag == 1/*invalid*/)
2124 return 'e'; /* EMPTY */
2125 tl_assert(is_valid_oc_tag(line->tag));
2126 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2127 tl_assert(0 == ((~0xF) & line->descr[i]));
2128 if (line->w32[i] > 0 && line->descr[i] > 0)
2129 return 'n'; /* NONZERO - contains useful info */
2130 }
2131 return 'z'; /* ZERO - no useful info */
2132 }
2133
2134 typedef
2135 struct {
2136 OCacheLine line[OC_LINES_PER_SET];
2137 }
2138 OCacheSet;
2139
2140 typedef
2141 struct {
2142 OCacheSet set[OC_N_SETS];
2143 }
2144 OCache;
2145
2146 static OCache* ocacheL1 = NULL;
2147 static UWord ocacheL1_event_ctr = 0;
2148
2149 static void init_ocacheL2 ( void ); /* fwds */
2150 static void init_OCache ( void )
2151 {
2152 UWord line, set;
2153 tl_assert(MC_(clo_mc_level) >= 3);
2154 tl_assert(ocacheL1 == NULL);
2155 ocacheL1 = VG_(am_shadow_alloc)(sizeof(OCache));
2156 if (ocacheL1 == NULL) {
2157 VG_(out_of_memory_NORETURN)( "memcheck:allocating ocacheL1",
2158 sizeof(OCache) );
2159 }
2160 tl_assert(ocacheL1 != NULL);
2161 for (set = 0; set < OC_N_SETS; set++) {
2162 for (line = 0; line < OC_LINES_PER_SET; line++) {
2163 ocacheL1->set[set].line[line].tag = 1/*invalid*/;
2164 }
2165 }
2166 init_ocacheL2();
2167 }
2168
2169 static void moveLineForwards ( OCacheSet* set, UWord lineno )
2170 {
2171 OCacheLine tmp;
2172 stats_ocacheL1_movefwds++;
2173 tl_assert(lineno > 0 && lineno < OC_LINES_PER_SET);
2174 tmp = set->line[lineno-1];
2175 set->line[lineno-1] = set->line[lineno];
2176 set->line[lineno] = tmp;
2177 }
2178
2179 static void zeroise_OCacheLine ( OCacheLine* line, Addr tag ) {
2180 UWord i;
2181 for (i = 0; i < OC_W32S_PER_LINE; i++) {
2182 line->w32[i] = 0; /* NO ORIGIN */
2183 line->descr[i] = 0; /* REALLY REALLY NO ORIGIN! */
2184 }
2185 line->tag = tag;
2186 }
2187
2188 //////////////////////////////////////////////////////////////
2189 //// OCache backing store
2190
2191 static OSet* ocacheL2 = NULL;
2192
2193 static void* ocacheL2_malloc ( HChar* cc, SizeT szB ) {
2194 return VG_(malloc)(cc, szB);
2195 }
2196 static void ocacheL2_free ( void* v ) {
2197 VG_(free)( v );
2198 }
2199
2200 /* Stats: # nodes currently in tree */
2201 static UWord stats__ocacheL2_n_nodes = 0;
2202
2203 static void init_ocacheL2 ( void )
2204 {
2205 tl_assert(!ocacheL2);
2206 tl_assert(sizeof(Word) == sizeof(Addr)); /* since OCacheLine.tag :: Addr */
2207 tl_assert(0 == offsetof(OCacheLine,tag));
2208 ocacheL2
2209 = VG_(OSetGen_Create)( offsetof(OCacheLine,tag),
2210 NULL, /* fast cmp */
2211 ocacheL2_malloc, "mc.ioL2", ocacheL2_free);
2212 tl_assert(ocacheL2);
2213 stats__ocacheL2_n_nodes = 0;
2214 }
2215
2216 /* Find line with the given tag in the tree, or NULL if not found. */
2217 static OCacheLine* ocacheL2_find_tag ( Addr tag )
2218 {
2219 OCacheLine* line;
2220 tl_assert(is_valid_oc_tag(tag));
2221 stats__ocacheL2_refs++;
2222 line = VG_(OSetGen_Lookup)( ocacheL2, &tag );
2223 return line;
2224 }
2225
2226 /* Delete the line with the given tag from the tree, if it is present, and
2227 free up the associated memory. */
2228 static void ocacheL2_del_tag ( Addr tag )
2229 {
2230 OCacheLine* line;
2231 tl_assert(is_valid_oc_tag(tag));
2232 stats__ocacheL2_refs++;
2233 line = VG_(OSetGen_Remove)( ocacheL2, &tag );
2234 if (line) {
2235 VG_(OSetGen_FreeNode)(ocacheL2, line);
2236 tl_assert(stats__ocacheL2_n_nodes > 0);
2237 stats__ocacheL2_n_nodes--;
2238 }
2239 }
2240
2241 /* Add a copy of the given line to the tree. It must not already be
2242 present. */
2243 static void ocacheL2_add_line ( OCacheLine* line )
2244 {
2245 OCacheLine* copy;
2246 tl_assert(is_valid_oc_tag(line->tag));
2247 copy = VG_(OSetGen_AllocNode)( ocacheL2, sizeof(OCacheLine) );
2248 tl_assert(copy);
2249 *copy = *line;
2250 stats__ocacheL2_refs++;
2251 VG_(OSetGen_Insert)( ocacheL2, copy );
2252 stats__ocacheL2_n_nodes++;
2253 if (stats__ocacheL2_n_nodes > stats__ocacheL2_n_nodes_max)
2254 stats__ocacheL2_n_nodes_max = stats__ocacheL2_n_nodes;
2255 }
2256
2257 ////
2258 //////////////////////////////////////////////////////////////
2259
2260 __attribute__((noinline))
2261 static OCacheLine* find_OCacheLine_SLOW ( Addr a )
2262 {
2263 OCacheLine *victim, *inL2;
2264 UChar c;
2265 UWord line;
2266 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2267 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2268 UWord tag = a & tagmask;
2269 tl_assert(setno >= 0 && setno < OC_N_SETS);
2270
2271 /* we already tried line == 0; skip therefore. */
2272 for (line = 1; line < OC_LINES_PER_SET; line++) {
2273 if (ocacheL1->set[setno].line[line].tag == tag) {
2274 if (line == 1) {
2275 stats_ocacheL1_found_at_1++;
2276 } else {
2277 stats_ocacheL1_found_at_N++;
2278 }
2279 if (UNLIKELY(0 == (ocacheL1_event_ctr++
2280 & ((1<<OC_MOVE_FORWARDS_EVERY_BITS)-1)))) {
2281 moveLineForwards( &ocacheL1->set[setno], line );
2282 line--;
2283 }
2284 return &ocacheL1->set[setno].line[line];
2285 }
2286 }
2287
2288 /* A miss. Use the last slot. Implicitly this means we're
2289 ejecting the line in the last slot. */
2290 stats_ocacheL1_misses++;
2291 tl_assert(line == OC_LINES_PER_SET);
2292 line--;
2293 tl_assert(line > 0);
2294
2295 /* First, move the to-be-ejected line to the L2 cache. */
2296 victim = &ocacheL1->set[setno].line[line];
2297 c = classify_OCacheLine(victim);
2298 switch (c) {
2299 case 'e':
2300 /* the line is empty (has invalid tag); ignore it. */
2301 break;
2302 case 'z':
2303 /* line contains zeroes. We must ensure the backing store is
2304 updated accordingly, either by copying the line there
2305 verbatim, or by ensuring it isn't present there. We
2306 choose the latter on the basis that it reduces the size of
2307 the backing store. */
2308 ocacheL2_del_tag( victim->tag );
2309 break;
2310 case 'n':
2311 /* line contains at least one real, useful origin. Copy it
2312 to the backing store. */
2313 stats_ocacheL1_lossage++;
2314 inL2 = ocacheL2_find_tag( victim->tag );
2315 if (inL2) {
2316 *inL2 = *victim;
2317 } else {
2318 ocacheL2_add_line( victim );
2319 }
2320 break;
2321 default:
2322 tl_assert(0);
2323 }
2324
2325 /* Now we must reload the L1 cache from the backing tree, if
2326 possible. */
2327 tl_assert(tag != victim->tag); /* stay sane */
2328 inL2 = ocacheL2_find_tag( tag );
2329 if (inL2) {
2330 /* We're in luck. It's in the L2. */
2331 ocacheL1->set[setno].line[line] = *inL2;
2332 } else {
2333 /* Missed at both levels of the cache hierarchy. We have to
2334 declare it as full of zeroes (unknown origins). */
2335 stats__ocacheL2_misses++;
2336 zeroise_OCacheLine( &ocacheL1->set[setno].line[line], tag );
2337 }
2338
2339 /* Move it one forwards */
2340 moveLineForwards( &ocacheL1->set[setno], line );
2341 line--;
2342
2343 return &ocacheL1->set[setno].line[line];
2344 }
2345
2346 static INLINE OCacheLine* find_OCacheLine ( Addr a )
2347 {
2348 UWord setno = (a >> OC_BITS_PER_LINE) & (OC_N_SETS - 1);
2349 UWord tagmask = ~((1 << OC_BITS_PER_LINE) - 1);
2350 UWord tag = a & tagmask;
2351
2352 stats_ocacheL1_find++;
2353
2354 if (OC_ENABLE_ASSERTIONS) {
2355 tl_assert(setno >= 0 && setno < OC_N_SETS);
2356 tl_assert(0 == (tag & (4 * OC_W32S_PER_LINE - 1)));
2357 }
2358
2359 if (LIKELY(ocacheL1->set[setno].line[0].tag == tag)) {
2360 return &ocacheL1->set[setno].line[0];
2361 }
2362
2363 return find_OCacheLine_SLOW( a );
2364 }
2365
2366 static INLINE void set_aligned_word64_Origin_to_undef ( Addr a, UInt otag )
2367 {
2368 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2369 //// Set the origins for a+0 .. a+7
2370 { OCacheLine* line;
2371 UWord lineoff = oc_line_offset(a);
2372 if (OC_ENABLE_ASSERTIONS) {
2373 tl_assert(lineoff >= 0
2374 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2375 }
2376 line = find_OCacheLine( a );
2377 line->descr[lineoff+0] = 0xF;
2378 line->descr[lineoff+1] = 0xF;
2379 line->w32[lineoff+0] = otag;
2380 line->w32[lineoff+1] = otag;
2381 }
2382 //// END inlined, specialised version of MC_(helperc_b_store8)
2383 }
2384
2385
2386 /*------------------------------------------------------------*/
2387 /*--- Aligned fast case permission setters, ---*/
2388 /*--- for dealing with stacks ---*/
2389 /*------------------------------------------------------------*/
2390
2391 /*--------------------- 32-bit ---------------------*/
2392
2393 /* Nb: by "aligned" here we mean 4-byte aligned */
2394
2395 static INLINE void make_aligned_word32_undefined ( Addr a )
2396 {
2397 PROF_EVENT(300, "make_aligned_word32_undefined");
2398
2399 #ifndef PERF_FAST_STACK2
2400 make_mem_undefined(a, 4);
2401 #else
2402 {
2403 UWord sm_off;
2404 SecMap* sm;
2405
2406 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2407 PROF_EVENT(301, "make_aligned_word32_undefined-slow1");
2408 make_mem_undefined(a, 4);
2409 return;
2410 }
2411
2412 sm = get_secmap_for_writing_low(a);
2413 sm_off = SM_OFF(a);
2414 sm->vabits8[sm_off] = VA_BITS8_UNDEFINED;
2415 }
2416 #endif
2417 }
2418
2419 static INLINE
2420 void make_aligned_word32_undefined_w_otag ( Addr a, UInt otag )
2421 {
2422 make_aligned_word32_undefined(a);
2423 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2424 //// Set the origins for a+0 .. a+3
2425 { OCacheLine* line;
2426 UWord lineoff = oc_line_offset(a);
2427 if (OC_ENABLE_ASSERTIONS) {
2428 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2429 }
2430 line = find_OCacheLine( a );
2431 line->descr[lineoff] = 0xF;
2432 line->w32[lineoff] = otag;
2433 }
2434 //// END inlined, specialised version of MC_(helperc_b_store4)
2435 }
2436
2437 static INLINE
2438 void make_aligned_word32_noaccess ( Addr a )
2439 {
2440 PROF_EVENT(310, "make_aligned_word32_noaccess");
2441
2442 #ifndef PERF_FAST_STACK2
2443 MC_(make_mem_noaccess)(a, 4);
2444 #else
2445 {
2446 UWord sm_off;
2447 SecMap* sm;
2448
2449 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2450 PROF_EVENT(311, "make_aligned_word32_noaccess-slow1");
2451 MC_(make_mem_noaccess)(a, 4);
2452 return;
2453 }
2454
2455 sm = get_secmap_for_writing_low(a);
2456 sm_off = SM_OFF(a);
2457 sm->vabits8[sm_off] = VA_BITS8_NOACCESS;
2458
2459 //// BEGIN inlined, specialised version of MC_(helperc_b_store4)
2460 //// Set the origins for a+0 .. a+3.
2461 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2462 OCacheLine* line;
2463 UWord lineoff = oc_line_offset(a);
2464 if (OC_ENABLE_ASSERTIONS) {
2465 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
2466 }
2467 line = find_OCacheLine( a );
2468 line->descr[lineoff] = 0;
2469 }
2470 //// END inlined, specialised version of MC_(helperc_b_store4)
2471 }
2472 #endif
2473 }
2474
2475 /*--------------------- 64-bit ---------------------*/
2476
2477 /* Nb: by "aligned" here we mean 8-byte aligned */
2478
2479 static INLINE void make_aligned_word64_undefined ( Addr a )
2480 {
2481 PROF_EVENT(320, "make_aligned_word64_undefined");
2482
2483 #ifndef PERF_FAST_STACK2
2484 make_mem_undefined(a, 8);
2485 #else
2486 {
2487 UWord sm_off16;
2488 SecMap* sm;
2489
2490 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2491 PROF_EVENT(321, "make_aligned_word64_undefined-slow1");
2492 make_mem_undefined(a, 8);
2493 return;
2494 }
2495
2496 sm = get_secmap_for_writing_low(a);
2497 sm_off16 = SM_OFF_16(a);
2498 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_UNDEFINED;
2499 }
2500 #endif
2501 }
2502
2503 static INLINE
2504 void make_aligned_word64_undefined_w_otag ( Addr a, UInt otag )
2505 {
2506 make_aligned_word64_undefined(a);
2507 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2508 //// Set the origins for a+0 .. a+7
2509 { OCacheLine* line;
2510 UWord lineoff = oc_line_offset(a);
2511 tl_assert(lineoff >= 0
2512 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2513 line = find_OCacheLine( a );
2514 line->descr[lineoff+0] = 0xF;
2515 line->descr[lineoff+1] = 0xF;
2516 line->w32[lineoff+0] = otag;
2517 line->w32[lineoff+1] = otag;
2518 }
2519 //// END inlined, specialised version of MC_(helperc_b_store8)
2520 }
2521
2522 static INLINE
2523 void make_aligned_word64_noaccess ( Addr a )
2524 {
2525 PROF_EVENT(330, "make_aligned_word64_noaccess");
2526
2527 #ifndef PERF_FAST_STACK2
2528 MC_(make_mem_noaccess)(a, 8);
2529 #else
2530 {
2531 UWord sm_off16;
2532 SecMap* sm;
2533
2534 if (UNLIKELY(a > MAX_PRIMARY_ADDRESS)) {
2535 PROF_EVENT(331, "make_aligned_word64_noaccess-slow1");
2536 MC_(make_mem_noaccess)(a, 8);
2537 return;
2538 }
2539
2540 sm = get_secmap_for_writing_low(a);
2541 sm_off16 = SM_OFF_16(a);
2542 ((UShort*)(sm->vabits8))[sm_off16] = VA_BITS16_NOACCESS;
2543
2544 //// BEGIN inlined, specialised version of MC_(helperc_b_store8)
2545 //// Clear the origins for a+0 .. a+7.
2546 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
2547 OCacheLine* line;
2548 UWord lineoff = oc_line_offset(a);
2549 tl_assert(lineoff >= 0
2550 && lineoff < OC_W32S_PER_LINE -1/*'cos 8-aligned*/);
2551 line = find_OCacheLine( a );
2552 line->descr[lineoff+0] = 0;
2553 line->descr[lineoff+1] = 0;
2554 }
2555 //// END inlined, specialised version of MC_(helperc_b_store8)
2556 }
2557 #endif
2558 }
2559
2560
2561 /*------------------------------------------------------------*/
2562 /*--- Stack pointer adjustment ---*/
2563 /*------------------------------------------------------------*/
2564
2565 #ifdef PERF_FAST_STACK
2566 # define MAYBE_USED
2567 #else
2568 # define MAYBE_USED __attribute__((unused))
2569 #endif
2570
2571 /*--------------- adjustment by 4 bytes ---------------*/
2572
2573 MAYBE_USED
2574 static void VG_REGPARM(2) mc_new_mem_stack_4_w_ECU(Addr new_SP, UInt ecu)
2575 {
2576 UInt otag = ecu | MC_OKIND_STACK;
2577 PROF_EVENT(110, "new_mem_stack_4");
2578 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2579 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2580 } else {
2581 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 4, otag );
2582 }
2583 }
2584
2585 MAYBE_USED
2586 static void VG_REGPARM(1) mc_new_mem_stack_4(Addr new_SP)
2587 {
2588 PROF_EVENT(110, "new_mem_stack_4");
2589 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2590 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2591 } else {
2592 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 4 );
2593 }
2594 }
2595
2596 MAYBE_USED
2597 static void VG_REGPARM(1) mc_die_mem_stack_4(Addr new_SP)
2598 {
2599 PROF_EVENT(120, "die_mem_stack_4");
2600 if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2601 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2602 } else {
2603 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-4, 4 );
2604 }
2605 }
2606
2607 /*--------------- adjustment by 8 bytes ---------------*/
2608
2609 MAYBE_USED
2610 static void VG_REGPARM(2) mc_new_mem_stack_8_w_ECU(Addr new_SP, UInt ecu)
2611 {
2612 UInt otag = ecu | MC_OKIND_STACK;
2613 PROF_EVENT(111, "new_mem_stack_8");
2614 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2615 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
2616 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2617 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2618 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2619 } else {
2620 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 8, otag );
2621 }
2622 }
2623
2624 MAYBE_USED
2625 static void VG_REGPARM(1) mc_new_mem_stack_8(Addr new_SP)
2626 {
2627 PROF_EVENT(111, "new_mem_stack_8");
2628 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2629 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2630 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2631 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2632 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2633 } else {
2634 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 8 );
2635 }
2636 }
2637
2638 MAYBE_USED
2639 static void VG_REGPARM(1) mc_die_mem_stack_8(Addr new_SP)
2640 {
2641 PROF_EVENT(121, "die_mem_stack_8");
2642 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2643 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2644 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2645 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2646 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2647 } else {
2648 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-8, 8 );
2649 }
2650 }
2651
2652 /*--------------- adjustment by 12 bytes ---------------*/
2653
2654 MAYBE_USED
2655 static void VG_REGPARM(2) mc_new_mem_stack_12_w_ECU(Addr new_SP, UInt ecu)
2656 {
2657 UInt otag = ecu | MC_OKIND_STACK;
2658 PROF_EVENT(112, "new_mem_stack_12");
2659 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2660 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2661 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2662 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2663 /* from previous test we don't have 8-alignment at offset +0,
2664 hence must have 8 alignment at offsets +4/-4. Hence safe to
2665 do 4 at +0 and then 8 at +4. */
2666 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2667 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4, otag );
2668 } else {
2669 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 12, otag );
2670 }
2671 }
2672
2673 MAYBE_USED
2674 static void VG_REGPARM(1) mc_new_mem_stack_12(Addr new_SP)
2675 {
2676 PROF_EVENT(112, "new_mem_stack_12");
2677 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2678 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2679 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2680 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2681 /* from previous test we don't have 8-alignment at offset +0,
2682 hence must have 8 alignment at offsets +4/-4. Hence safe to
2683 do 4 at +0 and then 8 at +4. */
2684 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2685 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2686 } else {
2687 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 12 );
2688 }
2689 }
2690
2691 MAYBE_USED
2692 static void VG_REGPARM(1) mc_die_mem_stack_12(Addr new_SP)
2693 {
2694 PROF_EVENT(122, "die_mem_stack_12");
2695 /* Note the -12 in the test */
2696 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP-12 )) {
2697 /* We have 8-alignment at -12, hence ok to do 8 at -12 and 4 at
2698 -4. */
2699 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2700 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2701 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2702 /* We have 4-alignment at +0, but we don't have 8-alignment at
2703 -12. So we must have 8-alignment at -8. Hence do 4 at -12
2704 and then 8 at -8. */
2705 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2706 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2707 } else {
2708 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-12, 12 );
2709 }
2710 }
2711
2712 /*--------------- adjustment by 16 bytes ---------------*/
2713
2714 MAYBE_USED
2715 static void VG_REGPARM(2) mc_new_mem_stack_16_w_ECU(Addr new_SP, UInt ecu)
2716 {
2717 UInt otag = ecu | MC_OKIND_STACK;
2718 PROF_EVENT(113, "new_mem_stack_16");
2719 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2720 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2721 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2722 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
2723 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2724 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2725 Hence do 4 at +0, 8 at +4, 4 at +12. */
2726 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2727 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2728 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2729 } else {
2730 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 16, otag );
2731 }
2732 }
2733
2734 MAYBE_USED
2735 static void VG_REGPARM(1) mc_new_mem_stack_16(Addr new_SP)
2736 {
2737 PROF_EVENT(113, "new_mem_stack_16");
2738 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2739 /* Have 8-alignment at +0, hence do 8 at +0 and 8 at +8. */
2740 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2741 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2742 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2743 /* Have 4 alignment at +0 but not 8; hence 8 must be at +4.
2744 Hence do 4 at +0, 8 at +4, 4 at +12. */
2745 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2746 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2747 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2748 } else {
2749 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 16 );
2750 }
2751 }
2752
2753 MAYBE_USED
2754 static void VG_REGPARM(1) mc_die_mem_stack_16(Addr new_SP)
2755 {
2756 PROF_EVENT(123, "die_mem_stack_16");
2757 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2758 /* Have 8-alignment at +0, hence do 8 at -16 and 8 at -8. */
2759 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2760 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-8 );
2761 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2762 /* 8 alignment must be at -12. Do 4 at -16, 8 at -12, 4 at -4. */
2763 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2764 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2765 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2766 } else {
2767 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-16, 16 );
2768 }
2769 }
2770
2771 /*--------------- adjustment by 32 bytes ---------------*/
2772
2773 MAYBE_USED
2774 static void VG_REGPARM(2) mc_new_mem_stack_32_w_ECU(Addr new_SP, UInt ecu)
2775 {
2776 UInt otag = ecu | MC_OKIND_STACK;
2777 PROF_EVENT(114, "new_mem_stack_32");
2778 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2779 /* Straightforward */
2780 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2781 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2782 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2783 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2784 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2785 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
2786 +0,+28. */
2787 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2788 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+4 , otag );
2789 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+12, otag );
2790 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+20, otag );
2791 make_aligned_word32_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+28, otag );
2792 } else {
2793 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 32, otag );
2794 }
2795 }
2796
2797 MAYBE_USED
2798 static void VG_REGPARM(1) mc_new_mem_stack_32(Addr new_SP)
2799 {
2800 PROF_EVENT(114, "new_mem_stack_32");
2801 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2802 /* Straightforward */
2803 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2804 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2805 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2806 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2807 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2808 /* 8 alignment must be at +4. Hence do 8 at +4,+12,+20 and 4 at
2809 +0,+28. */
2810 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2811 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+4 );
2812 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+12 );
2813 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+20 );
2814 make_aligned_word32_undefined ( -VG_STACK_REDZONE_SZB + new_SP+28 );
2815 } else {
2816 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 32 );
2817 }
2818 }
2819
2820 MAYBE_USED
2821 static void VG_REGPARM(1) mc_die_mem_stack_32(Addr new_SP)
2822 {
2823 PROF_EVENT(124, "die_mem_stack_32");
2824 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2825 /* Straightforward */
2826 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2827 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2828 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2829 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2830 } else if (VG_IS_4_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2831 /* 8 alignment must be at -4 etc. Hence do 8 at -12,-20,-28 and
2832 4 at -32,-4. */
2833 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2834 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-28 );
2835 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-20 );
2836 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-12 );
2837 make_aligned_word32_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-4 );
2838 } else {
2839 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-32, 32 );
2840 }
2841 }
2842
2843 /*--------------- adjustment by 112 bytes ---------------*/
2844
2845 MAYBE_USED
2846 static void VG_REGPARM(2) mc_new_mem_stack_112_w_ECU(Addr new_SP, UInt ecu)
2847 {
2848 UInt otag = ecu | MC_OKIND_STACK;
2849 PROF_EVENT(115, "new_mem_stack_112");
2850 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2851 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2852 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2853 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2854 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2855 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2856 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2857 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2858 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2859 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2860 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2861 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2862 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2863 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
2864 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
2865 } else {
2866 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 112, otag );
2867 }
2868 }
2869
2870 MAYBE_USED
2871 static void VG_REGPARM(1) mc_new_mem_stack_112(Addr new_SP)
2872 {
2873 PROF_EVENT(115, "new_mem_stack_112");
2874 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2875 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2876 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2877 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2878 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2879 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
2880 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
2881 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
2882 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
2883 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
2884 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
2885 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
2886 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
2887 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
2888 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
2889 } else {
2890 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 112 );
2891 }
2892 }
2893
2894 MAYBE_USED
2895 static void VG_REGPARM(1) mc_die_mem_stack_112(Addr new_SP)
2896 {
2897 PROF_EVENT(125, "die_mem_stack_112");
2898 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2899 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
2900 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
2901 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
2902 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
2903 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
2904 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
2905 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
2906 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
2907 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
2908 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
2909 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2910 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2911 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2912 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2913 } else {
2914 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-112, 112 );
2915 }
2916 }
2917
2918 /*--------------- adjustment by 128 bytes ---------------*/
2919
2920 MAYBE_USED
2921 static void VG_REGPARM(2) mc_new_mem_stack_128_w_ECU(Addr new_SP, UInt ecu)
2922 {
2923 UInt otag = ecu | MC_OKIND_STACK;
2924 PROF_EVENT(116, "new_mem_stack_128");
2925 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2926 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP , otag );
2927 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8 , otag );
2928 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
2929 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
2930 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
2931 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
2932 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
2933 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
2934 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
2935 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
2936 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
2937 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
2938 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
2939 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
2940 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
2941 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
2942 } else {
2943 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 128, otag );
2944 }
2945 }
2946
2947 MAYBE_USED
2948 static void VG_REGPARM(1) mc_new_mem_stack_128(Addr new_SP)
2949 {
2950 PROF_EVENT(116, "new_mem_stack_128");
2951 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2952 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
2953 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
2954 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
2955 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
2956 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
2957 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
2958 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
2959 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
2960 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
2961 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
2962 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
2963 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
2964 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
2965 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
2966 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
2967 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
2968 } else {
2969 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 128 );
2970 }
2971 }
2972
2973 MAYBE_USED
2974 static void VG_REGPARM(1) mc_die_mem_stack_128(Addr new_SP)
2975 {
2976 PROF_EVENT(126, "die_mem_stack_128");
2977 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
2978 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
2979 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
2980 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
2981 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
2982 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
2983 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
2984 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
2985 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
2986 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
2987 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
2988 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
2989 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
2990 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
2991 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
2992 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
2993 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
2994 } else {
2995 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-128, 128 );
2996 }
2997 }
2998
2999 /*--------------- adjustment by 144 bytes ---------------*/
3000
3001 MAYBE_USED
3002 static void VG_REGPARM(2) mc_new_mem_stack_144_w_ECU(Addr new_SP, UInt ecu)
3003 {
3004 UInt otag = ecu | MC_OKIND_STACK;
3005 PROF_EVENT(117, "new_mem_stack_144");
3006 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3007 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3008 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3009 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3010 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3011 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3012 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3013 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3014 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3015 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3016 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3017 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3018 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3019 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3020 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3021 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3022 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3023 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3024 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3025 } else {
3026 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 144, otag );
3027 }
3028 }
3029
3030 MAYBE_USED
3031 static void VG_REGPARM(1) mc_new_mem_stack_144(Addr new_SP)
3032 {
3033 PROF_EVENT(117, "new_mem_stack_144");
3034 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3035 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3036 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3037 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3038 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3039 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3040 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3041 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3042 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3043 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3044 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3045 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3046 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3047 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3048 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3049 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3050 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3051 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3052 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3053 } else {
3054 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 144 );
3055 }
3056 }
3057
3058 MAYBE_USED
3059 static void VG_REGPARM(1) mc_die_mem_stack_144(Addr new_SP)
3060 {
3061 PROF_EVENT(127, "die_mem_stack_144");
3062 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3063 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3064 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3065 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3066 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3067 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3068 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3069 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3070 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3071 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3072 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3073 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3074 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3075 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3076 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3077 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3078 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3079 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3080 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3081 } else {
3082 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-144, 144 );
3083 }
3084 }
3085
3086 /*--------------- adjustment by 160 bytes ---------------*/
3087
3088 MAYBE_USED
3089 static void VG_REGPARM(2) mc_new_mem_stack_160_w_ECU(Addr new_SP, UInt ecu)
3090 {
3091 UInt otag = ecu | MC_OKIND_STACK;
3092 PROF_EVENT(118, "new_mem_stack_160");
3093 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3094 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP, otag );
3095 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+8, otag );
3096 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+16, otag );
3097 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+24, otag );
3098 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+32, otag );
3099 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+40, otag );
3100 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+48, otag );
3101 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+56, otag );
3102 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+64, otag );
3103 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+72, otag );
3104 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+80, otag );
3105 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+88, otag );
3106 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+96, otag );
3107 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+104, otag );
3108 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+112, otag );
3109 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+120, otag );
3110 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+128, otag );
3111 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+136, otag );
3112 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+144, otag );
3113 make_aligned_word64_undefined_w_otag ( -VG_STACK_REDZONE_SZB + new_SP+152, otag );
3114 } else {
3115 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + new_SP, 160, otag );
3116 }
3117 }
3118
3119 MAYBE_USED
3120 static void VG_REGPARM(1) mc_new_mem_stack_160(Addr new_SP)
3121 {
3122 PROF_EVENT(118, "new_mem_stack_160");
3123 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3124 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP );
3125 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+8 );
3126 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+16 );
3127 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+24 );
3128 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+32 );
3129 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+40 );
3130 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+48 );
3131 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+56 );
3132 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+64 );
3133 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+72 );
3134 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+80 );
3135 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+88 );
3136 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+96 );
3137 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+104 );
3138 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+112 );
3139 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+120 );
3140 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+128 );
3141 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+136 );
3142 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+144 );
3143 make_aligned_word64_undefined ( -VG_STACK_REDZONE_SZB + new_SP+152 );
3144 } else {
3145 make_mem_undefined ( -VG_STACK_REDZONE_SZB + new_SP, 160 );
3146 }
3147 }
3148
3149 MAYBE_USED
3150 static void VG_REGPARM(1) mc_die_mem_stack_160(Addr new_SP)
3151 {
3152 PROF_EVENT(128, "die_mem_stack_160");
3153 if (VG_IS_8_ALIGNED( -VG_STACK_REDZONE_SZB + new_SP )) {
3154 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-160);
3155 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-152);
3156 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-144);
3157 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-136);
3158 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-128);
3159 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-120);
3160 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-112);
3161 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-104);
3162 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-96 );
3163 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-88 );
3164 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-80 );
3165 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-72 );
3166 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-64 );
3167 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-56 );
3168 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-48 );
3169 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-40 );
3170 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-32 );
3171 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-24 );
3172 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP-16 );
3173 make_aligned_word64_noaccess ( -VG_STACK_REDZONE_SZB + new_SP- 8 );
3174 } else {
3175 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + new_SP-160, 160 );
3176 }
3177 }
3178
3179 /*--------------- adjustment by N bytes ---------------*/
3180
3181 static void mc_new_mem_stack_w_ECU ( Addr a, SizeT len, UInt ecu )
3182 {
3183 UInt otag = ecu | MC_OKIND_STACK;
3184 PROF_EVENT(115, "new_mem_stack_w_otag");
3185 MC_(make_mem_undefined_w_otag) ( -VG_STACK_REDZONE_SZB + a, len, otag );
3186 }
3187
3188 static void mc_new_mem_stack ( Addr a, SizeT len )
3189 {
3190 PROF_EVENT(115, "new_mem_stack");
3191 make_mem_undefined ( -VG_STACK_REDZONE_SZB + a, len );
3192 }
3193
3194 static void mc_die_mem_stack ( Addr a, SizeT len )
3195 {
3196 PROF_EVENT(125, "die_mem_stack");
3197 MC_(make_mem_noaccess) ( -VG_STACK_REDZONE_SZB + a, len );
3198 }
3199
3200
3201 /* The AMD64 ABI says:
3202
3203 "The 128-byte area beyond the location pointed to by %rsp is considered
3204 to be reserved and shall not be modified by signal or interrupt
3205 handlers. Therefore, functions may use this area for temporary data
3206 that is not needed across function calls. In particular, leaf functions
3207 may use this area for their entire stack frame, rather than adjusting
3208 the stack pointer in the prologue and epilogue. This area is known as
3209 red zone [sic]."
3210
3211 So after any call or return we need to mark this redzone as containing
3212 undefined values.
3213
3214 Consider this: we're in function f. f calls g. g moves rsp down
3215 modestly (say 16 bytes) and writes stuff all over the red zone, making it
3216 defined. g returns. f is buggy and reads from parts of the red zone
3217 that it didn't write on. But because g filled that area in, f is going
3218 to be picking up defined V bits and so any errors from reading bits of
3219 the red zone it didn't write, will be missed. The only solution I could
3220 think of was to make the red zone undefined when g returns to f.
3221
3222 This is in accordance with the ABI, which makes it clear the redzone
3223 is volatile across function calls.
3224
3225 The problem occurs the other way round too: f could fill the RZ up
3226 with defined values and g could mistakenly read them. So the RZ
3227 also needs to be nuked on function calls.
3228 */
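/* Illustrative (hypothetical) sketch of the first scenario above, in
   pseudo-assembly.  f's red zone is [rsp-128, rsp); while g runs, its
   return address and frame occupy that same area and get written, and
   hence become Defined:

       f:   call g              ; g's activity Defines bytes below f's rsp
            mov  rax, [rsp-8]   ; f buggily reads its red zone without ever
                                ; having written it -- the load would appear
                                ; Defined unless the area is re-undefined on
                                ; return, which is exactly what the stack
                                ; handlers above (and MAKE_STACK_UNINIT
                                ; below) arrange. */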
3229
3230
3231 /* Here's a simple cache to hold nia -> ECU mappings. It could be
3232 improved so as to have a lower miss rate. */
3233
3234 static UWord stats__nia_cache_queries = 0;
3235 static UWord stats__nia_cache_misses = 0;
3236
3237 typedef
3238 struct { UWord nia0; UWord ecu0; /* nia0 maps to ecu0 */
3239 UWord nia1; UWord ecu1; } /* nia1 maps to ecu1 */
3240 WCacheEnt;
3241
3242 #define N_NIA_TO_ECU_CACHE 511
3243
3244 static WCacheEnt nia_to_ecu_cache[N_NIA_TO_ECU_CACHE];
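/* Each cache entry is a 2-way set: lookups probe nia0 first, then nia1,
   and a hit on nia1 swaps the pair so the most recently used mapping ends
   up in slot 0 (a simple LRU-by-swap scheme; see convert_nia_to_ecu
   below).  A miss pushes the old slot-0 pair into slot 1.  The table size
   of 511 is, presumably, chosen to be odd so that the
   "nia % N_NIA_TO_ECU_CACHE" hash spreads word-aligned addresses across
   all sets rather than only the even ones. */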
3245
3246 static void init_nia_to_ecu_cache ( void )
3247 {
3248 UWord i;
3249 Addr zero_addr = 0;
3250 ExeContext* zero_ec;
3251 UInt zero_ecu;
3252 /* Fill all the slots with an entry for address zero, and the
3253 relevant otags accordingly. Hence the cache is initially filled
3254 with valid data. */
3255 zero_ec = VG_(make_depth_1_ExeContext_from_Addr)(zero_addr);
3256 tl_assert(zero_ec);
3257 zero_ecu = VG_(get_ECU_from_ExeContext)(zero_ec);
3258 tl_assert(VG_(is_plausible_ECU)(zero_ecu));
3259 for (i = 0; i < N_NIA_TO_ECU_CACHE; i++) {
3260 nia_to_ecu_cache[i].nia0 = zero_addr;
3261 nia_to_ecu_cache[i].ecu0 = zero_ecu;
3262 nia_to_ecu_cache[i].nia1 = zero_addr;
3263 nia_to_ecu_cache[i].ecu1 = zero_ecu;
3264 }
3265 }
3266
3267 static inline UInt convert_nia_to_ecu ( Addr nia )
3268 {
3269 UWord i;
3270 UInt ecu;
3271 ExeContext* ec;
3272
3273 tl_assert( sizeof(nia_to_ecu_cache[0].nia1) == sizeof(nia) );
3274
3275 stats__nia_cache_queries++;
3276 i = nia % N_NIA_TO_ECU_CACHE;
3277 tl_assert(i >= 0 && i < N_NIA_TO_ECU_CACHE);
3278
3279 if (LIKELY( nia_to_ecu_cache[i].nia0 == nia ))
3280 return nia_to_ecu_cache[i].ecu0;
3281
3282 if (LIKELY( nia_to_ecu_cache[i].nia1 == nia )) {
3283 # define SWAP(_w1,_w2) { UWord _t = _w1; _w1 = _w2; _w2 = _t; }
3284 SWAP( nia_to_ecu_cache[i].nia0, nia_to_ecu_cache[i].nia1 );
3285 SWAP( nia_to_ecu_cache[i].ecu0, nia_to_ecu_cache[i].ecu1 );
3286 # undef SWAP
3287 return nia_to_ecu_cache[i].ecu0;
3288 }
3289
3290 stats__nia_cache_misses++;
3291 ec = VG_(make_depth_1_ExeContext_from_Addr)(nia);
3292 tl_assert(ec);
3293 ecu = VG_(get_ECU_from_ExeContext)(ec);
3294 tl_assert(VG_(is_plausible_ECU)(ecu));
3295
3296 nia_to_ecu_cache[i].nia1 = nia_to_ecu_cache[i].nia0;
3297 nia_to_ecu_cache[i].ecu1 = nia_to_ecu_cache[i].ecu0;
3298
3299 nia_to_ecu_cache[i].nia0 = nia;
3300 nia_to_ecu_cache[i].ecu0 = (UWord)ecu;
3301 return ecu;
3302 }
3303
3304
3305 /* Note that this serves both the origin-tracking and
3306 no-origin-tracking modes. We assume that calls to it are
3307 sufficiently infrequent that it isn't worth specialising for the
3308 with/without origin-tracking cases. */
3309 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len, Addr nia )
3310 {
3311 UInt otag;
3312 tl_assert(sizeof(UWord) == sizeof(SizeT));
3313 if (0)
3314 VG_(printf)("helperc_MAKE_STACK_UNINIT (%#lx,%lu,nia=%#lx)\n",
3315 base, len, nia );
3316
3317 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3318 UInt ecu = convert_nia_to_ecu ( nia );
3319 tl_assert(VG_(is_plausible_ECU)(ecu));
3320 otag = ecu | MC_OKIND_STACK;
3321 } else {
3322 tl_assert(nia == 0);
3323 otag = 0;
3324 }
3325
3326 # if 0
3327 /* Really slow version */
3328    MC_(make_mem_undefined_w_otag)(base, len, otag);
3329 # endif
3330
3331 # if 0
3332 /* Slow(ish) version, which is fairly easily seen to be correct.
3333 */
3334 if (LIKELY( VG_IS_8_ALIGNED(base) && len==128 )) {
3335       make_aligned_word64_undefined_w_otag(base +   0, otag);
3336       make_aligned_word64_undefined_w_otag(base +   8, otag);
3337       make_aligned_word64_undefined_w_otag(base +  16, otag);
3338       make_aligned_word64_undefined_w_otag(base +  24, otag);
3339 
3340       make_aligned_word64_undefined_w_otag(base +  32, otag);
3341       make_aligned_word64_undefined_w_otag(base +  40, otag);
3342       make_aligned_word64_undefined_w_otag(base +  48, otag);
3343       make_aligned_word64_undefined_w_otag(base +  56, otag);
3344 
3345       make_aligned_word64_undefined_w_otag(base +  64, otag);
3346       make_aligned_word64_undefined_w_otag(base +  72, otag);
3347       make_aligned_word64_undefined_w_otag(base +  80, otag);
3348       make_aligned_word64_undefined_w_otag(base +  88, otag);
3349 
3350       make_aligned_word64_undefined_w_otag(base +  96, otag);
3351       make_aligned_word64_undefined_w_otag(base + 104, otag);
3352       make_aligned_word64_undefined_w_otag(base + 112, otag);
3353       make_aligned_word64_undefined_w_otag(base + 120, otag);
3354 } else {
3355       MC_(make_mem_undefined_w_otag)(base, len, otag);
3356 }
3357 # endif
3358
3359 /* Idea is: go fast when
3360 * 8-aligned and length is 128
3361 * the sm is available in the main primary map
3362       * the address range falls entirely within a single secondary map
3363 If all those conditions hold, just update the V+A bits by writing
3364 directly into the vabits array. (If the sm was distinguished, this
3365 will make a copy and then write to it.)
3366 */
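   /* Sizing note for the fast path below: each vabits8 byte holds four
      2-bit VA entries and so covers 4 bytes of address space.  Hence 128
      bytes of stack correspond to 128/4 = 32 vabits8 bytes, written here
      as 16 aligned UShort stores of VA_BITS16_UNDEFINED (each UShort
      covers 8 bytes).  The 288-byte ppc64 case further below likewise
      needs 288/8 = 36 such stores, matching p[0..35] there. */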
3367
3368 if (LIKELY( len == 128 && VG_IS_8_ALIGNED(base) )) {
3369 /* Now we know the address range is suitably sized and aligned. */
3370 UWord a_lo = (UWord)(base);
3371 UWord a_hi = (UWord)(base + 128 - 1);
3372 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3373 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3374 // Now we know the entire range is within the main primary map.
3375 SecMap* sm = get_secmap_for_writing_low(a_lo);
3376 SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3377 /* Now we know that the entire address range falls within a
3378 single secondary map, and that that secondary 'lives' in
3379 the main primary map. */
3380 if (LIKELY(sm == sm_hi)) {
3381 // Finally, we know that the range is entirely within one secmap.
3382 UWord v_off = SM_OFF(a_lo);
3383 UShort* p = (UShort*)(&sm->vabits8[v_off]);
3384 p[ 0] = VA_BITS16_UNDEFINED;
3385 p[ 1] = VA_BITS16_UNDEFINED;
3386 p[ 2] = VA_BITS16_UNDEFINED;
3387 p[ 3] = VA_BITS16_UNDEFINED;
3388 p[ 4] = VA_BITS16_UNDEFINED;
3389 p[ 5] = VA_BITS16_UNDEFINED;
3390 p[ 6] = VA_BITS16_UNDEFINED;
3391 p[ 7] = VA_BITS16_UNDEFINED;
3392 p[ 8] = VA_BITS16_UNDEFINED;
3393 p[ 9] = VA_BITS16_UNDEFINED;
3394 p[10] = VA_BITS16_UNDEFINED;
3395 p[11] = VA_BITS16_UNDEFINED;
3396 p[12] = VA_BITS16_UNDEFINED;
3397 p[13] = VA_BITS16_UNDEFINED;
3398 p[14] = VA_BITS16_UNDEFINED;
3399 p[15] = VA_BITS16_UNDEFINED;
3400 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3401 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3402 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3403 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3404 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3405 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3406 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3407 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3408 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3409 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3410 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3411 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3412 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3413 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3414 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3415 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3416 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3417 }
3418 return;
3419 }
3420 }
3421 }
3422
3423 /* 288 bytes (36 ULongs) is the magic value for ELF ppc64. */
3424 if (LIKELY( len == 288 && VG_IS_8_ALIGNED(base) )) {
3425 /* Now we know the address range is suitably sized and aligned. */
3426 UWord a_lo = (UWord)(base);
3427 UWord a_hi = (UWord)(base + 288 - 1);
3428 tl_assert(a_lo < a_hi); // paranoia: detect overflow
3429 if (a_hi <= MAX_PRIMARY_ADDRESS) {
3430 // Now we know the entire range is within the main primary map.
3431 SecMap* sm = get_secmap_for_writing_low(a_lo);
3432 SecMap* sm_hi = get_secmap_for_writing_low(a_hi);
3433 /* Now we know that the entire address range falls within a
3434 single secondary map, and that that secondary 'lives' in
3435 the main primary map. */
3436 if (LIKELY(sm == sm_hi)) {
3437 // Finally, we know that the range is entirely within one secmap.
3438 UWord v_off = SM_OFF(a_lo);
3439 UShort* p = (UShort*)(&sm->vabits8[v_off]);
3440 p[ 0] = VA_BITS16_UNDEFINED;
3441 p[ 1] = VA_BITS16_UNDEFINED;
3442 p[ 2] = VA_BITS16_UNDEFINED;
3443 p[ 3] = VA_BITS16_UNDEFINED;
3444 p[ 4] = VA_BITS16_UNDEFINED;
3445 p[ 5] = VA_BITS16_UNDEFINED;
3446 p[ 6] = VA_BITS16_UNDEFINED;
3447 p[ 7] = VA_BITS16_UNDEFINED;
3448 p[ 8] = VA_BITS16_UNDEFINED;
3449 p[ 9] = VA_BITS16_UNDEFINED;
3450 p[10] = VA_BITS16_UNDEFINED;
3451 p[11] = VA_BITS16_UNDEFINED;
3452 p[12] = VA_BITS16_UNDEFINED;
3453 p[13] = VA_BITS16_UNDEFINED;
3454 p[14] = VA_BITS16_UNDEFINED;
3455 p[15] = VA_BITS16_UNDEFINED;
3456 p[16] = VA_BITS16_UNDEFINED;
3457 p[17] = VA_BITS16_UNDEFINED;
3458 p[18] = VA_BITS16_UNDEFINED;
3459 p[19] = VA_BITS16_UNDEFINED;
3460 p[20] = VA_BITS16_UNDEFINED;
3461 p[21] = VA_BITS16_UNDEFINED;
3462 p[22] = VA_BITS16_UNDEFINED;
3463 p[23] = VA_BITS16_UNDEFINED;
3464 p[24] = VA_BITS16_UNDEFINED;
3465 p[25] = VA_BITS16_UNDEFINED;
3466 p[26] = VA_BITS16_UNDEFINED;
3467 p[27] = VA_BITS16_UNDEFINED;
3468 p[28] = VA_BITS16_UNDEFINED;
3469 p[29] = VA_BITS16_UNDEFINED;
3470 p[30] = VA_BITS16_UNDEFINED;
3471 p[31] = VA_BITS16_UNDEFINED;
3472 p[32] = VA_BITS16_UNDEFINED;
3473 p[33] = VA_BITS16_UNDEFINED;
3474 p[34] = VA_BITS16_UNDEFINED;
3475 p[35] = VA_BITS16_UNDEFINED;
3476 if (UNLIKELY( MC_(clo_mc_level) == 3 )) {
3477 set_aligned_word64_Origin_to_undef( base + 8 * 0, otag );
3478 set_aligned_word64_Origin_to_undef( base + 8 * 1, otag );
3479 set_aligned_word64_Origin_to_undef( base + 8 * 2, otag );
3480 set_aligned_word64_Origin_to_undef( base + 8 * 3, otag );
3481 set_aligned_word64_Origin_to_undef( base + 8 * 4, otag );
3482 set_aligned_word64_Origin_to_undef( base + 8 * 5, otag );
3483 set_aligned_word64_Origin_to_undef( base + 8 * 6, otag );
3484 set_aligned_word64_Origin_to_undef( base + 8 * 7, otag );
3485 set_aligned_word64_Origin_to_undef( base + 8 * 8, otag );
3486 set_aligned_word64_Origin_to_undef( base + 8 * 9, otag );
3487 set_aligned_word64_Origin_to_undef( base + 8 * 10, otag );
3488 set_aligned_word64_Origin_to_undef( base + 8 * 11, otag );
3489 set_aligned_word64_Origin_to_undef( base + 8 * 12, otag );
3490 set_aligned_word64_Origin_to_undef( base + 8 * 13, otag );
3491 set_aligned_word64_Origin_to_undef( base + 8 * 14, otag );
3492 set_aligned_word64_Origin_to_undef( base + 8 * 15, otag );
3493 set_aligned_word64_Origin_to_undef( base + 8 * 16, otag );
3494 set_aligned_word64_Origin_to_undef( base + 8 * 17, otag );
3495 set_aligned_word64_Origin_to_undef( base + 8 * 18, otag );
3496 set_aligned_word64_Origin_to_undef( base + 8 * 19, otag );
3497 set_aligned_word64_Origin_to_undef( base + 8 * 20, otag );
3498 set_aligned_word64_Origin_to_undef( base + 8 * 21, otag );
3499 set_aligned_word64_Origin_to_undef( base + 8 * 22, otag );
3500 set_aligned_word64_Origin_to_undef( base + 8 * 23, otag );
3501 set_aligned_word64_Origin_to_undef( base + 8 * 24, otag );
3502 set_aligned_word64_Origin_to_undef( base + 8 * 25, otag );
3503 set_aligned_word64_Origin_to_undef( base + 8 * 26, otag );
3504 set_aligned_word64_Origin_to_undef( base + 8 * 27, otag );
3505 set_aligned_word64_Origin_to_undef( base + 8 * 28, otag );
3506 set_aligned_word64_Origin_to_undef( base + 8 * 29, otag );
3507 set_aligned_word64_Origin_to_undef( base + 8 * 30, otag );
3508 set_aligned_word64_Origin_to_undef( base + 8 * 31, otag );
3509 set_aligned_word64_Origin_to_undef( base + 8 * 32, otag );
3510 set_aligned_word64_Origin_to_undef( base + 8 * 33, otag );
3511 set_aligned_word64_Origin_to_undef( base + 8 * 34, otag );
3512 set_aligned_word64_Origin_to_undef( base + 8 * 35, otag );
3513 }
3514 return;
3515 }
3516 }
3517 }
3518
3519 /* else fall into slow case */
3520 MC_(make_mem_undefined_w_otag)(base, len, otag);
3521 }
3522
3523
3524 /*------------------------------------------------------------*/
3525 /*--- Checking memory ---*/
3526 /*------------------------------------------------------------*/
3527
3528 typedef
3529 enum {
3530 MC_Ok = 5,
3531 MC_AddrErr = 6,
3532 MC_ValueErr = 7
3533 }
3534 MC_ReadResult;
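/* (The values 5/6/7 are presumably chosen as distinctive nonzero
   constants so that accidental confusion with True/False, or use of an
   uninitialised MC_ReadResult, is easier to spot.) */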
3535
3536
3537 /* Check permissions for address range. If inadequate permissions
3538 exist, *bad_addr is set to the offending address, so the caller can
3539 know what it is. */
3540
3541 /* Returns True if [a .. a+len) is not addressible. Otherwise,
3542 returns False, and if bad_addr is non-NULL, sets *bad_addr to
3543 indicate the lowest failing address. Functions below are
3544 similar. */
3545 Bool MC_(check_mem_is_noaccess) ( Addr a, SizeT len, Addr* bad_addr )
3546 {
3547 SizeT i;
3548 UWord vabits2;
3549
3550 PROF_EVENT(60, "check_mem_is_noaccess");
3551 for (i = 0; i < len; i++) {
3552 PROF_EVENT(61, "check_mem_is_noaccess(loop)");
3553 vabits2 = get_vabits2(a);
3554 if (VA_BITS2_NOACCESS != vabits2) {
3555 if (bad_addr != NULL) *bad_addr = a;
3556 return False;
3557 }
3558 a++;
3559 }
3560 return True;
3561 }
3562
3563 static Bool is_mem_addressable ( Addr a, SizeT len,
3564 /*OUT*/Addr* bad_addr )
3565 {
3566 SizeT i;
3567 UWord vabits2;
3568
3569 PROF_EVENT(62, "is_mem_addressable");
3570 for (i = 0; i < len; i++) {
3571 PROF_EVENT(63, "is_mem_addressable(loop)");
3572 vabits2 = get_vabits2(a);
3573 if (VA_BITS2_NOACCESS == vabits2) {
3574 if (bad_addr != NULL) *bad_addr = a;
3575 return False;
3576 }
3577 a++;
3578 }
3579 return True;
3580 }
3581
3582 static MC_ReadResult is_mem_defined ( Addr a, SizeT len,
3583 /*OUT*/Addr* bad_addr,
3584 /*OUT*/UInt* otag )
3585 {
3586 SizeT i;
3587 UWord vabits2;
3588
3589 PROF_EVENT(64, "is_mem_defined");
3590 DEBUG("is_mem_defined\n");
3591
3592 if (otag) *otag = 0;
3593 if (bad_addr) *bad_addr = 0;
3594 for (i = 0; i < len; i++) {
3595 PROF_EVENT(65, "is_mem_defined(loop)");
3596 vabits2 = get_vabits2(a);
3597 if (VA_BITS2_DEFINED != vabits2) {
3598 // Error! Nb: Report addressability errors in preference to
3599          // definedness errors.  And don't report definedness errors unless
3600 // --undef-value-errors=yes.
3601 if (bad_addr) {
3602 *bad_addr = a;
3603 }
3604 if (VA_BITS2_NOACCESS == vabits2) {
3605 return MC_AddrErr;
3606 }
3607 if (MC_(clo_mc_level) >= 2) {
3608 if (otag && MC_(clo_mc_level) == 3) {
3609 *otag = MC_(helperc_b_load1)( a );
3610 }
3611 return MC_ValueErr;
3612 }
3613 }
3614 a++;
3615 }
3616 return MC_Ok;
3617 }
3618
3619
3620 /* Like is_mem_defined but doesn't give up at the first uninitialised
3621 byte -- the entire range is always checked. This is important for
3622 detecting errors in the case where a checked range strays into
3623 invalid memory, but that fact is not detected by the ordinary
3624 is_mem_defined(), because of an undefined section that precedes the
3625 out of range section, possibly as a result of an alignment hole in
3626 the checked data. This version always checks the entire range and
3627    can report both a definedness and an accessibility error, if
3628 necessary. */
3629 static void is_mem_defined_comprehensive (
3630 Addr a, SizeT len,
3631 /*OUT*/Bool* errorV, /* is there a definedness err? */
3632 /*OUT*/Addr* bad_addrV, /* if so where? */
3633 /*OUT*/UInt* otagV, /* and what's its otag? */
3634 /*OUT*/Bool* errorA, /* is there an addressability err? */
3635 /*OUT*/Addr* bad_addrA /* if so where? */
3636 )
3637 {
3638 SizeT i;
3639 UWord vabits2;
3640 Bool already_saw_errV = False;
3641
3642 PROF_EVENT(64, "is_mem_defined"); // fixme
3643 DEBUG("is_mem_defined_comprehensive\n");
3644
3645 tl_assert(!(*errorV || *errorA));
3646
3647 for (i = 0; i < len; i++) {
3648 PROF_EVENT(65, "is_mem_defined(loop)"); // fixme
3649 vabits2 = get_vabits2(a);
3650 switch (vabits2) {
3651 case VA_BITS2_DEFINED:
3652 a++;
3653 break;
3654 case VA_BITS2_UNDEFINED:
3655 case VA_BITS2_PARTDEFINED:
3656 if (!already_saw_errV) {
3657 *errorV = True;
3658 *bad_addrV = a;
3659 if (MC_(clo_mc_level) == 3) {
3660 *otagV = MC_(helperc_b_load1)( a );
3661 } else {
3662 *otagV = 0;
3663 }
3664 already_saw_errV = True;
3665 }
3666 a++; /* keep going */
3667 break;
3668 case VA_BITS2_NOACCESS:
3669 *errorA = True;
3670 *bad_addrA = a;
3671 return; /* give up now. */
3672 default:
3673 tl_assert(0);
3674 }
3675 }
3676 }
3677
3678
3679 /* Check a zero-terminated ascii string. Tricky -- don't want to
3680 examine the actual bytes, to find the end, until we're sure it is
3681 safe to do so. */
3682
3683 static MC_ReadResult mc_is_defined_asciiz ( Addr a, Addr* bad_addr, UInt* otag )
3684 {
3685 UWord vabits2;
3686
3687 PROF_EVENT(66, "mc_is_defined_asciiz");
3688 DEBUG("mc_is_defined_asciiz\n");
3689
3690 if (otag) *otag = 0;
3691 if (bad_addr) *bad_addr = 0;
3692 while (True) {
3693 PROF_EVENT(67, "mc_is_defined_asciiz(loop)");
3694 vabits2 = get_vabits2(a);
3695 if (VA_BITS2_DEFINED != vabits2) {
3696 // Error! Nb: Report addressability errors in preference to
3697       // definedness errors.  And don't report definedness errors unless
3698 // --undef-value-errors=yes.
3699 if (bad_addr) {
3700 *bad_addr = a;
3701 }
3702 if (VA_BITS2_NOACCESS == vabits2) {
3703 return MC_AddrErr;
3704 }
3705 if (MC_(clo_mc_level) >= 2) {
3706 if (otag && MC_(clo_mc_level) == 3) {
3707 *otag = MC_(helperc_b_load1)( a );
3708 }
3709 return MC_ValueErr;
3710 }
3711 }
3712 /* Ok, a is safe to read. */
3713 if (* ((UChar*)a) == 0) {
3714 return MC_Ok;
3715 }
3716 a++;
3717 }
3718 }
3719
3720
3721 /*------------------------------------------------------------*/
3722 /*--- Memory event handlers ---*/
3723 /*------------------------------------------------------------*/
3724
3725 static
3726 void check_mem_is_addressable ( CorePart part, ThreadId tid, Char* s,
3727 Addr base, SizeT size )
3728 {
3729 Addr bad_addr;
3730 Bool ok = is_mem_addressable ( base, size, &bad_addr );
3731
3732 if (!ok) {
3733 switch (part) {
3734 case Vg_CoreSysCall:
3735 MC_(record_memparam_error) ( tid, bad_addr,
3736 /*isAddrErr*/True, s, 0/*otag*/ );
3737 break;
3738
3739 case Vg_CoreSignal:
3740 MC_(record_core_mem_error)( tid, s );
3741 break;
3742
3743 default:
3744 VG_(tool_panic)("check_mem_is_addressable: unexpected CorePart");
3745 }
3746 }
3747 }
3748
3749 static
3750 void check_mem_is_defined ( CorePart part, ThreadId tid, Char* s,
3751 Addr base, SizeT size )
3752 {
3753 UInt otag = 0;
3754 Addr bad_addr;
3755 MC_ReadResult res = is_mem_defined ( base, size, &bad_addr, &otag );
3756
3757 if (MC_Ok != res) {
3758 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3759
3760 switch (part) {
3761 case Vg_CoreSysCall:
3762 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3763 isAddrErr ? 0 : otag );
3764 break;
3765
3766 case Vg_CoreSysCallArgInMem:
3767 MC_(record_regparam_error) ( tid, s, otag );
3768 break;
3769
3770 /* If we're being asked to jump to a silly address, record an error
3771 message before potentially crashing the entire system. */
3772 case Vg_CoreTranslate:
3773 MC_(record_jump_error)( tid, bad_addr );
3774 break;
3775
3776 default:
3777 VG_(tool_panic)("check_mem_is_defined: unexpected CorePart");
3778 }
3779 }
3780 }
3781
3782 static
3783 void check_mem_is_defined_asciiz ( CorePart part, ThreadId tid,
3784 Char* s, Addr str )
3785 {
3786 MC_ReadResult res;
3787 Addr bad_addr = 0; // shut GCC up
3788 UInt otag = 0;
3789
3790 tl_assert(part == Vg_CoreSysCall);
3791 res = mc_is_defined_asciiz ( (Addr)str, &bad_addr, &otag );
3792 if (MC_Ok != res) {
3793 Bool isAddrErr = ( MC_AddrErr == res ? True : False );
3794 MC_(record_memparam_error) ( tid, bad_addr, isAddrErr, s,
3795 isAddrErr ? 0 : otag );
3796 }
3797 }
3798
3799 /* Handling of mmap and mprotect is not as simple as it seems.
3800
3801 The underlying semantics are that memory obtained from mmap is
3802 always initialised, but may be inaccessible. And changes to the
3803 protection of memory do not change its contents and hence not its
3804 definedness state. Problem is we can't model
3805 inaccessible-but-with-some-definedness state; once we mark memory
3806 as inaccessible we lose all info about definedness, and so can't
3807 restore that if it is later made accessible again.
3808
3809 One obvious thing to do is this:
3810
3811 mmap/mprotect NONE -> noaccess
3812 mmap/mprotect other -> defined
3813
3814 The problem case here is: taking accessible memory, writing
3815 uninitialised data to it, mprotecting it NONE and later mprotecting
3816 it back to some accessible state causes the undefinedness to be
3817 lost.
3818
3819 A better proposal is:
3820
3821 (1) mmap NONE -> make noaccess
3822 (2) mmap other -> make defined
3823
3824 (3) mprotect NONE -> # no change
3825 (4) mprotect other -> change any "noaccess" to "defined"
3826
3827 (2) is OK because memory newly obtained from mmap really is defined
3828 (zeroed out by the kernel -- doing anything else would
3829 constitute a massive security hole.)
3830
3831 (1) is OK because the only way to make the memory usable is via
3832 (4), in which case we also wind up correctly marking it all as
3833 defined.
3834
3835 (3) is the weak case. We choose not to change memory state.
3836 (presumably the range is in some mixture of "defined" and
3837 "undefined", viz, accessible but with arbitrary V bits). Doing
3838 nothing means we retain the V bits, so that if the memory is
3839 later mprotected "other", the V bits remain unchanged, so there
3840 can be no false negatives. The bad effect is that if there's
3841 an access in the area, then MC cannot warn; but at least we'll
3842 get a SEGV to show, so it's better than nothing.
3843
3844 Consider the sequence (3) followed by (4). Any memory that was
3845 "defined" or "undefined" previously retains its state (as
3846 required). Any memory that was "noaccess" before can only have
3847 been made that way by (1), and so it's OK to change it to
3848 "defined".
3849
3850 See https://bugs.kde.org/show_bug.cgi?id=205541
3851 and https://bugs.kde.org/show_bug.cgi?id=210268
3852 */
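/* Example trace of the scheme above (hypothetical sequence, using the
   rule numbers (1)-(4) from the comment):

     mmap(..., PROT_READ|PROT_WRITE)     -> region Defined            (2)
     <program writes uninitialised junk into part of it>
     mprotect(..., PROT_NONE)            -> V+A bits left untouched   (3)
     mprotect(..., PROT_READ|PROT_WRITE) -> only Noaccess bytes become
                                            Defined; the Undefined
                                            bytes stay Undefined      (4)

   so the PROT_NONE round trip introduces no false negatives. */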
3853 static
3854 void mc_new_mem_mmap ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx,
3855 ULong di_handle )
3856 {
3857 if (rr || ww || xx) {
3858 /* (2) mmap/mprotect other -> defined */
3859 MC_(make_mem_defined)(a, len);
3860 } else {
3861 /* (1) mmap/mprotect NONE -> noaccess */
3862 MC_(make_mem_noaccess)(a, len);
3863 }
3864 }
3865
3866 static
3867 void mc_new_mem_mprotect ( Addr a, SizeT len, Bool rr, Bool ww, Bool xx )
3868 {
3869 if (rr || ww || xx) {
3870 /* (4) mprotect other -> change any "noaccess" to "defined" */
3871 make_mem_defined_if_noaccess(a, len);
3872 } else {
3873 /* (3) mprotect NONE -> # no change */
3874 /* do nothing */
3875 }
3876 }
3877
3878
3879 static
3880 void mc_new_mem_startup( Addr a, SizeT len,
3881 Bool rr, Bool ww, Bool xx, ULong di_handle )
3882 {
3883    // We can mark everything mapped at startup as defined: code is defined, initialised variables get put in the data
3884 // segment and are defined, and uninitialised variables get put in the
3885 // bss segment and are auto-zeroed (and so defined).
3886 //
3887 // It's possible that there will be padding between global variables.
3888 // This will also be auto-zeroed, and marked as defined by Memcheck. If
3889 // a program uses it, Memcheck will not complain. This is arguably a
3890 // false negative, but it's a grey area -- the behaviour is defined (the
3891 // padding is zeroed) but it's probably not what the user intended. And
3892 // we can't avoid it.
3893 //
3894 // Note: we generally ignore RWX permissions, because we can't track them
3895 // without requiring more than one A bit which would slow things down a
3896 // lot. But on Darwin the 0th page is mapped but !R and !W and !X.
3897 // So we mark any such pages as "unaddressable".
3898 DEBUG("mc_new_mem_startup(%#lx, %llu, rr=%u, ww=%u, xx=%u)\n",
3899 a, (ULong)len, rr, ww, xx);
3900 mc_new_mem_mmap(a, len, rr, ww, xx, di_handle);
3901 }
3902
3903 static
3904 void mc_post_mem_write(CorePart part, ThreadId tid, Addr a, SizeT len)
3905 {
3906 MC_(make_mem_defined)(a, len);
3907 }
3908
3909
3910 /*------------------------------------------------------------*/
3911 /*--- Register event handlers ---*/
3912 /*------------------------------------------------------------*/
3913
3914 /* Try and get a nonzero origin for the guest state section of thread
3915 tid characterised by (offset,size). Return 0 if nothing to show
3916 for it. */
3917 static UInt mb_get_origin_for_guest_offset ( ThreadId tid,
3918 Int offset, SizeT size )
3919 {
3920 Int sh2off;
3921 UInt area[3];
3922 UInt otag;
3923 sh2off = MC_(get_otrack_shadow_offset)( offset, size );
3924 if (sh2off == -1)
3925 return 0; /* This piece of guest state is not tracked */
3926 tl_assert(sh2off >= 0);
3927 tl_assert(0 == (sh2off % 4));
3928 area[0] = 0x31313131;
3929 area[2] = 0x27272727;
3930 VG_(get_shadow_regs_area)( tid, (UChar *)&area[1], 2/*shadowno*/,sh2off,4 );
3931 tl_assert(area[0] == 0x31313131);
3932 tl_assert(area[2] == 0x27272727);
3933 otag = area[1];
3934 return otag;
3935 }
3936
3937
3938 /* When some chunk of guest state is written, mark the corresponding
3939 shadow area as valid. This is used to initialise arbitrarily large
3940 chunks of guest state, hence the _SIZE value, which has to be as
3941 big as the biggest guest state.
3942 */
3943 static void mc_post_reg_write ( CorePart part, ThreadId tid,
3944 PtrdiffT offset, SizeT size)
3945 {
3946 # define MAX_REG_WRITE_SIZE 1696
3947 UChar area[MAX_REG_WRITE_SIZE];
3948 tl_assert(size <= MAX_REG_WRITE_SIZE);
3949 VG_(memset)(area, V_BITS8_DEFINED, size);
3950 VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/,offset,size, area );
3951 # undef MAX_REG_WRITE_SIZE
3952 }
3953
3954 static
3955 void mc_post_reg_write_clientcall ( ThreadId tid,
3956 PtrdiffT offset, SizeT size, Addr f)
3957 {
3958 mc_post_reg_write(/*dummy*/0, tid, offset, size);
3959 }
3960
3961 /* Look at the definedness of the guest's shadow state for
3962 [offset, offset+len). If any part of that is undefined, record
3963 a parameter error.
3964 */
3965 static void mc_pre_reg_read ( CorePart part, ThreadId tid, Char* s,
3966 PtrdiffT offset, SizeT size)
3967 {
3968 Int i;
3969 Bool bad;
3970 UInt otag;
3971
3972 UChar area[16];
3973 tl_assert(size <= 16);
3974
3975 VG_(get_shadow_regs_area)( tid, area, 1/*shadowNo*/,offset,size );
3976
3977 bad = False;
3978 for (i = 0; i < size; i++) {
3979 if (area[i] != V_BITS8_DEFINED) {
3980 bad = True;
3981 break;
3982 }
3983 }
3984
3985 if (!bad)
3986 return;
3987
3988 /* We've found some undefinedness. See if we can also find an
3989 origin for it. */
3990 otag = mb_get_origin_for_guest_offset( tid, offset, size );
3991 MC_(record_regparam_error) ( tid, s, otag );
3992 }
3993
3994
3995 /*------------------------------------------------------------*/
3996 /*--- Functions called directly from generated code: ---*/
3997 /*--- Load/store handlers. ---*/
3998 /*------------------------------------------------------------*/
3999
4000 /* Types: LOADV32, LOADV16, LOADV8 are:
4001 UWord fn ( Addr a )
4002 so they return 32-bits on 32-bit machines and 64-bits on
4003 64-bit machines. Addr has the same size as a host word.
4004
4005 LOADV64 is always ULong fn ( Addr a )
4006
4007 Similarly for STOREV8, STOREV16, STOREV32, the supplied vbits
4008 are a UWord, and for STOREV64 they are a ULong.
4009 */
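/* Descriptive note: these helpers are not called from C.  The
   instrumenter (mc_translate.c) emits calls to them from the translated
   code, one per guest memory access, choosing the be/le variant to match
   the access's endianness; the returned/supplied V bits shadow the value
   being loaded or stored. */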
4010
4011 /* If any part of '_a' indicated by the mask is 1, either '_a' is not
4012 naturally '_sz/8'-aligned, or it exceeds the range covered by the
4013 primary map. This is all very tricky (and important!), so let's
4014 work through the maths by hand (below), *and* assert for these
4015 values at startup. */
4016 #define MASK(_szInBytes) \
4017 ( ~((0x10000UL-(_szInBytes)) | ((N_PRIMARY_MAP-1) << 16)) )
4018
4019 /* MASK only exists so as to define this macro. */
4020 #define UNALIGNED_OR_HIGH(_a,_szInBits) \
4021 ((_a) & MASK((_szInBits>>3)))
4022
4023 /* On a 32-bit machine:
4024
4025 N_PRIMARY_BITS == 16, so
4026 N_PRIMARY_MAP == 0x10000, so
4027 N_PRIMARY_MAP-1 == 0xFFFF, so
4028 (N_PRIMARY_MAP-1) << 16 == 0xFFFF0000, and so
4029
4030 MASK(1) = ~ ( (0x10000 - 1) | 0xFFFF0000 )
4031 = ~ ( 0xFFFF | 0xFFFF0000 )
4032 = ~ 0xFFFF'FFFF
4033 = 0
4034
4035 MASK(2) = ~ ( (0x10000 - 2) | 0xFFFF0000 )
4036 = ~ ( 0xFFFE | 0xFFFF0000 )
4037 = ~ 0xFFFF'FFFE
4038 = 1
4039
4040 MASK(4) = ~ ( (0x10000 - 4) | 0xFFFF0000 )
4041 = ~ ( 0xFFFC | 0xFFFF0000 )
4042 = ~ 0xFFFF'FFFC
4043 = 3
4044
4045 MASK(8) = ~ ( (0x10000 - 8) | 0xFFFF0000 )
4046 = ~ ( 0xFFF8 | 0xFFFF0000 )
4047 = ~ 0xFFFF'FFF8
4048 = 7
4049
4050 Hence in the 32-bit case, "a & MASK(1/2/4/8)" is a nonzero value
4051 precisely when a is not 1/2/4/8-bytes aligned. And obviously, for
4052 the 1-byte alignment case, it is always a zero value, since MASK(1)
4053 is zero. All as expected.
4054
4055 On a 64-bit machine, it's more complex, since we're testing
4056 simultaneously for misalignment and for the address being at or
4057 above 32G:
4058
4059 N_PRIMARY_BITS == 19, so
4060 N_PRIMARY_MAP == 0x80000, so
4061 N_PRIMARY_MAP-1 == 0x7FFFF, so
4062 (N_PRIMARY_MAP-1) << 16 == 0x7FFFF'0000, and so
4063
4064 MASK(1) = ~ ( (0x10000 - 1) | 0x7FFFF'0000 )
4065 = ~ ( 0xFFFF | 0x7FFFF'0000 )
4066 = ~ 0x7FFFF'FFFF
4067 = 0xFFFF'FFF8'0000'0000
4068
4069 MASK(2) = ~ ( (0x10000 - 2) | 0x7FFFF'0000 )
4070 = ~ ( 0xFFFE | 0x7FFFF'0000 )
4071 = ~ 0x7FFFF'FFFE
4072 = 0xFFFF'FFF8'0000'0001
4073
4074 MASK(4) = ~ ( (0x10000 - 4) | 0x7FFFF'0000 )
4075 = ~ ( 0xFFFC | 0x7FFFF'0000 )
4076 = ~ 0x7FFFF'FFFC
4077 = 0xFFFF'FFF8'0000'0003
4078
4079 MASK(8) = ~ ( (0x10000 - 8) | 0x7FFFF'0000 )
4080 = ~ ( 0xFFF8 | 0x7FFFF'0000 )
4081 = ~ 0x7FFFF'FFF8
4082 = 0xFFFF'FFF8'0000'0007
4083 */
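/* Worked example for the 64-bit case, with MASK(8) = 0xFFFF'FFF8'0000'0007:

     a = 0x1'2345'6780  ->  a & MASK(8) == 0    (8-aligned, below 32G:
                                                 take the fast path)
     a = 0x1'2345'6784  ->  a & MASK(8) == 4    (misaligned: slow path)
     a = 0x8'0000'0000  ->  a & MASK(8) != 0    (at 32G, beyond the primary
                                                 map: slow path)
*/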
4084
4085
4086 /* ------------------------ Size = 8 ------------------------ */
4087
4088 static INLINE
4089 ULong mc_LOADV64 ( Addr a, Bool isBigEndian )
4090 {
4091 PROF_EVENT(200, "mc_LOADV64");
4092
4093 #ifndef PERF_FAST_LOADV
4094 return mc_LOADVn_slow( a, 64, isBigEndian );
4095 #else
4096 {
4097 UWord sm_off16, vabits16;
4098 SecMap* sm;
4099
4100 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4101 PROF_EVENT(201, "mc_LOADV64-slow1");
4102 return (ULong)mc_LOADVn_slow( a, 64, isBigEndian );
4103 }
4104
4105 sm = get_secmap_for_reading_low(a);
4106 sm_off16 = SM_OFF_16(a);
4107 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4108
4109 // Handle common case quickly: a is suitably aligned, is mapped, and
4110 // addressible.
4111 // Convert V bits from compact memory form to expanded register form.
4112 if (LIKELY(vabits16 == VA_BITS16_DEFINED)) {
4113 return V_BITS64_DEFINED;
4114 } else if (LIKELY(vabits16 == VA_BITS16_UNDEFINED)) {
4115 return V_BITS64_UNDEFINED;
4116 } else {
4117 /* Slow case: the 8 bytes are not all-defined or all-undefined. */
4118 PROF_EVENT(202, "mc_LOADV64-slow2");
4119 return mc_LOADVn_slow( a, 64, isBigEndian );
4120 }
4121 }
4122 #endif
4123 }
4124
4125 VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
4126 {
4127 return mc_LOADV64(a, True);
4128 }
4129 VG_REGPARM(1) ULong MC_(helperc_LOADV64le) ( Addr a )
4130 {
4131 return mc_LOADV64(a, False);
4132 }
4133
4134
4135 static INLINE
4136 void mc_STOREV64 ( Addr a, ULong vbits64, Bool isBigEndian )
4137 {
4138 PROF_EVENT(210, "mc_STOREV64");
4139
4140 #ifndef PERF_FAST_STOREV
4141 // XXX: this slow case seems to be marginally faster than the fast case!
4142 // Investigate further.
4143 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4144 #else
4145 {
4146 UWord sm_off16, vabits16;
4147 SecMap* sm;
4148
4149 if (UNLIKELY( UNALIGNED_OR_HIGH(a,64) )) {
4150 PROF_EVENT(211, "mc_STOREV64-slow1");
4151 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4152 return;
4153 }
4154
4155 sm = get_secmap_for_reading_low(a);
4156 sm_off16 = SM_OFF_16(a);
4157 vabits16 = ((UShort*)(sm->vabits8))[sm_off16];
4158
4159 if (LIKELY( !is_distinguished_sm(sm) &&
4160 (VA_BITS16_DEFINED == vabits16 ||
4161 VA_BITS16_UNDEFINED == vabits16) ))
4162 {
4163 /* Handle common case quickly: a is suitably aligned, */
4164 /* is mapped, and is addressible. */
4165 // Convert full V-bits in register to compact 2-bit form.
4166 if (V_BITS64_DEFINED == vbits64) {
4167 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_DEFINED;
4168 } else if (V_BITS64_UNDEFINED == vbits64) {
4169 ((UShort*)(sm->vabits8))[sm_off16] = (UShort)VA_BITS16_UNDEFINED;
4170 } else {
4171 /* Slow but general case -- writing partially defined bytes. */
4172 PROF_EVENT(212, "mc_STOREV64-slow2");
4173 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4174 }
4175 } else {
4176 /* Slow but general case. */
4177 PROF_EVENT(213, "mc_STOREV64-slow3");
4178 mc_STOREVn_slow( a, 64, vbits64, isBigEndian );
4179 }
4180 }
4181 #endif
4182 }
4183
4184 VG_REGPARM(1) void MC_(helperc_STOREV64be) ( Addr a, ULong vbits64 )
4185 {
4186 mc_STOREV64(a, vbits64, True);
4187 }
4188 VG_REGPARM(1) void MC_(helperc_STOREV64le) ( Addr a, ULong vbits64 )
4189 {
4190 mc_STOREV64(a, vbits64, False);
4191 }
4192
4193
4194 /* ------------------------ Size = 4 ------------------------ */
4195
4196 static INLINE
4197 UWord mc_LOADV32 ( Addr a, Bool isBigEndian )
4198 {
4199 PROF_EVENT(220, "mc_LOADV32");
4200
4201 #ifndef PERF_FAST_LOADV
4202 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4203 #else
4204 {
4205 UWord sm_off, vabits8;
4206 SecMap* sm;
4207
4208 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4209 PROF_EVENT(221, "mc_LOADV32-slow1");
4210 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4211 }
4212
4213 sm = get_secmap_for_reading_low(a);
4214 sm_off = SM_OFF(a);
4215 vabits8 = sm->vabits8[sm_off];
4216
4217 // Handle common case quickly: a is suitably aligned, is mapped, and the
4218 // entire word32 it lives in is addressible.
4219 // Convert V bits from compact memory form to expanded register form.
4220       // For 64-bit platforms, set the high 32 bits of retval to all 1s (undefined).
4221 // Almost certainly not necessary, but be paranoid.
4222 if (LIKELY(vabits8 == VA_BITS8_DEFINED)) {
4223 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_DEFINED);
4224 } else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) {
4225 return ((UWord)0xFFFFFFFF00000000ULL | (UWord)V_BITS32_UNDEFINED);
4226 } else {
4227 /* Slow case: the 4 bytes are not all-defined or all-undefined. */
4228 PROF_EVENT(222, "mc_LOADV32-slow2");
4229 return (UWord)mc_LOADVn_slow( a, 32, isBigEndian );
4230 }
4231 }
4232 #endif
4233 }
4234
4235 VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
4236 {
4237 return mc_LOADV32(a, True);
4238 }
4239 VG_REGPARM(1) UWord MC_(helperc_LOADV32le) ( Addr a )
4240 {
4241 return mc_LOADV32(a, False);
4242 }
4243
4244
4245 static INLINE
4246 void mc_STOREV32 ( Addr a, UWord vbits32, Bool isBigEndian )
4247 {
4248 PROF_EVENT(230, "mc_STOREV32");
4249
4250 #ifndef PERF_FAST_STOREV
4251 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4252 #else
4253 {
4254 UWord sm_off, vabits8;
4255 SecMap* sm;
4256
4257 if (UNLIKELY( UNALIGNED_OR_HIGH(a,32) )) {
4258 PROF_EVENT(231, "mc_STOREV32-slow1");
4259 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4260 return;
4261 }
4262
4263 sm = get_secmap_for_reading_low(a);
4264 sm_off = SM_OFF(a);
4265 vabits8 = sm->vabits8[sm_off];
4266
4267 // Cleverness: sometimes we don't have to write the shadow memory at
4268 // all, if we can tell that what we want to write is the same as what is
4269 // already there. The 64/16/8 bit cases also have cleverness at this
4270 // point, but it works a little differently to the code below.
4271 if (V_BITS32_DEFINED == vbits32) {
4272 if (vabits8 == (UInt)VA_BITS8_DEFINED) {
4273 return;
4274 } else if (!is_distinguished_sm(sm) && VA_BITS8_UNDEFINED == vabits8) {
4275 sm->vabits8[sm_off] = (UInt)VA_BITS8_DEFINED;
4276 } else {
4277 // not defined/undefined, or distinguished and changing state
4278 PROF_EVENT(232, "mc_STOREV32-slow2");
4279 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4280 }
4281 } else if (V_BITS32_UNDEFINED == vbits32) {
4282 if (vabits8 == (UInt)VA_BITS8_UNDEFINED) {
4283 return;
4284 } else if (!is_distinguished_sm(sm) && VA_BITS8_DEFINED == vabits8) {
4285 sm->vabits8[sm_off] = (UInt)VA_BITS8_UNDEFINED;
4286 } else {
4287 // not defined/undefined, or distinguished and changing state
4288 PROF_EVENT(233, "mc_STOREV32-slow3");
4289 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4290 }
4291 } else {
4292 // Partially defined word
4293 PROF_EVENT(234, "mc_STOREV32-slow4");
4294 mc_STOREVn_slow( a, 32, (ULong)vbits32, isBigEndian );
4295 }
4296 }
4297 #endif
4298 }
4299
4300 VG_REGPARM(2) void MC_(helperc_STOREV32be) ( Addr a, UWord vbits32 )
4301 {
4302 mc_STOREV32(a, vbits32, True);
4303 }
4304 VG_REGPARM(2) void MC_(helperc_STOREV32le) ( Addr a, UWord vbits32 )
4305 {
4306 mc_STOREV32(a, vbits32, False);
4307 }
4308
4309
4310 /* ------------------------ Size = 2 ------------------------ */
4311
4312 static INLINE
4313 UWord mc_LOADV16 ( Addr a, Bool isBigEndian )
4314 {
4315 PROF_EVENT(240, "mc_LOADV16");
4316
4317 #ifndef PERF_FAST_LOADV
4318 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4319 #else
4320 {
4321 UWord sm_off, vabits8;
4322 SecMap* sm;
4323
4324 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4325 PROF_EVENT(241, "mc_LOADV16-slow1");
4326 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4327 }
4328
4329 sm = get_secmap_for_reading_low(a);
4330 sm_off = SM_OFF(a);
4331 vabits8 = sm->vabits8[sm_off];
4332 // Handle common case quickly: a is suitably aligned, is mapped, and is
4333 // addressable.
4334 // Convert V bits from compact memory form to expanded register form
4335 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS16_DEFINED; }
4336 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS16_UNDEFINED; }
4337 else {
4338 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4339 // the two sub-bytes.
4340 UChar vabits4 = extract_vabits4_from_vabits8(a, vabits8);
4341 if (vabits4 == VA_BITS4_DEFINED ) { return V_BITS16_DEFINED; }
4342 else if (vabits4 == VA_BITS4_UNDEFINED) { return V_BITS16_UNDEFINED; }
4343 else {
4344 /* Slow case: the two bytes are not all-defined or all-undefined. */
4345 PROF_EVENT(242, "mc_LOADV16-slow2");
4346 return (UWord)mc_LOADVn_slow( a, 16, isBigEndian );
4347 }
4348 }
4349 }
4350 #endif
4351 }
4352
4353 VG_REGPARM(1) UWord MC_(helperc_LOADV16be) ( Addr a )
4354 {
4355 return mc_LOADV16(a, True);
4356 }
4357 VG_REGPARM(1) UWord MC_(helperc_LOADV16le) ( Addr a )
4358 {
4359 return mc_LOADV16(a, False);
4360 }
4361
4362
4363 static INLINE
4364 void mc_STOREV16 ( Addr a, UWord vbits16, Bool isBigEndian )
4365 {
4366 PROF_EVENT(250, "mc_STOREV16");
4367
4368 #ifndef PERF_FAST_STOREV
4369 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4370 #else
4371 {
4372 UWord sm_off, vabits8;
4373 SecMap* sm;
4374
4375 if (UNLIKELY( UNALIGNED_OR_HIGH(a,16) )) {
4376 PROF_EVENT(251, "mc_STOREV16-slow1");
4377 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4378 return;
4379 }
4380
4381 sm = get_secmap_for_reading_low(a);
4382 sm_off = SM_OFF(a);
4383 vabits8 = sm->vabits8[sm_off];
4384 if (LIKELY( !is_distinguished_sm(sm) &&
4385 (VA_BITS8_DEFINED == vabits8 ||
4386 VA_BITS8_UNDEFINED == vabits8) ))
4387 {
4388 /* Handle common case quickly: a is suitably aligned, */
4389 /* is mapped, and is addressable. */
4390 // Convert full V-bits in register to compact 2-bit form.
4391 if (V_BITS16_DEFINED == vbits16) {
4392 insert_vabits4_into_vabits8( a, VA_BITS4_DEFINED ,
4393 &(sm->vabits8[sm_off]) );
4394 } else if (V_BITS16_UNDEFINED == vbits16) {
4395 insert_vabits4_into_vabits8( a, VA_BITS4_UNDEFINED,
4396 &(sm->vabits8[sm_off]) );
4397 } else {
4398 /* Slow but general case -- writing partially defined bytes. */
4399 PROF_EVENT(252, "mc_STOREV16-slow2");
4400 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4401 }
4402 } else {
4403 /* Slow but general case. */
4404 PROF_EVENT(253, "mc_STOREV16-slow3");
4405 mc_STOREVn_slow( a, 16, (ULong)vbits16, isBigEndian );
4406 }
4407 }
4408 #endif
4409 }
4410
4411 VG_REGPARM(2) void MC_(helperc_STOREV16be) ( Addr a, UWord vbits16 )
4412 {
4413 mc_STOREV16(a, vbits16, True);
4414 }
4415 VG_REGPARM(2) void MC_(helperc_STOREV16le) ( Addr a, UWord vbits16 )
4416 {
4417 mc_STOREV16(a, vbits16, False);
4418 }
4419
4420
4421 /* ------------------------ Size = 1 ------------------------ */
4422 /* Note: endianness is irrelevant for size == 1 */
4423
4424 VG_REGPARM(1)
4425 UWord MC_(helperc_LOADV8) ( Addr a )
4426 {
4427 PROF_EVENT(260, "mc_LOADV8");
4428
4429 #ifndef PERF_FAST_LOADV
4430 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4431 #else
4432 {
4433 UWord sm_off, vabits8;
4434 SecMap* sm;
4435
4436 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4437 PROF_EVENT(261, "mc_LOADV8-slow1");
4438 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4439 }
4440
4441 sm = get_secmap_for_reading_low(a);
4442 sm_off = SM_OFF(a);
4443 vabits8 = sm->vabits8[sm_off];
4444 // Convert V bits from compact memory form to expanded register form
4445 // Handle common case quickly: a is mapped, and the entire
4446 // word32 it lives in is addressable.
4447 if (LIKELY(vabits8 == VA_BITS8_DEFINED )) { return V_BITS8_DEFINED; }
4448 else if (LIKELY(vabits8 == VA_BITS8_UNDEFINED)) { return V_BITS8_UNDEFINED; }
4449 else {
4450 // The 4 (yes, 4) bytes are not all-defined or all-undefined, check
4451 // the single byte.
4452 UChar vabits2 = extract_vabits2_from_vabits8(a, vabits8);
4453 if (vabits2 == VA_BITS2_DEFINED ) { return V_BITS8_DEFINED; }
4454 else if (vabits2 == VA_BITS2_UNDEFINED) { return V_BITS8_UNDEFINED; }
4455 else {
4456 /* Slow case: the byte is not all-defined or all-undefined. */
4457 PROF_EVENT(262, "mc_LOADV8-slow2");
4458 return (UWord)mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
4459 }
4460 }
4461 }
4462 #endif
4463 }
4464
4465
4466 VG_REGPARM(2)
4467 void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
4468 {
4469 PROF_EVENT(270, "mc_STOREV8");
4470
4471 #ifndef PERF_FAST_STOREV
4472 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4473 #else
4474 {
4475 UWord sm_off, vabits8;
4476 SecMap* sm;
4477
4478 if (UNLIKELY( UNALIGNED_OR_HIGH(a,8) )) {
4479 PROF_EVENT(271, "mc_STOREV8-slow1");
4480 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4481 return;
4482 }
4483
4484 sm = get_secmap_for_reading_low(a);
4485 sm_off = SM_OFF(a);
4486 vabits8 = sm->vabits8[sm_off];
4487 if (LIKELY
4488 ( !is_distinguished_sm(sm) &&
4489 ( (VA_BITS8_DEFINED == vabits8 || VA_BITS8_UNDEFINED == vabits8)
4490 || (VA_BITS2_NOACCESS != extract_vabits2_from_vabits8(a, vabits8))
4491 )
4492 )
4493 )
4494 {
4495 /* Handle common case quickly: a is mapped, the entire word32 it
4496 lives in is addressable. */
4497 // Convert full V-bits in register to compact 2-bit form.
4498 if (V_BITS8_DEFINED == vbits8) {
4499 insert_vabits2_into_vabits8( a, VA_BITS2_DEFINED,
4500 &(sm->vabits8[sm_off]) );
4501 } else if (V_BITS8_UNDEFINED == vbits8) {
4502 insert_vabits2_into_vabits8( a, VA_BITS2_UNDEFINED,
4503 &(sm->vabits8[sm_off]) );
4504 } else {
4505 /* Slow but general case -- writing partially defined bytes. */
4506 PROF_EVENT(272, "mc_STOREV8-slow2");
4507 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4508 }
4509 } else {
4510 /* Slow but general case. */
4511 PROF_EVENT(273, "mc_STOREV8-slow3");
4512 mc_STOREVn_slow( a, 8, (ULong)vbits8, False/*irrelevant*/ );
4513 }
4514 }
4515 #endif
4516 }
4517
4518
4519 /*------------------------------------------------------------*/
4520 /*--- Functions called directly from generated code: ---*/
4521 /*--- Value-check failure handlers. ---*/
4522 /*------------------------------------------------------------*/
4523
4524 /* Call these ones when an origin is available ... */
4525 VG_REGPARM(1)
4526 void MC_(helperc_value_check0_fail_w_o) ( UWord origin ) {
4527 MC_(record_cond_error) ( VG_(get_running_tid)(), (UInt)origin );
4528 }
4529
4530 VG_REGPARM(1)
4531 void MC_(helperc_value_check1_fail_w_o) ( UWord origin ) {
4532 MC_(record_value_error) ( VG_(get_running_tid)(), 1, (UInt)origin );
4533 }
4534
4535 VG_REGPARM(1)
4536 void MC_(helperc_value_check4_fail_w_o) ( UWord origin ) {
4537 MC_(record_value_error) ( VG_(get_running_tid)(), 4, (UInt)origin );
4538 }
4539
4540 VG_REGPARM(1)
4541 void MC_(helperc_value_check8_fail_w_o) ( UWord origin ) {
4542 MC_(record_value_error) ( VG_(get_running_tid)(), 8, (UInt)origin );
4543 }
4544
4545 VG_REGPARM(2)
4546 void MC_(helperc_value_checkN_fail_w_o) ( HWord sz, UWord origin ) {
4547 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, (UInt)origin );
4548 }
4549
4550 /* ... and these when an origin isn't available. */
4551
4552 VG_REGPARM(0)
4553 void MC_(helperc_value_check0_fail_no_o) ( void ) {
4554 MC_(record_cond_error) ( VG_(get_running_tid)(), 0/*origin*/ );
4555 }
4556
4557 VG_REGPARM(0)
4558 void MC_(helperc_value_check1_fail_no_o) ( void ) {
4559 MC_(record_value_error) ( VG_(get_running_tid)(), 1, 0/*origin*/ );
4560 }
4561
4562 VG_REGPARM(0)
4563 void MC_(helperc_value_check4_fail_no_o) ( void ) {
4564 MC_(record_value_error) ( VG_(get_running_tid)(), 4, 0/*origin*/ );
4565 }
4566
4567 VG_REGPARM(0)
4568 void MC_(helperc_value_check8_fail_no_o) ( void ) {
4569 MC_(record_value_error) ( VG_(get_running_tid)(), 8, 0/*origin*/ );
4570 }
4571
4572 VG_REGPARM(1)
4573 void MC_(helperc_value_checkN_fail_no_o) ( HWord sz ) {
4574 MC_(record_value_error) ( VG_(get_running_tid)(), (Int)sz, 0/*origin*/ );
4575 }
4576
4577
4578 /*------------------------------------------------------------*/
4579 /*--- Metadata get/set functions, for client requests. ---*/
4580 /*------------------------------------------------------------*/
4581
4582 // Nb: this expands the V+A bits out into register-form V bits, even though
4583 // they're in memory. This is for backward compatibility, and because it's
4584 // probably what the user wants.
4585
4586 /* Copy Vbits from/to address 'a'. Returns: 1 == OK, 2 == alignment
4587 error [no longer used], 3 == addressing error. */
4588 /* Nb: We used to issue various definedness/addressability errors from here,
4589 but we took them out because they ranged from not-very-helpful to
4590 downright annoying, and they complicated the error data structures. */
4591 static Int mc_get_or_set_vbits_for_client (
4592 Addr a,
4593 Addr vbits,
4594 SizeT szB,
4595 Bool setting, /* True <=> set vbits, False <=> get vbits */
4596 Bool is_client_request /* True <=> real user request
4597 False <=> internal call from gdbserver */
4598 )
4599 {
4600 SizeT i;
4601 Bool ok;
4602 UChar vbits8;
4603
4604 /* Check that the arrays are addressable before doing any getting/setting.
4605 vbits is checked only for a real user request. */
4606 for (i = 0; i < szB; i++) {
4607 if (VA_BITS2_NOACCESS == get_vabits2(a + i) ||
4608 (is_client_request && VA_BITS2_NOACCESS == get_vabits2(vbits + i))) {
4609 return 3;
4610 }
4611 }
4612
4613 /* Do the copy */
4614 if (setting) {
4615 /* setting */
4616 for (i = 0; i < szB; i++) {
4617 ok = set_vbits8(a + i, ((UChar*)vbits)[i]);
4618 tl_assert(ok);
4619 }
4620 } else {
4621 /* getting */
4622 for (i = 0; i < szB; i++) {
4623 ok = get_vbits8(a + i, &vbits8);
4624 tl_assert(ok);
4625 ((UChar*)vbits)[i] = vbits8;
4626 }
4627 if (is_client_request)
4628 // The bytes in vbits[] have now been set, so mark them as such.
4629 MC_(make_mem_defined)(vbits, szB);
4630 }
4631
4632 return 1;
4633 }
4634
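/* Client-side sketch (reader's aid, not part of the tool): the function
   above is reached from the VALGRIND_GET_VBITS / VALGRIND_SET_VBITS client
   requests handled later in this file.  A hedged example of a round trip,
   assuming the installed memcheck.h header: */
#if 0
#include <valgrind/memcheck.h>

static void example_vbits_roundtrip ( void )
{
   int  data[4];
   char vbits[sizeof(data)];
   /* Snapshot the definedness of 'data'; returns 1 on success. */
   int res = VALGRIND_GET_VBITS(data, vbits, sizeof(data));
   /* ... possibly clobber the definedness of data ... */
   if (res == 1)
      (void) VALGRIND_SET_VBITS(data, vbits, sizeof(data));  /* restore it */
}
#endif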
4635
4636 /*------------------------------------------------------------*/
4637 /*--- Detecting leaked (unreachable) malloc'd blocks. ---*/
4638 /*------------------------------------------------------------*/
4639
4640 /* For the memory leak detector, say whether an entire 64k chunk of
4641 address space is possibly in use, or not. If in doubt return
4642 True.
4643 */
4644 Bool MC_(is_within_valid_secondary) ( Addr a )
4645 {
4646 SecMap* sm = maybe_get_secmap_for ( a );
4647 if (sm == NULL || sm == &sm_distinguished[SM_DIST_NOACCESS]) {
4648 /* Definitely not in use. */
4649 return False;
4650 } else {
4651 return True;
4652 }
4653 }
4654
4655
4656 /* For the memory leak detector, say whether or not a given word
4657 address is to be regarded as valid. */
4658 Bool MC_(is_valid_aligned_word) ( Addr a )
4659 {
4660 tl_assert(sizeof(UWord) == 4 || sizeof(UWord) == 8);
4661 tl_assert(VG_IS_WORD_ALIGNED(a));
4662 if (get_vabits8_for_aligned_word32 (a) != VA_BITS8_DEFINED)
4663 return False;
4664 if (sizeof(UWord) == 8) {
4665 if (get_vabits8_for_aligned_word32 (a + 4) != VA_BITS8_DEFINED)
4666 return False;
4667 }
4668 if (UNLIKELY(MC_(in_ignored_range)(a)))
4669 return False;
4670 else
4671 return True;
4672 }
4673
4674
4675 /*------------------------------------------------------------*/
4676 /*--- Initialisation ---*/
4677 /*------------------------------------------------------------*/
4678
4679 static void init_shadow_memory ( void )
4680 {
4681 Int i;
4682 SecMap* sm;
4683
4684 tl_assert(V_BIT_UNDEFINED == 1);
4685 tl_assert(V_BIT_DEFINED == 0);
4686 tl_assert(V_BITS8_UNDEFINED == 0xFF);
4687 tl_assert(V_BITS8_DEFINED == 0);
4688
4689 /* Build the 3 distinguished secondaries */
4690 sm = &sm_distinguished[SM_DIST_NOACCESS];
4691 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_NOACCESS;
4692
4693 sm = &sm_distinguished[SM_DIST_UNDEFINED];
4694 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_UNDEFINED;
4695
4696 sm = &sm_distinguished[SM_DIST_DEFINED];
4697 for (i = 0; i < SM_CHUNKS; i++) sm->vabits8[i] = VA_BITS8_DEFINED;
4698
4699 /* Set up the primary map. */
4700 /* These entries gradually get overwritten as the used address
4701 space expands. */
4702 for (i = 0; i < N_PRIMARY_MAP; i++)
4703 primary_map[i] = &sm_distinguished[SM_DIST_NOACCESS];
4704
4705 /* Auxiliary primary maps */
4706 init_auxmap_L1_L2();
4707
4708 /* auxmap_size = auxmap_used = 0;
4709 no ... these are statically initialised */
4710
4711 /* Secondary V bit table */
4712 secVBitTable = createSecVBitTable();
4713 }
4714
4715
4716 /*------------------------------------------------------------*/
4717 /*--- Sanity check machinery (permanently engaged) ---*/
4718 /*------------------------------------------------------------*/
4719
4720 static Bool mc_cheap_sanity_check ( void )
4721 {
4722 n_sanity_cheap++;
4723 PROF_EVENT(490, "cheap_sanity_check");
4724 /* Check for sane operating level */
4725 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
4726 return False;
4727 /* nothing else useful we can rapidly check */
4728 return True;
4729 }
4730
4731 static Bool mc_expensive_sanity_check ( void )
4732 {
4733 Int i;
4734 Word n_secmaps_found;
4735 SecMap* sm;
4736 HChar* errmsg;
4737 Bool bad = False;
4738
4739 if (0) VG_(printf)("expensive sanity check\n");
4740 if (0) return True;
4741
4742 n_sanity_expensive++;
4743 PROF_EVENT(491, "expensive_sanity_check");
4744
4745 /* Check for sane operating level */
4746 if (MC_(clo_mc_level) < 1 || MC_(clo_mc_level) > 3)
4747 return False;
4748
4749 /* Check that the 3 distinguished SMs are still as they should be. */
4750
4751 /* Check noaccess DSM. */
4752 sm = &sm_distinguished[SM_DIST_NOACCESS];
4753 for (i = 0; i < SM_CHUNKS; i++)
4754 if (sm->vabits8[i] != VA_BITS8_NOACCESS)
4755 bad = True;
4756
4757 /* Check undefined DSM. */
4758 sm = &sm_distinguished[SM_DIST_UNDEFINED];
4759 for (i = 0; i < SM_CHUNKS; i++)
4760 if (sm->vabits8[i] != VA_BITS8_UNDEFINED)
4761 bad = True;
4762
4763 /* Check defined DSM. */
4764 sm = &sm_distinguished[SM_DIST_DEFINED];
4765 for (i = 0; i < SM_CHUNKS; i++)
4766 if (sm->vabits8[i] != VA_BITS8_DEFINED)
4767 bad = True;
4768
4769 if (bad) {
4770 VG_(printf)("memcheck expensive sanity: "
4771 "distinguished_secondaries have changed\n");
4772 return False;
4773 }
4774
4775 /* If we're not checking for undefined value errors, the secondary V bit
4776 * table should be empty. */
4777 if (MC_(clo_mc_level) == 1) {
4778 if (0 != VG_(OSetGen_Size)(secVBitTable))
4779 return False;
4780 }
4781
4782 /* check the auxiliary maps, very thoroughly */
4783 n_secmaps_found = 0;
4784 errmsg = check_auxmap_L1_L2_sanity( &n_secmaps_found );
4785 if (errmsg) {
4786 VG_(printf)("memcheck expensive sanity, auxmaps:\n\t%s", errmsg);
4787 return False;
4788 }
4789
4790 /* n_secmaps_found is now the number referred to by the auxiliary
4791 primary map. Now add on the ones referred to by the main
4792 primary map. */
4793 for (i = 0; i < N_PRIMARY_MAP; i++) {
4794 if (primary_map[i] == NULL) {
4795 bad = True;
4796 } else {
4797 if (!is_distinguished_sm(primary_map[i]))
4798 n_secmaps_found++;
4799 }
4800 }
4801
4802 /* check that the number of secmaps issued matches the number that
4803 are reachable (iow, no secmap leaks) */
4804 if (n_secmaps_found != (n_issued_SMs - n_deissued_SMs))
4805 bad = True;
4806
4807 if (bad) {
4808 VG_(printf)("memcheck expensive sanity: "
4809 "apparent secmap leakage\n");
4810 return False;
4811 }
4812
4813 if (bad) {
4814 VG_(printf)("memcheck expensive sanity: "
4815 "auxmap covers wrong address space\n");
4816 return False;
4817 }
4818
4819 /* there is only one pointer to each secmap (expensive) */
4820
4821 return True;
4822 }
4823
4824 /*------------------------------------------------------------*/
4825 /*--- Command line args ---*/
4826 /*------------------------------------------------------------*/
4827
4828 Bool MC_(clo_partial_loads_ok) = False;
4829 Long MC_(clo_freelist_vol) = 20*1000*1000LL;
4830 Long MC_(clo_freelist_big_blocks) = 1*1000*1000LL;
4831 LeakCheckMode MC_(clo_leak_check) = LC_Summary;
4832 VgRes MC_(clo_leak_resolution) = Vg_HighRes;
4833 Bool MC_(clo_show_reachable) = False;
4834 Bool MC_(clo_show_possibly_lost) = True;
4835 Bool MC_(clo_workaround_gcc296_bugs) = False;
4836 Int MC_(clo_malloc_fill) = -1;
4837 Int MC_(clo_free_fill) = -1;
4838 Int MC_(clo_mc_level) = 2;
4839
4840 static Bool mc_process_cmd_line_options(Char* arg)
4841 {
4842 Char* tmp_str;
4843
4844 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
4845
4846 /* Set MC_(clo_mc_level):
4847 1 = A bit tracking only
4848 2 = A and V bit tracking, but no V bit origins
4849 3 = A and V bit tracking, and V bit origins
4850
4851 Do this by inspecting --undef-value-errors= and
4852 --track-origins=. Reject the case --undef-value-errors=no
4853 --track-origins=yes as meaningless.
4854 */
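   /* Worked example of the resulting levels (reader's aid; mirrors the
      code below):
         (defaults)                                    -> level 2
         --undef-value-errors=no                       -> level 1
         --track-origins=yes                           -> level 3
         --undef-value-errors=no --track-origins=yes   -> rejected (bad_level)
   */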
4855 if (0 == VG_(strcmp)(arg, "--undef-value-errors=no")) {
4856 if (MC_(clo_mc_level) == 3) {
4857 goto bad_level;
4858 } else {
4859 MC_(clo_mc_level) = 1;
4860 return True;
4861 }
4862 }
4863 if (0 == VG_(strcmp)(arg, "--undef-value-errors=yes")) {
4864 if (MC_(clo_mc_level) == 1)
4865 MC_(clo_mc_level) = 2;
4866 return True;
4867 }
4868 if (0 == VG_(strcmp)(arg, "--track-origins=no")) {
4869 if (MC_(clo_mc_level) == 3)
4870 MC_(clo_mc_level) = 2;
4871 return True;
4872 }
4873 if (0 == VG_(strcmp)(arg, "--track-origins=yes")) {
4874 if (MC_(clo_mc_level) == 1) {
4875 goto bad_level;
4876 } else {
4877 MC_(clo_mc_level) = 3;
4878 return True;
4879 }
4880 }
4881
4882 if VG_BOOL_CLO(arg, "--partial-loads-ok", MC_(clo_partial_loads_ok)) {}
4883 else if VG_BOOL_CLO(arg, "--show-reachable", MC_(clo_show_reachable)) {}
4884 else if VG_BOOL_CLO(arg, "--show-possibly-lost",
4885 MC_(clo_show_possibly_lost)) {}
4886 else if VG_BOOL_CLO(arg, "--workaround-gcc296-bugs",
4887 MC_(clo_workaround_gcc296_bugs)) {}
4888
4889 else if VG_BINT_CLO(arg, "--freelist-vol", MC_(clo_freelist_vol),
4890 0, 10*1000*1000*1000LL) {}
4891
4892 else if VG_BINT_CLO(arg, "--freelist-big-blocks",
4893 MC_(clo_freelist_big_blocks),
4894 0, 10*1000*1000*1000LL) {}
4895
4896 else if VG_XACT_CLO(arg, "--leak-check=no",
4897 MC_(clo_leak_check), LC_Off) {}
4898 else if VG_XACT_CLO(arg, "--leak-check=summary",
4899 MC_(clo_leak_check), LC_Summary) {}
4900 else if VG_XACT_CLO(arg, "--leak-check=yes",
4901 MC_(clo_leak_check), LC_Full) {}
4902 else if VG_XACT_CLO(arg, "--leak-check=full",
4903 MC_(clo_leak_check), LC_Full) {}
4904
4905 else if VG_XACT_CLO(arg, "--leak-resolution=low",
4906 MC_(clo_leak_resolution), Vg_LowRes) {}
4907 else if VG_XACT_CLO(arg, "--leak-resolution=med",
4908 MC_(clo_leak_resolution), Vg_MedRes) {}
4909 else if VG_XACT_CLO(arg, "--leak-resolution=high",
4910 MC_(clo_leak_resolution), Vg_HighRes) {}
4911
4912 else if VG_STR_CLO(arg, "--ignore-ranges", tmp_str) {
4913 Int i;
4914 Bool ok = parse_ignore_ranges(tmp_str);
4915 if (!ok)
4916 return False;
4917 tl_assert(ignoreRanges.used >= 0);
4918 tl_assert(ignoreRanges.used < M_IGNORE_RANGES);
4919 for (i = 0; i < ignoreRanges.used; i++) {
4920 Addr s = ignoreRanges.start[i];
4921 Addr e = ignoreRanges.end[i];
4922 Addr limit = 0x4000000; /* 64M - entirely arbitrary limit */
4923 if (e <= s) {
4924 VG_(message)(Vg_DebugMsg,
4925 "ERROR: --ignore-ranges: end <= start in range:\n");
4926 VG_(message)(Vg_DebugMsg,
4927 " 0x%lx-0x%lx\n", s, e);
4928 return False;
4929 }
4930 if (e - s > limit) {
4931 VG_(message)(Vg_DebugMsg,
4932 "ERROR: --ignore-ranges: suspiciously large range:\n");
4933 VG_(message)(Vg_DebugMsg,
4934 " 0x%lx-0x%lx (size %ld)\n", s, e, (UWord)(e-s));
4935 return False;
4936 }
4937 }
4938 }
4939
4940 else if VG_BHEX_CLO(arg, "--malloc-fill", MC_(clo_malloc_fill), 0x00,0xFF) {}
4941 else if VG_BHEX_CLO(arg, "--free-fill", MC_(clo_free_fill), 0x00,0xFF) {}
4942
4943 else
4944 return VG_(replacement_malloc_process_cmd_line_option)(arg);
4945
4946 return True;
4947
4948
4949 bad_level:
4950 VG_(fmsg_bad_option)(arg,
4951 "--track-origins=yes has no effect when --undef-value-errors=no.\n");
4952 }
4953
4954 static void mc_print_usage(void)
4955 {
4956 VG_(printf)(
4957 " --leak-check=no|summary|full search for memory leaks at exit? [summary]\n"
4958 " --leak-resolution=low|med|high differentiation of leak stack traces [high]\n"
4959 " --show-reachable=no|yes show reachable blocks in leak check? [no]\n"
4960 " --show-possibly-lost=no|yes show possibly lost blocks in leak check?\n"
4961 " [yes]\n"
4962 " --undef-value-errors=no|yes check for undefined value errors [yes]\n"
4963 " --track-origins=no|yes show origins of undefined values? [no]\n"
4964 " --partial-loads-ok=no|yes too hard to explain here; see manual [no]\n"
4965 " --freelist-vol=<number> volume of freed blocks queue [20000000]\n"
4966 " --freelist-big-blocks=<number> releases first blocks with size >= [1000000]\n"
4967 " --workaround-gcc296-bugs=no|yes self explanatory [no]\n"
4968 " --ignore-ranges=0xPP-0xQQ[,0xRR-0xSS] assume given addresses are OK\n"
4969 " --malloc-fill=<hexnumber> fill malloc'd areas with given value\n"
4970 " --free-fill=<hexnumber> fill free'd areas with given value\n"
4971 );
4972 }
4973
4974 static void mc_print_debug_usage(void)
4975 {
4976 VG_(printf)(
4977 " (none)\n"
4978 );
4979 }
4980
4981
4982 /*------------------------------------------------------------*/
4983 /*--- Client blocks ---*/
4984 /*------------------------------------------------------------*/
4985
4986 /* Client block management:
4987
4988 This is managed as an expanding array of client block descriptors.
4989 Indices of live descriptors are issued to the client, so it can ask
4990 to free them later. Therefore we cannot slide live entries down
4991 over dead ones. Instead we must use free/inuse flags and scan for
4992 an empty slot at allocation time. This in turn means allocation is
4993 relatively expensive, so we hope this does not happen too often.
4994
4995 An unused block has start == size == 0
4996 */
4997
4998 /* type CGenBlock is defined in mc_include.h */
4999
5000 /* This subsystem is self-initialising. */
5001 static UWord cgb_size = 0;
5002 static UWord cgb_used = 0;
5003 static CGenBlock* cgbs = NULL;
5004
5005 /* Stats for this subsystem. */
5006 static ULong cgb_used_MAX = 0; /* Max in use. */
5007 static ULong cgb_allocs = 0; /* Number of allocs. */
5008 static ULong cgb_discards = 0; /* Number of discards. */
5009 static ULong cgb_search = 0; /* Number of searches. */
5010
5011
5012 /* Get access to the client block array. */
5013 void MC_(get_ClientBlock_array)( /*OUT*/CGenBlock** blocks,
5014 /*OUT*/UWord* nBlocks )
5015 {
5016 *blocks = cgbs;
5017 *nBlocks = cgb_used;
5018 }
5019
5020
5021 static
5022 Int alloc_client_block ( void )
5023 {
5024 UWord i, sz_new;
5025 CGenBlock* cgbs_new;
5026
5027 cgb_allocs++;
5028
5029 for (i = 0; i < cgb_used; i++) {
5030 cgb_search++;
5031 if (cgbs[i].start == 0 && cgbs[i].size == 0)
5032 return i;
5033 }
5034
5035 /* Not found. Try to allocate one at the end. */
5036 if (cgb_used < cgb_size) {
5037 cgb_used++;
5038 return cgb_used-1;
5039 }
5040
5041 /* Ok, we have to allocate a new one. */
5042 tl_assert(cgb_used == cgb_size);
5043 sz_new = (cgbs == NULL) ? 10 : (2 * cgb_size);
5044
5045 cgbs_new = VG_(malloc)( "mc.acb.1", sz_new * sizeof(CGenBlock) );
5046 for (i = 0; i < cgb_used; i++)
5047 cgbs_new[i] = cgbs[i];
5048
5049 if (cgbs != NULL)
5050 VG_(free)( cgbs );
5051 cgbs = cgbs_new;
5052
5053 cgb_size = sz_new;
5054 cgb_used++;
5055 if (cgb_used > cgb_used_MAX)
5056 cgb_used_MAX = cgb_used;
5057 return cgb_used-1;
5058 }
5059
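/* Client-side sketch (reader's aid, not part of the tool): slots in cgbs[]
   are handed out and released via the VG_USERREQ__CREATE_BLOCK and
   VG_USERREQ__DISCARD requests handled later in this file, which clients
   normally reach through the memcheck.h macros.  A hedged example: */
#if 0
#include <valgrind/memcheck.h>

static void example_client_block ( void )
{
   char buf[64];
   /* Returns a handle, i.e. an index into the descriptor array above. */
   int h = VALGRIND_CREATE_BLOCK(buf, sizeof(buf), "scratch buffer");
   /* ... errors involving buf are now described as "scratch buffer" ... */
   VALGRIND_DISCARD(h);   /* give the slot back */
}
#endif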
5060
5061 static void show_client_block_stats ( void )
5062 {
5063 VG_(message)(Vg_DebugMsg,
5064 "general CBs: %llu allocs, %llu discards, %llu maxinuse, %llu search\n",
5065 cgb_allocs, cgb_discards, cgb_used_MAX, cgb_search
5066 );
5067 }
5068 static void print_monitor_help ( void )
5069 {
5070 VG_(gdb_printf)
5071 (
5072 "\n"
5073 "memcheck monitor commands:\n"
5074 " get_vbits <addr> [<len>]\n"
5075 " returns validity bits for <len> (or 1) bytes at <addr>\n"
5076 " bit values 0 = valid, 1 = invalid, __ = unaddressable byte\n"
5077 " Example: get_vbits 0x8049c78 10\n"
5078 " make_memory [noaccess|undefined\n"
5079 " |defined|Definedifaddressable] <addr> [<len>]\n"
5080 " mark <len> (or 1) bytes at <addr> with the given accessibility\n"
5081 " check_memory [addressable|defined] <addr> [<len>]\n"
5082 " check that <len> (or 1) bytes at <addr> have the given accessibility\n"
5083 " and outputs a description of <addr>\n"
5084 " leak_check [full*|summary] [reachable|possibleleak*|definiteleak]\n"
5085 " [increased*|changed|any]\n"
5086 " [unlimited*|limited <max_loss_records_output>]\n"
5087 " * = defaults\n"
5088 " Examples: leak_check\n"
5089 " leak_check summary any\n"
5090 " leak_check full reachable any limited 100\n"
5091 " block_list <loss_record_nr>\n"
5092 " after a leak search, shows the list of blocks of <loss_record_nr>\n"
5093 " who_points_at <addr> [<len>]\n"
5094 " shows places pointing inside <len> (default 1) bytes at <addr>\n"
5095 " (with len 1, only shows \"start pointers\" pointing exactly to <addr>,\n"
5096 " with len > 1, will also show \"interior pointers\")\n"
5097 "\n");
5098 }
5099
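/* Example session (reader's aid, not part of the tool): the commands listed
   above are normally issued from gdb via vgdb, e.g.

      (gdb) target remote | vgdb
      (gdb) monitor get_vbits 0x8049c78 10
      (gdb) monitor leak_check full reachable any limited 100
      (gdb) monitor block_list 7

   Each "monitor" string reaches handle_gdb_monitor_command below. */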
5100 /* return True if request recognised, False otherwise */
5101 static Bool handle_gdb_monitor_command (ThreadId tid, Char *req)
5102 {
5103 Char* wcmd;
5104 Char s[VG_(strlen(req)) + 1]; /* copy for strtok_r; +1 for the trailing NUL */
5105 Char *ssaveptr;
5106
5107 VG_(strcpy) (s, req);
5108
5109 wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
5110 /* NB: if possible, avoid introducing a new command below which
5111 starts with the same first letter(s) as an already existing
5112 command. This ensures a shorter abbreviation for the user. */
5113 switch (VG_(keyword_id)
5114 ("help get_vbits leak_check make_memory check_memory "
5115 "block_list who_points_at",
5116 wcmd, kwd_report_duplicated_matches)) {
5117 case -2: /* multiple matches */
5118 return True;
5119 case -1: /* not found */
5120 return False;
5121 case 0: /* help */
5122 print_monitor_help();
5123 return True;
5124 case 1: { /* get_vbits */
5125 Addr address;
5126 SizeT szB = 1;
5127 VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5128 if (szB != 0) {
5129 UChar vbits;
5130 Int i;
5131 Int unaddressable = 0;
5132 for (i = 0; i < szB; i++) {
5133 Int res = mc_get_or_set_vbits_for_client
5134 (address+i, (Addr) &vbits, 1,
5135 False, /* get them */
5136 False /* is client request */ );
5137 /* we are before the first character on next line, print a \n. */
5138 if ((i % 32) == 0 && i != 0)
5139 VG_(gdb_printf) ("\n");
5140 /* we are before the next block of 4 starts, print a space. */
5141 else if ((i % 4) == 0 && i != 0)
5142 VG_(gdb_printf) (" ");
5143 if (res == 1) {
5144 VG_(gdb_printf) ("%02x", vbits);
5145 } else {
5146 tl_assert(3 == res);
5147 unaddressable++;
5148 VG_(gdb_printf) ("__");
5149 }
5150 }
5151 VG_(gdb_printf) ("\n");
5152 if (unaddressable) {
5153 VG_(gdb_printf)
5154 ("Address %p len %ld has %d bytes unaddressable\n",
5155 (void *)address, szB, unaddressable);
5156 }
5157 }
5158 return True;
5159 }
5160 case 2: { /* leak_check */
5161 Int err = 0;
5162 LeakCheckParams lcp;
5163 Char* kw;
5164
5165 lcp.mode = LC_Full;
5166 lcp.show_reachable = False;
5167 lcp.show_possibly_lost = True;
5168 lcp.deltamode = LCD_Increased;
5169 lcp.max_loss_records_output = 999999999;
5170 lcp.requested_by_monitor_command = True;
5171
5172 for (kw = VG_(strtok_r) (NULL, " ", &ssaveptr);
5173 kw != NULL;
5174 kw = VG_(strtok_r) (NULL, " ", &ssaveptr)) {
5175 switch (VG_(keyword_id)
5176 ("full summary "
5177 "reachable possibleleak definiteleak "
5178 "increased changed any "
5179 "unlimited limited ",
5180 kw, kwd_report_all)) {
5181 case -2: err++; break;
5182 case -1: err++; break;
5183 case 0: /* full */
5184 lcp.mode = LC_Full; break;
5185 case 1: /* summary */
5186 lcp.mode = LC_Summary; break;
5187 case 2: /* reachable */
5188 lcp.show_reachable = True;
5189 lcp.show_possibly_lost = True; break;
5190 case 3: /* possibleleak */
5191 lcp.show_reachable = False;
5192 lcp.show_possibly_lost = True; break;
5193 case 4: /* definiteleak */
5194 lcp.show_reachable = False;
5195 lcp.show_possibly_lost = False; break;
5196 case 5: /* increased */
5197 lcp.deltamode = LCD_Increased; break;
5198 case 6: /* changed */
5199 lcp.deltamode = LCD_Changed; break;
5200 case 7: /* any */
5201 lcp.deltamode = LCD_Any; break;
5202 case 8: /* unlimited */
5203 lcp.max_loss_records_output = 999999999; break;
5204 case 9: { /* limited */
5205 int int_value;
5206 char* endptr;
5207
5208 wcmd = VG_(strtok_r) (NULL, " ", &ssaveptr);
5209 if (wcmd == NULL) {
5210 int_value = 0;
5211 endptr = "empty"; /* to report an error below */
5212 } else {
5213 int_value = VG_(strtoll10) (wcmd, (Char **)&endptr);
5214 }
5215 if (*endptr != '\0')
5216 VG_(gdb_printf) ("missing or malformed integer value\n");
5217 else if (int_value > 0)
5218 lcp.max_loss_records_output = (UInt) int_value;
5219 else
5220 VG_(gdb_printf) ("max_loss_records_output must be >= 1, got %d\n",
5221 int_value);
5222 break;
5223 }
5224 default:
5225 tl_assert (0);
5226 }
5227 }
5228 if (!err)
5229 MC_(detect_memory_leaks)(tid, &lcp);
5230 return True;
5231 }
5232
5233 case 3: { /* make_memory */
5234 Addr address;
5235 SizeT szB = 1;
5236 int kwdid = VG_(keyword_id)
5237 ("noaccess undefined defined Definedifaddressable",
5238 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5239 VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5240 if (address == (Addr) 0 && szB == 0) return True;
5241 switch (kwdid) {
5242 case -2: break;
5243 case -1: break;
5244 case 0: MC_(make_mem_noaccess) (address, szB); break;
5245 case 1: make_mem_undefined_w_tid_and_okind ( address, szB, tid,
5246 MC_OKIND_USER ); break;
5247 case 2: MC_(make_mem_defined) ( address, szB ); break;
5248 case 3: make_mem_defined_if_addressable ( address, szB ); break;
5249 default: tl_assert(0);
5250 }
5251 return True;
5252 }
5253
5254 case 4: { /* check_memory */
5255 Addr address;
5256 SizeT szB = 1;
5257 Addr bad_addr;
5258 UInt okind;
5259 char* src;
5260 UInt otag;
5261 UInt ecu;
5262 ExeContext* origin_ec;
5263 MC_ReadResult res;
5264
5265 int kwdid = VG_(keyword_id)
5266 ("addressable defined",
5267 VG_(strtok_r) (NULL, " ", &ssaveptr), kwd_report_all);
5268 VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5269 if (address == (Addr) 0 && szB == 0) return True;
5270 switch (kwdid) {
5271 case -2: break;
5272 case -1: break;
5273 case 0:
5274 if (is_mem_addressable ( address, szB, &bad_addr ))
5275 VG_(gdb_printf) ("Address %p len %ld addressable\n",
5276 (void *)address, szB);
5277 else
5278 VG_(gdb_printf)
5279 ("Address %p len %ld not addressable:\nbad address %p\n",
5280 (void *)address, szB, (void *) bad_addr);
5281 MC_(pp_describe_addr) (address);
5282 break;
5283 case 1: res = is_mem_defined ( address, szB, &bad_addr, &otag );
5284 if (MC_AddrErr == res)
5285 VG_(gdb_printf)
5286 ("Address %p len %ld not addressable:\nbad address %p\n",
5287 (void *)address, szB, (void *) bad_addr);
5288 else if (MC_ValueErr == res) {
5289 okind = otag & 3;
5290 switch (okind) {
5291 case MC_OKIND_STACK:
5292 src = " was created by a stack allocation"; break;
5293 case MC_OKIND_HEAP:
5294 src = " was created by a heap allocation"; break;
5295 case MC_OKIND_USER:
5296 src = " was created by a client request"; break;
5297 case MC_OKIND_UNKNOWN:
5298 src = ""; break;
5299 default: tl_assert(0);
5300 }
5301 VG_(gdb_printf)
5302 ("Address %p len %ld not defined:\n"
5303 "Uninitialised value at %p%s\n",
5304 (void *)address, szB, (void *) bad_addr, src);
5305 ecu = otag & ~3;
5306 if (VG_(is_plausible_ECU)(ecu)) {
5307 origin_ec = VG_(get_ExeContext_from_ECU)( ecu );
5308 VG_(pp_ExeContext)( origin_ec );
5309 }
5310 }
5311 else
5312 VG_(gdb_printf) ("Address %p len %ld defined\n",
5313 (void *)address, szB);
5314 MC_(pp_describe_addr) (address);
5315 break;
5316 default: tl_assert(0);
5317 }
5318 return True;
5319 }
5320
5321 case 5: { /* block_list */
5322 Char* wl;
5323 Char *endptr;
5324 UInt lr_nr = 0;
5325 wl = VG_(strtok_r) (NULL, " ", &ssaveptr);
5326 lr_nr = VG_(strtoull10) (wl, &endptr);
5327 if (wl != NULL && *endptr != '\0') {
5328 VG_(gdb_printf) ("malformed integer\n");
5329 } else {
5330 // lr_nr-1 because the number shown to the user is one more than the index in lr_array.
5331 if (lr_nr == 0 || ! MC_(print_block_list) (lr_nr-1))
5332 VG_(gdb_printf) ("invalid loss record nr\n");
5333 }
5334 return True;
5335 }
5336
5337 case 6: { /* who_points_at */
5338 Addr address;
5339 SizeT szB = 1;
5340
5341 VG_(strtok_get_address_and_size) (&address, &szB, &ssaveptr);
5342 if (address == (Addr) 0) {
5343 VG_(gdb_printf) ("Cannot search who points at 0x0\n");
5344 return True;
5345 }
5346 MC_(who_points_at) (address, szB);
5347 return True;
5348 }
5349
5350 default:
5351 tl_assert(0);
5352 return False;
5353 }
5354 }
5355
5356 /*------------------------------------------------------------*/
5357 /*--- Client requests ---*/
5358 /*------------------------------------------------------------*/
5359
5360 static Bool mc_handle_client_request ( ThreadId tid, UWord* arg, UWord* ret )
5361 {
5362 Int i;
5363 Bool ok;
5364 Addr bad_addr;
5365
5366 if (!VG_IS_TOOL_USERREQ('M','C',arg[0])
5367 && VG_USERREQ__MALLOCLIKE_BLOCK != arg[0]
5368 && VG_USERREQ__RESIZEINPLACE_BLOCK != arg[0]
5369 && VG_USERREQ__FREELIKE_BLOCK != arg[0]
5370 && VG_USERREQ__CREATE_MEMPOOL != arg[0]
5371 && VG_USERREQ__DESTROY_MEMPOOL != arg[0]
5372 && VG_USERREQ__MEMPOOL_ALLOC != arg[0]
5373 && VG_USERREQ__MEMPOOL_FREE != arg[0]
5374 && VG_USERREQ__MEMPOOL_TRIM != arg[0]
5375 && VG_USERREQ__MOVE_MEMPOOL != arg[0]
5376 && VG_USERREQ__MEMPOOL_CHANGE != arg[0]
5377 && VG_USERREQ__MEMPOOL_EXISTS != arg[0]
5378 && VG_USERREQ__GDB_MONITOR_COMMAND != arg[0])
5379 return False;
5380
5381 switch (arg[0]) {
5382 case VG_USERREQ__CHECK_MEM_IS_ADDRESSABLE:
5383 ok = is_mem_addressable ( arg[1], arg[2], &bad_addr );
5384 if (!ok)
5385 MC_(record_user_error) ( tid, bad_addr, /*isAddrErr*/True, 0 );
5386 *ret = ok ? (UWord)NULL : bad_addr;
5387 break;
5388
5389 case VG_USERREQ__CHECK_MEM_IS_DEFINED: {
5390 Bool errorV = False;
5391 Addr bad_addrV = 0;
5392 UInt otagV = 0;
5393 Bool errorA = False;
5394 Addr bad_addrA = 0;
5395 is_mem_defined_comprehensive(
5396 arg[1], arg[2],
5397 &errorV, &bad_addrV, &otagV, &errorA, &bad_addrA
5398 );
5399 if (errorV) {
5400 MC_(record_user_error) ( tid, bad_addrV,
5401 /*isAddrErr*/False, otagV );
5402 }
5403 if (errorA) {
5404 MC_(record_user_error) ( tid, bad_addrA,
5405 /*isAddrErr*/True, 0 );
5406 }
5407 /* Return the lower of the two erring addresses, if any. */
5408 *ret = 0;
5409 if (errorV && !errorA) {
5410 *ret = bad_addrV;
5411 }
5412 if (!errorV && errorA) {
5413 *ret = bad_addrA;
5414 }
5415 if (errorV && errorA) {
5416 *ret = bad_addrV < bad_addrA ? bad_addrV : bad_addrA;
5417 }
5418 break;
5419 }
5420
5421 case VG_USERREQ__DO_LEAK_CHECK: {
5422 LeakCheckParams lcp;
5423
5424 if (arg[1] == 0)
5425 lcp.mode = LC_Full;
5426 else if (arg[1] == 1)
5427 lcp.mode = LC_Summary;
5428 else {
5429 VG_(message)(Vg_UserMsg,
5430 "Warning: unknown memcheck leak search mode\n");
5431 lcp.mode = LC_Full;
5432 }
5433
5434 lcp.show_reachable = MC_(clo_show_reachable);
5435 lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
5436
5437 if (arg[2] == 0)
5438 lcp.deltamode = LCD_Any;
5439 else if (arg[2] == 1)
5440 lcp.deltamode = LCD_Increased;
5441 else if (arg[2] == 2)
5442 lcp.deltamode = LCD_Changed;
5443 else {
5444 VG_(message)
5445 (Vg_UserMsg,
5446 "Warning: unknown memcheck leak search deltamode\n");
5447 lcp.deltamode = LCD_Any;
5448 }
5449 lcp.max_loss_records_output = 999999999;
5450 lcp.requested_by_monitor_command = False;
5451
5452 MC_(detect_memory_leaks)(tid, &lcp);
5453 *ret = 0; /* return value is meaningless */
5454 break;
5455 }
5456
5457 case VG_USERREQ__MAKE_MEM_NOACCESS:
5458 MC_(make_mem_noaccess) ( arg[1], arg[2] );
5459 *ret = -1;
5460 break;
5461
5462 case VG_USERREQ__MAKE_MEM_UNDEFINED:
5463 make_mem_undefined_w_tid_and_okind ( arg[1], arg[2], tid,
5464 MC_OKIND_USER );
5465 *ret = -1;
5466 break;
5467
5468 case VG_USERREQ__MAKE_MEM_DEFINED:
5469 MC_(make_mem_defined) ( arg[1], arg[2] );
5470 *ret = -1;
5471 break;
5472
5473 case VG_USERREQ__MAKE_MEM_DEFINED_IF_ADDRESSABLE:
5474 make_mem_defined_if_addressable ( arg[1], arg[2] );
5475 *ret = -1;
5476 break;
5477
5478 case VG_USERREQ__CREATE_BLOCK: /* describe a block */
5479 if (arg[1] != 0 && arg[2] != 0) {
5480 i = alloc_client_block();
5481 /* VG_(printf)("allocated %d %p\n", i, cgbs); */
5482 cgbs[i].start = arg[1];
5483 cgbs[i].size = arg[2];
5484 cgbs[i].desc = VG_(strdup)("mc.mhcr.1", (Char *)arg[3]);
5485 cgbs[i].where = VG_(record_ExeContext) ( tid, 0/*first_ip_delta*/ );
5486 *ret = i;
5487 } else
5488 *ret = -1;
5489 break;
5490
5491 case VG_USERREQ__DISCARD: /* discard */
5492 if (cgbs == NULL
5493 || arg[2] >= cgb_used ||
5494 (cgbs[arg[2]].start == 0 && cgbs[arg[2]].size == 0)) {
5495 *ret = 1;
5496 } else {
5497 tl_assert(arg[2] >= 0 && arg[2] < cgb_used);
5498 cgbs[arg[2]].start = cgbs[arg[2]].size = 0;
5499 VG_(free)(cgbs[arg[2]].desc);
5500 cgb_discards++;
5501 *ret = 0;
5502 }
5503 break;
5504
5505 case VG_USERREQ__GET_VBITS:
5506 *ret = mc_get_or_set_vbits_for_client
5507 ( arg[1], arg[2], arg[3],
5508 False /* get them */,
5509 True /* is client request */ );
5510 break;
5511
5512 case VG_USERREQ__SET_VBITS:
5513 *ret = mc_get_or_set_vbits_for_client
5514 ( arg[1], arg[2], arg[3],
5515 True /* set them */,
5516 True /* is client request */ );
5517 break;
5518
5519 case VG_USERREQ__COUNT_LEAKS: { /* count leaked bytes */
5520 UWord** argp = (UWord**)arg;
5521 // MC_(bytes_leaked) et al were set by the last leak check (or zero
5522 // if no prior leak checks performed).
5523 *argp[1] = MC_(bytes_leaked) + MC_(bytes_indirect);
5524 *argp[2] = MC_(bytes_dubious);
5525 *argp[3] = MC_(bytes_reachable);
5526 *argp[4] = MC_(bytes_suppressed);
5527 // there is no argp[5]
5528 //*argp[5] = MC_(bytes_indirect);
5529 // XXX need to make *argp[1-4] defined; currently done in the
5530 // VALGRIND_COUNT_LEAKS_MACRO by initialising them to zero.
5531 *ret = 0;
5532 return True;
5533 }
5534 case VG_USERREQ__COUNT_LEAK_BLOCKS: { /* count leaked blocks */
5535 UWord** argp = (UWord**)arg;
5536 // MC_(blocks_leaked) et al were set by the last leak check (or zero
5537 // if no prior leak checks performed).
5538 *argp[1] = MC_(blocks_leaked) + MC_(blocks_indirect);
5539 *argp[2] = MC_(blocks_dubious);
5540 *argp[3] = MC_(blocks_reachable);
5541 *argp[4] = MC_(blocks_suppressed);
5542 // there is no argp[5]
5543 //*argp[5] = MC_(blocks_indirect);
5544 // XXX need to make *argp[1-4] defined; currently done in the
5545 // VALGRIND_COUNT_LEAK_BLOCKS_MACRO by initialising them to zero.
5546 *ret = 0;
5547 return True;
5548 }
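      /* Client-side sketch (reader's aid, not part of the tool): the two
         requests above are normally reached via macros from memcheck.h,
         roughly as in

            unsigned long leaked, dubious, reachable, suppressed;
            VALGRIND_DO_LEAK_CHECK;
            VALGRIND_COUNT_LEAKS(leaked, dubious, reachable, suppressed);

         The macros pre-initialise the four outputs to zero, which is what
         keeps *argp[1-4] defined, as noted in the XXX comments above. */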
5549 case VG_USERREQ__MALLOCLIKE_BLOCK: {
5550 Addr p = (Addr)arg[1];
5551 SizeT sizeB = arg[2];
5552 UInt rzB = arg[3];
5553 Bool is_zeroed = (Bool)arg[4];
5554
5555 MC_(new_block) ( tid, p, sizeB, /*ignored*/0, is_zeroed,
5556 MC_AllocCustom, MC_(malloc_list) );
5557 if (rzB > 0) {
5558 MC_(make_mem_noaccess) ( p - rzB, rzB);
5559 MC_(make_mem_noaccess) ( p + sizeB, rzB);
5560 }
5561 return True;
5562 }
5563 case VG_USERREQ__RESIZEINPLACE_BLOCK: {
5564 Addr p = (Addr)arg[1];
5565 SizeT oldSizeB = arg[2];
5566 SizeT newSizeB = arg[3];
5567 UInt rzB = arg[4];
5568
5569 MC_(handle_resizeInPlace) ( tid, p, oldSizeB, newSizeB, rzB );
5570 return True;
5571 }
5572 case VG_USERREQ__FREELIKE_BLOCK: {
5573 Addr p = (Addr)arg[1];
5574 UInt rzB = arg[2];
5575
5576 MC_(handle_free) ( tid, p, rzB, MC_AllocCustom );
5577 return True;
5578 }
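      /* Client-side sketch (reader's aid, not part of the tool): a custom
         allocator typically pairs the two requests above like

            void* p = my_alloc(n);                         // hypothetical
            VALGRIND_MALLOCLIKE_BLOCK(p, n, RZ, 0/*!zeroed*/);
            ...
            VALGRIND_FREELIKE_BLOCK(p, RZ);
            my_free(p);                                    // hypothetical

         where RZ is the allocator's redzone size (rzB above) and
         my_alloc/my_free stand in for the allocator's own routines. */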
5579
5580 case _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR: {
5581 Char* s = (Char*)arg[1];
5582 Addr dst = (Addr) arg[2];
5583 Addr src = (Addr) arg[3];
5584 SizeT len = (SizeT)arg[4];
5585 MC_(record_overlap_error)(tid, s, src, dst, len);
5586 return True;
5587 }
5588
5589 case VG_USERREQ__CREATE_MEMPOOL: {
5590 Addr pool = (Addr)arg[1];
5591 UInt rzB = arg[2];
5592 Bool is_zeroed = (Bool)arg[3];
5593
5594 MC_(create_mempool) ( pool, rzB, is_zeroed );
5595 return True;
5596 }
5597
5598 case VG_USERREQ__DESTROY_MEMPOOL: {
5599 Addr pool = (Addr)arg[1];
5600
5601 MC_(destroy_mempool) ( pool );
5602 return True;
5603 }
5604
5605 case VG_USERREQ__MEMPOOL_ALLOC: {
5606 Addr pool = (Addr)arg[1];
5607 Addr addr = (Addr)arg[2];
5608 UInt size = arg[3];
5609
5610 MC_(mempool_alloc) ( tid, pool, addr, size );
5611 return True;
5612 }
5613
5614 case VG_USERREQ__MEMPOOL_FREE: {
5615 Addr pool = (Addr)arg[1];
5616 Addr addr = (Addr)arg[2];
5617
5618 MC_(mempool_free) ( pool, addr );
5619 return True;
5620 }
5621
5622 case VG_USERREQ__MEMPOOL_TRIM: {
5623 Addr pool = (Addr)arg[1];
5624 Addr addr = (Addr)arg[2];
5625 UInt size = arg[3];
5626
5627 MC_(mempool_trim) ( pool, addr, size );
5628 return True;
5629 }
5630
5631 case VG_USERREQ__MOVE_MEMPOOL: {
5632 Addr poolA = (Addr)arg[1];
5633 Addr poolB = (Addr)arg[2];
5634
5635 MC_(move_mempool) ( poolA, poolB );
5636 return True;
5637 }
5638
5639 case VG_USERREQ__MEMPOOL_CHANGE: {
5640 Addr pool = (Addr)arg[1];
5641 Addr addrA = (Addr)arg[2];
5642 Addr addrB = (Addr)arg[3];
5643 UInt size = arg[4];
5644
5645 MC_(mempool_change) ( pool, addrA, addrB, size );
5646 return True;
5647 }
5648
5649 case VG_USERREQ__MEMPOOL_EXISTS: {
5650 Addr pool = (Addr)arg[1];
5651
5652 *ret = (UWord) MC_(mempool_exists) ( pool );
5653 return True;
5654 }
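      /* Client-side sketch (reader's aid, not part of the tool): the mempool
         requests above are normally driven as

            VALGRIND_CREATE_MEMPOOL(pool, RZ, 0/*!zeroed*/);
            VALGRIND_MEMPOOL_ALLOC(pool, p1, 100);
            VALGRIND_MEMPOOL_FREE(pool, p1);
            VALGRIND_DESTROY_MEMPOOL(pool);

         where 'pool' is any address that uniquely identifies the pool and
         RZ is a redzone size chosen by the allocator (both hypothetical
         names here). */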
5655
5656 case VG_USERREQ__GDB_MONITOR_COMMAND: {
5657 Bool handled = handle_gdb_monitor_command (tid, (Char*)arg[1]);
5658 if (handled)
5659 *ret = 1;
5660 else
5661 *ret = 0;
5662 return handled;
5663 }
5664
5665 default:
5666 VG_(message)(
5667 Vg_UserMsg,
5668 "Warning: unknown memcheck client request code %llx\n",
5669 (ULong)arg[0]
5670 );
5671 return False;
5672 }
5673 return True;
5674 }
5675
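/* Client-side sketch (reader's aid, not part of the tool): a minimal use of
   the memory-state requests dispatched above, assuming the installed
   memcheck.h header.  CHECK_MEM_IS_DEFINED evaluates to 0 on success and
   otherwise to the first offending address, matching the *ret convention
   in the handler above. */
#if 0
#include <valgrind/memcheck.h>

static void example_memory_state ( void )
{
   char buf[16];
   VALGRIND_MAKE_MEM_UNDEFINED(buf, sizeof(buf));
   if (VALGRIND_CHECK_MEM_IS_DEFINED(buf, sizeof(buf)) != 0) {
      /* some byte of buf is undefined or unaddressable; an error has
         also been recorded against this thread */
   }
   VALGRIND_MAKE_MEM_DEFINED(buf, sizeof(buf));
   VALGRIND_MAKE_MEM_NOACCESS(buf, sizeof(buf));
}
#endif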
5676
5677 /*------------------------------------------------------------*/
5678 /*--- Crude profiling machinery. ---*/
5679 /*------------------------------------------------------------*/
5680
5681 // We track a number of interesting events (using PROF_EVENT)
5682 // if MC_PROFILE_MEMORY is defined.
5683
5684 #ifdef MC_PROFILE_MEMORY
5685
5686 UInt MC_(event_ctr)[N_PROF_EVENTS];
5687 HChar* MC_(event_ctr_name)[N_PROF_EVENTS];
5688
5689 static void init_prof_mem ( void )
5690 {
5691 Int i;
5692 for (i = 0; i < N_PROF_EVENTS; i++) {
5693 MC_(event_ctr)[i] = 0;
5694 MC_(event_ctr_name)[i] = NULL;
5695 }
5696 }
5697
5698 static void done_prof_mem ( void )
5699 {
5700 Int i;
5701 Bool spaced = False;
5702 for (i = 0; i < N_PROF_EVENTS; i++) {
5703 if (!spaced && (i % 10) == 0) {
5704 VG_(printf)("\n");
5705 spaced = True;
5706 }
5707 if (MC_(event_ctr)[i] > 0) {
5708 spaced = False;
5709 VG_(printf)( "prof mem event %3d: %9d %s\n",
5710 i, MC_(event_ctr)[i],
5711 MC_(event_ctr_name)[i]
5712 ? MC_(event_ctr_name)[i] : "unnamed");
5713 }
5714 }
5715 }
5716
5717 #else
5718
5719 static void init_prof_mem ( void ) { }
5720 static void done_prof_mem ( void ) { }
5721
5722 #endif
5723
5724
5725 /*------------------------------------------------------------*/
5726 /*--- Origin tracking stuff ---*/
5727 /*------------------------------------------------------------*/
5728
5729 /*--------------------------------------------*/
5730 /*--- Origin tracking: load handlers ---*/
5731 /*--------------------------------------------*/
5732
5733 static INLINE UInt merge_origins ( UInt or1, UInt or2 ) {
5734 return or1 > or2 ? or1 : or2;
5735 }
5736
5737 UWord VG_REGPARM(1) MC_(helperc_b_load1)( Addr a ) {
5738 OCacheLine* line;
5739 UChar descr;
5740 UWord lineoff = oc_line_offset(a);
5741 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5742
5743 if (OC_ENABLE_ASSERTIONS) {
5744 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5745 }
5746
5747 line = find_OCacheLine( a );
5748
5749 descr = line->descr[lineoff];
5750 if (OC_ENABLE_ASSERTIONS) {
5751 tl_assert(descr < 0x10);
5752 }
5753
5754 if (LIKELY(0 == (descr & (1 << byteoff)))) {
5755 return 0;
5756 } else {
5757 return line->w32[lineoff];
5758 }
5759 }
5760
5761 UWord VG_REGPARM(1) MC_(helperc_b_load2)( Addr a ) {
5762 OCacheLine* line;
5763 UChar descr;
5764 UWord lineoff, byteoff;
5765
5766 if (UNLIKELY(a & 1)) {
5767 /* Handle misaligned case, slowly. */
5768 UInt oLo = (UInt)MC_(helperc_b_load1)( a + 0 );
5769 UInt oHi = (UInt)MC_(helperc_b_load1)( a + 1 );
5770 return merge_origins(oLo, oHi);
5771 }
5772
5773 lineoff = oc_line_offset(a);
5774 byteoff = a & 3; /* 0 or 2 */
5775
5776 if (OC_ENABLE_ASSERTIONS) {
5777 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5778 }
5779 line = find_OCacheLine( a );
5780
5781 descr = line->descr[lineoff];
5782 if (OC_ENABLE_ASSERTIONS) {
5783 tl_assert(descr < 0x10);
5784 }
5785
5786 if (LIKELY(0 == (descr & (3 << byteoff)))) {
5787 return 0;
5788 } else {
5789 return line->w32[lineoff];
5790 }
5791 }
5792
5793 UWord VG_REGPARM(1) MC_(helperc_b_load4)( Addr a ) {
5794 OCacheLine* line;
5795 UChar descr;
5796 UWord lineoff;
5797
5798 if (UNLIKELY(a & 3)) {
5799 /* Handle misaligned case, slowly. */
5800 UInt oLo = (UInt)MC_(helperc_b_load2)( a + 0 );
5801 UInt oHi = (UInt)MC_(helperc_b_load2)( a + 2 );
5802 return merge_origins(oLo, oHi);
5803 }
5804
5805 lineoff = oc_line_offset(a);
5806 if (OC_ENABLE_ASSERTIONS) {
5807 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5808 }
5809
5810 line = find_OCacheLine( a );
5811
5812 descr = line->descr[lineoff];
5813 if (OC_ENABLE_ASSERTIONS) {
5814 tl_assert(descr < 0x10);
5815 }
5816
5817 if (LIKELY(0 == descr)) {
5818 return 0;
5819 } else {
5820 return line->w32[lineoff];
5821 }
5822 }
5823
5824 UWord VG_REGPARM(1) MC_(helperc_b_load8)( Addr a ) {
5825 OCacheLine* line;
5826 UChar descrLo, descrHi, descr;
5827 UWord lineoff;
5828
5829 if (UNLIKELY(a & 7)) {
5830 /* Handle misaligned case, slowly. */
5831 UInt oLo = (UInt)MC_(helperc_b_load4)( a + 0 );
5832 UInt oHi = (UInt)MC_(helperc_b_load4)( a + 4 );
5833 return merge_origins(oLo, oHi);
5834 }
5835
5836 lineoff = oc_line_offset(a);
5837 if (OC_ENABLE_ASSERTIONS) {
5838 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5839 }
5840
5841 line = find_OCacheLine( a );
5842
5843 descrLo = line->descr[lineoff + 0];
5844 descrHi = line->descr[lineoff + 1];
5845 descr = descrLo | descrHi;
5846 if (OC_ENABLE_ASSERTIONS) {
5847 tl_assert(descr < 0x10);
5848 }
5849
5850 if (LIKELY(0 == descr)) {
5851 return 0; /* both 32-bit chunks are defined */
5852 } else {
5853 UInt oLo = descrLo == 0 ? 0 : line->w32[lineoff + 0];
5854 UInt oHi = descrHi == 0 ? 0 : line->w32[lineoff + 1];
5855 return merge_origins(oLo, oHi);
5856 }
5857 }
5858
5859 UWord VG_REGPARM(1) MC_(helperc_b_load16)( Addr a ) {
5860 UInt oLo = (UInt)MC_(helperc_b_load8)( a + 0 );
5861 UInt oHi = (UInt)MC_(helperc_b_load8)( a + 8 );
5862 UInt oBoth = merge_origins(oLo, oHi);
5863 return (UWord)oBoth;
5864 }
5865
5866 UWord VG_REGPARM(1) MC_(helperc_b_load32)( Addr a ) {
5867 UInt oQ0 = (UInt)MC_(helperc_b_load8)( a + 0 );
5868 UInt oQ1 = (UInt)MC_(helperc_b_load8)( a + 8 );
5869 UInt oQ2 = (UInt)MC_(helperc_b_load8)( a + 16 );
5870 UInt oQ3 = (UInt)MC_(helperc_b_load8)( a + 24 );
5871 UInt oAll = merge_origins(merge_origins(oQ0, oQ1),
5872 merge_origins(oQ2, oQ3));
5873 return (UWord)oAll;
5874 }
5875
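/* Worked example (reader's aid, not part of the tool): after the store
   handlers below run MC_(helperc_b_store1)(a, otag) for an address whose
   offset within its 32-bit word is 2, the cache line holds
   descr[lineoff] == 0x4 (only bit 2 set) and w32[lineoff] == otag.  A
   subsequent aligned 4-byte load of that word sees a nonzero descr and so
   returns w32[lineoff]; a 1-byte load of a different byte in the same word
   finds its own descr bit clear and returns 0 (no interesting origin). */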
5876
5877 /*--------------------------------------------*/
5878 /*--- Origin tracking: store handlers ---*/
5879 /*--------------------------------------------*/
5880
5881 void VG_REGPARM(2) MC_(helperc_b_store1)( Addr a, UWord d32 ) {
5882 OCacheLine* line;
5883 UWord lineoff = oc_line_offset(a);
5884 UWord byteoff = a & 3; /* 0, 1, 2 or 3 */
5885
5886 if (OC_ENABLE_ASSERTIONS) {
5887 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5888 }
5889
5890 line = find_OCacheLine( a );
5891
5892 if (d32 == 0) {
5893 line->descr[lineoff] &= ~(1 << byteoff);
5894 } else {
5895 line->descr[lineoff] |= (1 << byteoff);
5896 line->w32[lineoff] = d32;
5897 }
5898 }
5899
5900 void VG_REGPARM(2) MC_(helperc_b_store2)( Addr a, UWord d32 ) {
5901 OCacheLine* line;
5902 UWord lineoff, byteoff;
5903
5904 if (UNLIKELY(a & 1)) {
5905 /* Handle misaligned case, slowly. */
5906 MC_(helperc_b_store1)( a + 0, d32 );
5907 MC_(helperc_b_store1)( a + 1, d32 );
5908 return;
5909 }
5910
5911 lineoff = oc_line_offset(a);
5912 byteoff = a & 3; /* 0 or 2 */
5913
5914 if (OC_ENABLE_ASSERTIONS) {
5915 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5916 }
5917
5918 line = find_OCacheLine( a );
5919
5920 if (d32 == 0) {
5921 line->descr[lineoff] &= ~(3 << byteoff);
5922 } else {
5923 line->descr[lineoff] |= (3 << byteoff);
5924 line->w32[lineoff] = d32;
5925 }
5926 }
5927
5928 void VG_REGPARM(2) MC_(helperc_b_store4)( Addr a, UWord d32 ) {
5929 OCacheLine* line;
5930 UWord lineoff;
5931
5932 if (UNLIKELY(a & 3)) {
5933 /* Handle misaligned case, slowly. */
5934 MC_(helperc_b_store2)( a + 0, d32 );
5935 MC_(helperc_b_store2)( a + 2, d32 );
5936 return;
5937 }
5938
5939 lineoff = oc_line_offset(a);
5940 if (OC_ENABLE_ASSERTIONS) {
5941 tl_assert(lineoff >= 0 && lineoff < OC_W32S_PER_LINE);
5942 }
5943
5944 line = find_OCacheLine( a );
5945
5946 if (d32 == 0) {
5947 line->descr[lineoff] = 0;
5948 } else {
5949 line->descr[lineoff] = 0xF;
5950 line->w32[lineoff] = d32;
5951 }
5952 }
5953
5954 void VG_REGPARM(2) MC_(helperc_b_store8)( Addr a, UWord d32 ) {
5955 OCacheLine* line;
5956 UWord lineoff;
5957
5958 if (UNLIKELY(a & 7)) {
5959 /* Handle misaligned case, slowly. */
5960 MC_(helperc_b_store4)( a + 0, d32 );
5961 MC_(helperc_b_store4)( a + 4, d32 );
5962 return;
5963 }
5964
5965 lineoff = oc_line_offset(a);
5966 if (OC_ENABLE_ASSERTIONS) {
5967 tl_assert(lineoff == (lineoff & 6)); /*0,2,4,6*//*since 8-aligned*/
5968 }
5969
5970 line = find_OCacheLine( a );
5971
5972 if (d32 == 0) {
5973 line->descr[lineoff + 0] = 0;
5974 line->descr[lineoff + 1] = 0;
5975 } else {
5976 line->descr[lineoff + 0] = 0xF;
5977 line->descr[lineoff + 1] = 0xF;
5978 line->w32[lineoff + 0] = d32;
5979 line->w32[lineoff + 1] = d32;
5980 }
5981 }
5982
5983 void VG_REGPARM(2) MC_(helperc_b_store16)( Addr a, UWord d32 ) {
5984 MC_(helperc_b_store8)( a + 0, d32 );
5985 MC_(helperc_b_store8)( a + 8, d32 );
5986 }
5987
5988 void VG_REGPARM(2) MC_(helperc_b_store32)( Addr a, UWord d32 ) {
5989 MC_(helperc_b_store8)( a + 0, d32 );
5990 MC_(helperc_b_store8)( a + 8, d32 );
5991 MC_(helperc_b_store8)( a + 16, d32 );
5992 MC_(helperc_b_store8)( a + 24, d32 );
5993 }
5994
5995
5996 /*--------------------------------------------*/
5997 /*--- Origin tracking: sarp handlers ---*/
5998 /*--------------------------------------------*/
5999
6000 __attribute__((noinline))
6001 static void ocache_sarp_Set_Origins ( Addr a, UWord len, UInt otag ) {
6002 if ((a & 1) && len >= 1) {
6003 MC_(helperc_b_store1)( a, otag );
6004 a++;
6005 len--;
6006 }
6007 if ((a & 2) && len >= 2) {
6008 MC_(helperc_b_store2)( a, otag );
6009 a += 2;
6010 len -= 2;
6011 }
6012 if (len >= 4)
6013 tl_assert(0 == (a & 3));
6014 while (len >= 4) {
6015 MC_(helperc_b_store4)( a, otag );
6016 a += 4;
6017 len -= 4;
6018 }
6019 if (len >= 2) {
6020 MC_(helperc_b_store2)( a, otag );
6021 a += 2;
6022 len -= 2;
6023 }
6024 if (len >= 1) {
6025 MC_(helperc_b_store1)( a, otag );
6026 //a++;
6027 len--;
6028 }
6029 tl_assert(len == 0);
6030 }
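/* Worked example (reader's aid, not part of the tool): a call
   ocache_sarp_Set_Origins(0x1002, 9, otag) peels the range as
      2 bytes at 0x1002  (leading halfword, to reach 4-byte alignment)
      4 bytes at 0x1004  (aligned word loop)
      2 bytes at 0x1008  (trailing halfword)
      1 byte  at 0x100A  (trailing byte)
   so every byte is tagged using the widest store its alignment allows.
   ocache_sarp_Clear_Origins below peels identically, storing 0. */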
6031
6032 __attribute__((noinline))
6033 static void ocache_sarp_Clear_Origins ( Addr a, UWord len ) {
6034 if ((a & 1) && len >= 1) {
6035 MC_(helperc_b_store1)( a, 0 );
6036 a++;
6037 len--;
6038 }
6039 if ((a & 2) && len >= 2) {
6040 MC_(helperc_b_store2)( a, 0 );
6041 a += 2;
6042 len -= 2;
6043 }
6044 if (len >= 4)
6045 tl_assert(0 == (a & 3));
6046 while (len >= 4) {
6047 MC_(helperc_b_store4)( a, 0 );
6048 a += 4;
6049 len -= 4;
6050 }
6051 if (len >= 2) {
6052 MC_(helperc_b_store2)( a, 0 );
6053 a += 2;
6054 len -= 2;
6055 }
6056 if (len >= 1) {
6057 MC_(helperc_b_store1)( a, 0 );
6058 //a++;
6059 len--;
6060 }
6061 tl_assert(len == 0);
6062 }
6063
6064
6065 /*------------------------------------------------------------*/
6066 /*--- Setup and finalisation ---*/
6067 /*------------------------------------------------------------*/
6068
6069 static void mc_post_clo_init ( void )
6070 {
6071 /* If we've been asked to emit XML, mash around various other
6072 options so as to constrain the output somewhat. */
6073 if (VG_(clo_xml)) {
6074 /* Extract as much info as possible from the leak checker. */
6075 /* MC_(clo_show_reachable) = True; */
6076 MC_(clo_leak_check) = LC_Full;
6077 }
6078
6079 if (MC_(clo_freelist_big_blocks) >= MC_(clo_freelist_vol))
6080 VG_(message)(Vg_UserMsg,
6081 "Warning: --freelist-big-blocks value %lld has no effect\n"
6082 "as it is >= the --freelist-vol value %lld\n",
6083 MC_(clo_freelist_big_blocks),
6084 MC_(clo_freelist_vol));
6085
6086 tl_assert( MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3 );
6087
   if (MC_(clo_mc_level) == 3) {
      /* We're doing origin tracking. */
#     ifdef PERF_FAST_STACK
      VG_(track_new_mem_stack_4_w_ECU)   ( mc_new_mem_stack_4_w_ECU   );
      VG_(track_new_mem_stack_8_w_ECU)   ( mc_new_mem_stack_8_w_ECU   );
      VG_(track_new_mem_stack_12_w_ECU)  ( mc_new_mem_stack_12_w_ECU  );
      VG_(track_new_mem_stack_16_w_ECU)  ( mc_new_mem_stack_16_w_ECU  );
      VG_(track_new_mem_stack_32_w_ECU)  ( mc_new_mem_stack_32_w_ECU  );
      VG_(track_new_mem_stack_112_w_ECU) ( mc_new_mem_stack_112_w_ECU );
      VG_(track_new_mem_stack_128_w_ECU) ( mc_new_mem_stack_128_w_ECU );
      VG_(track_new_mem_stack_144_w_ECU) ( mc_new_mem_stack_144_w_ECU );
      VG_(track_new_mem_stack_160_w_ECU) ( mc_new_mem_stack_160_w_ECU );
#     endif
      VG_(track_new_mem_stack_w_ECU)     ( mc_new_mem_stack_w_ECU );
   } else {
      /* Not doing origin tracking */
#     ifdef PERF_FAST_STACK
      VG_(track_new_mem_stack_4)   ( mc_new_mem_stack_4   );
      VG_(track_new_mem_stack_8)   ( mc_new_mem_stack_8   );
      VG_(track_new_mem_stack_12)  ( mc_new_mem_stack_12  );
      VG_(track_new_mem_stack_16)  ( mc_new_mem_stack_16  );
      VG_(track_new_mem_stack_32)  ( mc_new_mem_stack_32  );
      VG_(track_new_mem_stack_112) ( mc_new_mem_stack_112 );
      VG_(track_new_mem_stack_128) ( mc_new_mem_stack_128 );
      VG_(track_new_mem_stack_144) ( mc_new_mem_stack_144 );
      VG_(track_new_mem_stack_160) ( mc_new_mem_stack_160 );
#     endif
      VG_(track_new_mem_stack)     ( mc_new_mem_stack );
   }

   /* This origin tracking cache is huge (~100M), so only initialise
      if we need it. */
   if (MC_(clo_mc_level) >= 3) {
      init_OCache();
      tl_assert(ocacheL1 != NULL);
      tl_assert(ocacheL2 != NULL);
   } else {
      tl_assert(ocacheL1 == NULL);
      tl_assert(ocacheL2 == NULL);
   }

   /* Do not check definedness of guest state if --undef-value-errors=no */
   if (MC_(clo_mc_level) >= 2)
      VG_(track_pre_reg_read) ( mc_pre_reg_read );
}

static void print_SM_info(char* type, int n_SMs)
{
   VG_(message)(Vg_DebugMsg,
      " memcheck: SMs: %s = %d (%ldk, %ldM)\n",
      type,
      n_SMs,
      n_SMs * sizeof(SecMap) / 1024UL,
      n_SMs * sizeof(SecMap) / (1024 * 1024UL) );
}

static void mc_fini ( Int exitcode )
{
   MC_(print_malloc_stats)();

   if (MC_(clo_leak_check) != LC_Off) {
      LeakCheckParams lcp;
      lcp.mode = MC_(clo_leak_check);
      lcp.show_reachable = MC_(clo_show_reachable);
      lcp.show_possibly_lost = MC_(clo_show_possibly_lost);
      lcp.deltamode = LCD_Any;
      lcp.max_loss_records_output = 999999999;
      lcp.requested_by_monitor_command = False;
      MC_(detect_memory_leaks)(1/*bogus ThreadId*/, &lcp);
   } else {
      if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
         VG_(umsg)(
            "For a detailed leak analysis, rerun with: --leak-check=full\n"
            "\n"
         );
      }
   }

   if (VG_(clo_verbosity) == 1 && !VG_(clo_xml)) {
      VG_(message)(Vg_UserMsg,
                   "For counts of detected and suppressed errors, rerun with: -v\n");
   }

   if (MC_(any_value_errors) && !VG_(clo_xml) && VG_(clo_verbosity) >= 1
       && MC_(clo_mc_level) == 2) {
      VG_(message)(Vg_UserMsg,
                   "Use --track-origins=yes to see where "
                   "uninitialised values come from\n");
   }

   done_prof_mem();

   if (VG_(clo_stats)) {
      SizeT max_secVBit_szB, max_SMs_szB, max_shmem_szB;

      VG_(message)(Vg_DebugMsg,
         " memcheck: sanity checks: %d cheap, %d expensive\n",
         n_sanity_cheap, n_sanity_expensive );
      VG_(message)(Vg_DebugMsg,
         " memcheck: auxmaps: %lld auxmap entries (%lldk, %lldM) in use\n",
         n_auxmap_L2_nodes,
         n_auxmap_L2_nodes * 64,
         n_auxmap_L2_nodes / 16 );
      VG_(message)(Vg_DebugMsg,
         " memcheck: auxmaps_L1: %lld searches, %lld cmps, ratio %lld:10\n",
         n_auxmap_L1_searches, n_auxmap_L1_cmps,
         (10ULL * n_auxmap_L1_cmps)
            / (n_auxmap_L1_searches ? n_auxmap_L1_searches : 1)
      );
      VG_(message)(Vg_DebugMsg,
         " memcheck: auxmaps_L2: %lld searches, %lld nodes\n",
         n_auxmap_L2_searches, n_auxmap_L2_nodes
      );

      print_SM_info("n_issued     ", n_issued_SMs);
      print_SM_info("n_deissued   ", n_deissued_SMs);
      print_SM_info("max_noaccess ", max_noaccess_SMs);
      print_SM_info("max_undefined", max_undefined_SMs);
      print_SM_info("max_defined  ", max_defined_SMs);
      print_SM_info("max_non_DSM  ", max_non_DSM_SMs);

      // Three DSMs, plus the non-DSM ones
      max_SMs_szB = (3 + max_non_DSM_SMs) * sizeof(SecMap);
      // The 3*sizeof(Word) bytes is the AVL node metadata size.
      // The VG_ROUNDUP is because the OSet pool allocator will/must align
      // the elements on pointer size.
      // Note that the pool allocator has some additional small overhead
      // which is not counted in the below.
      // Hardwiring this logic sucks, but I don't see how else to do it.
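      // Illustrative figures only (assuming a 64-bit host): Word is 8 bytes
      // there, so each node is costed as 3*8 = 24 bytes of AVL metadata plus
      // sizeof(SecVBitNode) rounded up to a multiple of sizeof(void*).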
      max_secVBit_szB = max_secVBit_nodes *
         (3*sizeof(Word) + VG_ROUNDUP(sizeof(SecVBitNode), sizeof(void*)));
      max_shmem_szB = sizeof(primary_map) + max_SMs_szB + max_secVBit_szB;

      VG_(message)(Vg_DebugMsg,
         " memcheck: max sec V bit nodes: %d (%ldk, %ldM)\n",
         max_secVBit_nodes, max_secVBit_szB / 1024,
         max_secVBit_szB / (1024 * 1024));
      VG_(message)(Vg_DebugMsg,
         " memcheck: set_sec_vbits8 calls: %llu (new: %llu, updates: %llu)\n",
         sec_vbits_new_nodes + sec_vbits_updates,
         sec_vbits_new_nodes, sec_vbits_updates );
      VG_(message)(Vg_DebugMsg,
         " memcheck: max shadow mem size: %ldk, %ldM\n",
         max_shmem_szB / 1024, max_shmem_szB / (1024 * 1024));

      if (MC_(clo_mc_level) >= 3) {
         VG_(message)(Vg_DebugMsg,
                      " ocacheL1: %'12lu refs %'12lu misses (%'lu lossage)\n",
                      stats_ocacheL1_find,
                      stats_ocacheL1_misses,
                      stats_ocacheL1_lossage );
         VG_(message)(Vg_DebugMsg,
                      " ocacheL1: %'12lu at 0 %'12lu at 1\n",
                      stats_ocacheL1_find - stats_ocacheL1_misses
                         - stats_ocacheL1_found_at_1
                         - stats_ocacheL1_found_at_N,
                      stats_ocacheL1_found_at_1 );
         VG_(message)(Vg_DebugMsg,
                      " ocacheL1: %'12lu at 2+ %'12lu move-fwds\n",
                      stats_ocacheL1_found_at_N,
                      stats_ocacheL1_movefwds );
         VG_(message)(Vg_DebugMsg,
                      " ocacheL1: %'12lu sizeB %'12u useful\n",
                      (UWord)sizeof(OCache),
                      4 * OC_W32S_PER_LINE * OC_LINES_PER_SET * OC_N_SETS );
         VG_(message)(Vg_DebugMsg,
                      " ocacheL2: %'12lu refs %'12lu misses\n",
                      stats__ocacheL2_refs,
                      stats__ocacheL2_misses );
         VG_(message)(Vg_DebugMsg,
                      " ocacheL2: %'9lu max nodes %'9lu curr nodes\n",
                      stats__ocacheL2_n_nodes_max,
                      stats__ocacheL2_n_nodes );
         VG_(message)(Vg_DebugMsg,
                      " niacache: %'12lu refs %'12lu misses\n",
                      stats__nia_cache_queries, stats__nia_cache_misses);
      } else {
         tl_assert(ocacheL1 == NULL);
         tl_assert(ocacheL2 == NULL);
      }
   }

   if (0) {
      VG_(message)(Vg_DebugMsg,
         "------ Valgrind's client block stats follow ---------------\n" );
      show_client_block_stats();
   }
}

/* Mark the given addr/len as unaddressable for the watchpoint
   implementation.  The PointKind will be handled at access time. */
static Bool mc_mark_unaddressable_for_watchpoint (PointKind kind, Bool insert,
                                                  Addr addr, SizeT len)
{
   /* GDBTD: this is somewhat fishy.  We should arguably save the previous
      accessibility and definedness in gdbserver so that they can be
      restored properly.  Currently, we assume that the user only watches
      things which are properly addressable and defined. */
   if (insert)
      MC_(make_mem_noaccess) (addr, len);
   else
      MC_(make_mem_defined)  (addr, len);
   return True;
}

static void mc_pre_clo_init(void)
{
   VG_(details_name)            ("Memcheck");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a memory error detector");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 640 );

   VG_(basic_tool_funcs)          (mc_post_clo_init,
                                   MC_(instrument),
                                   mc_fini);

   VG_(needs_final_IR_tidy_pass)  ( MC_(final_tidy) );


   VG_(needs_core_errors)         ();
   VG_(needs_tool_errors)         (MC_(eq_Error),
                                   MC_(before_pp_Error),
                                   MC_(pp_Error),
                                   True,/*show TIDs for errors*/
                                   MC_(update_Error_extra),
                                   MC_(is_recognised_suppression),
                                   MC_(read_extra_suppression_info),
                                   MC_(error_matches_suppression),
                                   MC_(get_error_name),
                                   MC_(get_extra_suppression_info));
   VG_(needs_libc_freeres)        ();
   VG_(needs_command_line_options)(mc_process_cmd_line_options,
                                   mc_print_usage,
                                   mc_print_debug_usage);
   VG_(needs_client_requests)     (mc_handle_client_request);
   VG_(needs_sanity_checks)       (mc_cheap_sanity_check,
                                   mc_expensive_sanity_check);
   VG_(needs_malloc_replacement)  (MC_(malloc),
                                   MC_(__builtin_new),
                                   MC_(__builtin_vec_new),
                                   MC_(memalign),
                                   MC_(calloc),
                                   MC_(free),
                                   MC_(__builtin_delete),
                                   MC_(__builtin_vec_delete),
                                   MC_(realloc),
                                   MC_(malloc_usable_size),
                                   MC_MALLOC_DEFAULT_REDZONE_SZB );
   MC_(Malloc_Redzone_SzB) = VG_(malloc_effective_client_redzone_size)();

   VG_(needs_xml_output)          ();

   VG_(track_new_mem_startup)     ( mc_new_mem_startup );
   VG_(track_new_mem_stack_signal)( make_mem_undefined_w_tid );
   // We assume that brk()/sbrk() does not initialise new memory.  Is this
   // accurate?  John Reiser says:
   //
   //   0)  sbrk() can *decrease* process address space.  No zero fill is done
   //   for a decrease, not even the fragment on the high end of the last page
   //   that is beyond the new highest address.  For maximum safety and
   //   portability, the bytes in the last page that reside above [the
   //   new] sbrk(0) should be considered to be uninitialized, but in practice
   //   it is exceedingly likely that they will retain their previous
   //   contents.
   //
   //   1)  If an increase is large enough to require new whole pages, then
   //   those new whole pages (like all new pages) are zero-filled by the
   //   operating system.  So if sbrk(0) already is page aligned, then
   //   sbrk(PAGE_SIZE) *does* zero-fill the new memory.
   //
   //   2)  Any increase that lies within an existing allocated page is not
   //   changed.  So if (x = sbrk(0)) is not page aligned, then
   //   sbrk(PAGE_SIZE) yields ((PAGE_SIZE -1) & -x) bytes which keep their
   //   existing contents, and an additional PAGE_SIZE bytes which are zeroed.
   //   ((PAGE_SIZE -1) & x) of them are "covered" by the sbrk(), and the rest
   //   of them come along for the ride because the operating system deals
   //   only in whole pages.  Again, for maximum safety and portability,
   //   anything that lives above [the new] sbrk(0) should be considered
   //   uninitialized, but in practice will retain previous contents [zero in
   //   this case].
   //
   // In short:
   //
   //   A key property of sbrk/brk is that new whole pages that are supplied
   //   by the operating system *do* get initialized to zero.
   //
   // As for the portability of all this:
   //
   //   sbrk and brk are not POSIX.  However, any system that is a derivative
   //   of *nix has sbrk and brk because there are too many programs (such as
   //   the Bourne shell) which rely on the traditional memory map (.text,
   //   .data+.bss, stack) and the existence of sbrk/brk.
   //
   // So we should arguably observe all this.  However:
   // - The current inaccuracy has caused maybe one complaint in seven years(?)
   // - Relying on the zeroed-ness of whole brk'd pages is pretty grotty... I
   //   doubt most programmers know the above information.
   // So I'm not terribly unhappy with marking it as undefined. --njn.
   //
   // [More:  I think most of what John said only applies to sbrk().  It seems
   // that brk() always deals in whole pages.  And since this event deals
   // directly with brk(), not with sbrk(), perhaps it would be reasonable to
   // just mark all memory it allocates as defined.]
   //
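   // Illustrative sketch of the arithmetic in (2) above, not part of the
   // tool's logic: with PAGE_SIZE = 0x1000 and an unaligned break
   // x = 0x601234, sbrk(PAGE_SIZE) leaves (PAGE_SIZE-1) & -x = 0xdcc (3532)
   // bytes, from x up to the page boundary at 0x602000, with their old
   // contents; the OS then supplies one fresh zero-filled page, of which
   // (PAGE_SIZE-1) & x = 0x234 (564) bytes lie below the new break at
   // 0x602234, and the remaining 3532 bytes "come along for the ride".
   //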
   VG_(track_new_mem_brk)         ( make_mem_undefined_w_tid );

   // Handling of mmap and mprotect isn't simple (well, it is simple,
   // but the justification isn't.)  See comments above, just prior to
   // mc_new_mem_mmap.
   VG_(track_new_mem_mmap)        ( mc_new_mem_mmap );
   VG_(track_change_mem_mprotect) ( mc_new_mem_mprotect );

   VG_(track_copy_mem_remap)      ( MC_(copy_address_range_state) );

   VG_(track_die_mem_stack_signal)( MC_(make_mem_noaccess) );
   VG_(track_die_mem_brk)         ( MC_(make_mem_noaccess) );
   VG_(track_die_mem_munmap)      ( MC_(make_mem_noaccess) );

   /* Defer the specification of the new_mem_stack functions to the
      post_clo_init function, since we need to first parse the command
      line before deciding which set to use. */

#  ifdef PERF_FAST_STACK
   VG_(track_die_mem_stack_4)     ( mc_die_mem_stack_4   );
   VG_(track_die_mem_stack_8)     ( mc_die_mem_stack_8   );
   VG_(track_die_mem_stack_12)    ( mc_die_mem_stack_12  );
   VG_(track_die_mem_stack_16)    ( mc_die_mem_stack_16  );
   VG_(track_die_mem_stack_32)    ( mc_die_mem_stack_32  );
   VG_(track_die_mem_stack_112)   ( mc_die_mem_stack_112 );
   VG_(track_die_mem_stack_128)   ( mc_die_mem_stack_128 );
   VG_(track_die_mem_stack_144)   ( mc_die_mem_stack_144 );
   VG_(track_die_mem_stack_160)   ( mc_die_mem_stack_160 );
#  endif
   VG_(track_die_mem_stack)       ( mc_die_mem_stack );

   VG_(track_ban_mem_stack)       ( MC_(make_mem_noaccess) );

   VG_(track_pre_mem_read)        ( check_mem_is_defined );
   VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   VG_(track_pre_mem_write)       ( check_mem_is_addressable );
   VG_(track_post_mem_write)      ( mc_post_mem_write );

   VG_(track_post_reg_write)                  ( mc_post_reg_write );
   VG_(track_post_reg_write_clientcall_return)( mc_post_reg_write_clientcall );

   VG_(needs_watchpoint)          ( mc_mark_unaddressable_for_watchpoint );

   init_shadow_memory();
   MC_(chunk_poolalloc) = VG_(newPA) (sizeof(MC_Chunk),
                                      1000,
                                      VG_(malloc),
                                      "mc.cMC.1 (MC_Chunk pools)",
                                      VG_(free));
   MC_(malloc_list)  = VG_(HT_construct)( "MC_(malloc_list)" );
   MC_(mempool_list) = VG_(HT_construct)( "MC_(mempool_list)" );
   init_prof_mem();

   tl_assert( mc_expensive_sanity_check() );

   // {LOADV,STOREV}[8421] will all fail horribly if this isn't true.
   tl_assert(sizeof(UWord) == sizeof(Addr));
   // Call me paranoid.  I don't care.
   tl_assert(sizeof(void*) == sizeof(Addr));

   // BYTES_PER_SEC_VBIT_NODE must be a power of two.
   tl_assert(-1 != VG_(log2)(BYTES_PER_SEC_VBIT_NODE));

   /* This is small.  Always initialise it. */
   init_nia_to_ecu_cache();

   /* We can't initialise ocacheL1/ocacheL2 yet, since we don't know
      if we need to, since the command line args haven't been
      processed yet.  Hence defer it to mc_post_clo_init. */
   tl_assert(ocacheL1 == NULL);
   tl_assert(ocacheL2 == NULL);

   /* Check some important stuff.  See extensive comments above
      re UNALIGNED_OR_HIGH for background. */
#  if VG_WORDSIZE == 4
   tl_assert(sizeof(void*) == 4);
   tl_assert(sizeof(Addr)  == 4);
   tl_assert(sizeof(UWord) == 4);
   tl_assert(sizeof(Word)  == 4);
   tl_assert(MAX_PRIMARY_ADDRESS == 0xFFFFFFFFUL);
   tl_assert(MASK(1) == 0UL);
   tl_assert(MASK(2) == 1UL);
   tl_assert(MASK(4) == 3UL);
   tl_assert(MASK(8) == 7UL);
#  else
   tl_assert(VG_WORDSIZE == 8);
   tl_assert(sizeof(void*) == 8);
   tl_assert(sizeof(Addr)  == 8);
   tl_assert(sizeof(UWord) == 8);
   tl_assert(sizeof(Word)  == 8);
   tl_assert(MAX_PRIMARY_ADDRESS == 0x7FFFFFFFFULL);
   tl_assert(MASK(1) == 0xFFFFFFF800000000ULL);
   tl_assert(MASK(2) == 0xFFFFFFF800000001ULL);
   tl_assert(MASK(4) == 0xFFFFFFF800000003ULL);
   tl_assert(MASK(8) == 0xFFFFFFF800000007ULL);
#  endif
}

VG_DETERMINE_INTERFACE_VERSION(mc_pre_clo_init)

/*--------------------------------------------------------------------*/
/*--- end                                             mc_main.c   ---*/
/*--------------------------------------------------------------------*/