
//--------------------------------------------------------------------//
//--- DHAT: a Dynamic Heap Analysis Tool                 dh_main.c ---//
//--------------------------------------------------------------------//

/*
   This file is part of DHAT, a Valgrind tool for profiling the
   heap usage of programs.

   Copyright (C) 2010-2013 Mozilla Inc

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* Contributed by Julian Seward <jseward@acm.org> */


#include "pub_tool_basics.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
#include "pub_tool_mallocfree.h"
#include "pub_tool_options.h"
#include "pub_tool_replacemalloc.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_wordfm.h"

#define HISTOGRAM_SIZE_LIMIT 1024
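/* Blocks larger than this limit get no per-byte access histogram.
   The cap bounds the per-block overhead: each histogrammed block
   costs req_szB * sizeof(UShort) bytes of counters (see new_block),
   so at most 2KB of counters per 1KB block at this limit. */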


//------------------------------------------------------------//
//--- Globals                                              ---//
//------------------------------------------------------------//

// Number of guest instructions executed so far.  This is
// incremented directly from the generated code.
static ULong g_guest_instrs_executed = 0;

// Summary statistics for the entire run.
static ULong g_tot_blocks = 0;   // total blocks allocated
static ULong g_tot_bytes  = 0;   // total bytes allocated

static ULong g_cur_blocks_live = 0; // curr # blocks live
static ULong g_cur_bytes_live  = 0; // curr # bytes live

static ULong g_max_blocks_live = 0; // bytes and blocks at
static ULong g_max_bytes_live  = 0; // the max residency point


//------------------------------------------------------------//
//--- an Interval Tree of live blocks                      ---//
//------------------------------------------------------------//

/* Tracks information about live blocks. */
typedef
   struct {
      Addr        payload;
      SizeT       req_szB;
      ExeContext* ap;        /* allocation ec */
      ULong       allocd_at; /* instruction number */
      ULong       n_reads;
      ULong       n_writes;
      /* Approx histogram, one 16-bit counter per payload byte.  Counts
         therefore saturate at 0xFFFF.  Can be NULL if the block is
         resized or if the block is larger than HISTOGRAM_SIZE_LIMIT. */
      UShort*     histoW; /* [0 .. req_szB-1] */
   }
   Block;

/* May not contain zero-sized blocks.  May not contain
   overlapping blocks. */
static WordFM* interval_tree = NULL;  /* WordFM* Block* void */

/* Here's the comparison function.  Since the tree is required
   to contain non-zero sized, non-overlapping blocks, it's good
   enough to consider any overlap as a match. */
static Word interval_tree_Cmp ( UWord k1, UWord k2 )
{
   Block* b1 = (Block*)k1;
   Block* b2 = (Block*)k2;
   tl_assert(b1->req_szB > 0);
   tl_assert(b2->req_szB > 0);
   if (b1->payload + b1->req_szB <= b2->payload) return -1;
   if (b2->payload + b2->req_szB <= b1->payload) return  1;
   return 0;
}
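
/* Worked example: for blocks [0x1000,0x1010) and [0x1008,0x1018),
   neither "ends at or before the other starts" test fires, so the
   comparator returns 0 and the WordFM treats the two keys as equal.
   find_Block_containing (below) exploits this by probing with a fake
   1-byte block: any real block containing that byte overlaps it,
   compares equal, and is therefore returned by VG_(lookupFM). */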

// 2-entry cache for find_Block_containing
static Block* fbc_cache0 = NULL;
static Block* fbc_cache1 = NULL;

static UWord stats__n_fBc_cached   = 0;
static UWord stats__n_fBc_uncached = 0;
static UWord stats__n_fBc_notfound = 0;

static Block* find_Block_containing ( Addr a )
{
   if (LIKELY(fbc_cache0
              && fbc_cache0->payload <= a
              && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
      // found at 0
      stats__n_fBc_cached++;
      return fbc_cache0;
   }
   if (LIKELY(fbc_cache1
              && fbc_cache1->payload <= a
              && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
      // found at 1; swap 0 and 1
      Block* tmp = fbc_cache0;
      fbc_cache0 = fbc_cache1;
      fbc_cache1 = tmp;
      stats__n_fBc_cached++;
      return fbc_cache0;
   }
   Block fake;
   fake.payload = a;
   fake.req_szB = 1;
   UWord foundkey = 1;
   UWord foundval = 1;
   Bool found = VG_(lookupFM)( interval_tree,
                               &foundkey, &foundval, (UWord)&fake );
   if (!found) {
      stats__n_fBc_notfound++;
      return NULL;
   }
   tl_assert(foundval == 0); // we don't store vals in the interval tree
   tl_assert(foundkey != 1);
   Block* res = (Block*)foundkey;
   tl_assert(res != &fake);
   // put at the top position
   fbc_cache1 = fbc_cache0;
   fbc_cache0 = res;
   stats__n_fBc_uncached++;
   return res;
}

// delete a block; asserts if not found.  (viz, 'a' must be
// known to be present.)
static void delete_Block_starting_at ( Addr a )
{
   Block fake;
   fake.payload = a;
   fake.req_szB = 1;
   Bool found = VG_(delFromFM)( interval_tree,
                                NULL, NULL, (Addr)&fake );
   tl_assert(found);
   fbc_cache0 = fbc_cache1 = NULL;
}
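
/* The cache flush above is deliberately unconditional: the caller is
   typically about to free the Block (die_block) or move it to a new
   address (renew_block), so any cached pointer could go stale.
   Flushing both entries on every delete is the simple safe option. */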


//------------------------------------------------------------//
//--- a FM of allocation points (APs)                      ---//
//------------------------------------------------------------//

typedef
   struct {
      // the allocation point that we're summarising stats for
      ExeContext* ap;
      // used when printing results
      Bool shown;
      // The current number of blocks and bytes live for this AP
      ULong cur_blocks_live;
      ULong cur_bytes_live;
      // The number of blocks and bytes live at the max-liveness
      // point.  Note this is a bit subtle.  max_blocks_live is not
      // the maximum number of live blocks, but rather the number of
      // blocks live at the point of maximum byte liveness.  These are
      // not necessarily the same thing.
      ULong max_blocks_live;
      ULong max_bytes_live;
      // Total number of blocks and bytes allocated by this AP.
      ULong tot_blocks;
      ULong tot_bytes;
      // Sum of death ages for all blocks allocated by this AP,
      // that have subsequently been freed.
      ULong death_ages_sum;
      ULong deaths;
      // Total number of reads and writes in all blocks allocated
      // by this AP.
      ULong n_reads;
      ULong n_writes;
      /* Histogram information.  We maintain a histogram aggregated for
         all retiring Blocks allocated by this AP, but only if:
         - this AP has only ever allocated objects of one size
         - that size is <= HISTOGRAM_SIZE_LIMIT
         What we need therefore is a mechanism to see if this AP
         has only ever allocated blocks of one size.

         3 states:
            Unknown          because no retirement yet
            Exactly xsize    all retiring blocks are of this size
            Mixed            multiple different sizes seen
      */
      enum { Unknown=999, Exactly, Mixed } xsize_tag;
      SizeT xsize;
      UInt* histo; /* [0 .. xsize-1] */
   }
   APInfo;
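
/* The xsize_tag state machine, as driven by retire_Block:

      Unknown --(first retirement, size s)-> Exactly (xsize == s)
      Exactly --(retirement of a different size)-> Mixed
      Mixed   --(any further retirement)-> Mixed   (absorbing)

   Only the Exactly state carries an aggregated histogram. */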

/* maps ExeContext*'s to APInfo*'s.  Note that the keys must match the
   .ap field in the values. */
static WordFM* apinfo = NULL;  /* WordFM* ExeContext* APInfo* */


/* 'bk' is being introduced (has just been allocated).  Find the
   relevant APInfo entry for it, or create one, based on the block's
   allocation EC.  Then, update the APInfo to the extent that we
   actually can, to reflect the allocation. */
static void intro_Block ( Block* bk )
{
   tl_assert(bk);
   tl_assert(bk->ap);

   APInfo* api   = NULL;
   UWord   keyW  = 0;
   UWord   valW  = 0;
   Bool    found = VG_(lookupFM)( apinfo,
                                  &keyW, &valW, (UWord)bk->ap );
   if (found) {
      api = (APInfo*)valW;
      tl_assert(keyW == (UWord)bk->ap);
   } else {
      api = VG_(malloc)( "dh.main.intro_Block.1", sizeof(APInfo) );
      VG_(memset)(api, 0, sizeof(*api));
      api->ap = bk->ap;
      Bool present = VG_(addToFM)( apinfo,
                                   (UWord)bk->ap, (UWord)api );
      tl_assert(!present);
      // histo stuff
      tl_assert(api->deaths == 0);
      api->xsize_tag = Unknown;
      api->xsize = 0;
      if (0) VG_(printf)("api %p --> Unknown\n", api);
   }

   tl_assert(api->ap == bk->ap);

   /* So: update stats to reflect an allocation */

   // # live blocks
   api->cur_blocks_live++;

   // # live bytes
   api->cur_bytes_live += bk->req_szB;
   if (api->cur_bytes_live > api->max_bytes_live) {
      api->max_bytes_live  = api->cur_bytes_live;
      api->max_blocks_live = api->cur_blocks_live;
   }

   // total blocks and bytes allocated here
   api->tot_blocks++;
   api->tot_bytes += bk->req_szB;

   // update summary globals
   g_tot_blocks++;
   g_tot_bytes += bk->req_szB;

   g_cur_blocks_live++;
   g_cur_bytes_live += bk->req_szB;
   if (g_cur_bytes_live > g_max_bytes_live) {
      g_max_bytes_live  = g_cur_bytes_live;
      g_max_blocks_live = g_cur_blocks_live;
   }
}


/* 'bk' is retiring (being freed).  Find the relevant APInfo entry for
   it, which must already exist.  Then, fold info from 'bk' into that
   entry.  'because_freed' is True if the block is retiring because
   the client has freed it.  If it is False then the block is retiring
   because the program has finished, in which case we want to skip the
   updates of the total blocks live etc for this AP, but still fold in
   the access counts and histo data that have so far accumulated for
   the block. */
static void retire_Block ( Block* bk, Bool because_freed )
{
   tl_assert(bk);
   tl_assert(bk->ap);

   APInfo* api   = NULL;
   UWord   keyW  = 0;
   UWord   valW  = 0;
   Bool    found = VG_(lookupFM)( apinfo,
                                  &keyW, &valW, (UWord)bk->ap );

   tl_assert(found);
   api = (APInfo*)valW;
   tl_assert(api->ap == bk->ap);

   // update stats following this free.
   if (0)
      VG_(printf)("ec %p  api->c_by_l %llu  bk->rszB %llu\n",
                  bk->ap, api->cur_bytes_live, (ULong)bk->req_szB);

   // update total blocks live etc for this AP
   if (because_freed) {
      tl_assert(api->cur_blocks_live >= 1);
      tl_assert(api->cur_bytes_live >= bk->req_szB);
      api->cur_blocks_live--;
      api->cur_bytes_live -= bk->req_szB;

      api->deaths++;

      tl_assert(bk->allocd_at <= g_guest_instrs_executed);
      api->death_ages_sum += (g_guest_instrs_executed - bk->allocd_at);

      // update global summary stats
      tl_assert(g_cur_blocks_live > 0);
      g_cur_blocks_live--;
      tl_assert(g_cur_bytes_live >= bk->req_szB);
      g_cur_bytes_live -= bk->req_szB;
   }

   // access counts
   api->n_reads  += bk->n_reads;
   api->n_writes += bk->n_writes;

   // histo stuff.  First, do state transitions for xsize/xsize_tag.
   switch (api->xsize_tag) {

      case Unknown:
         tl_assert(api->xsize == 0);
         tl_assert(api->deaths == 1 || api->deaths == 0);
         tl_assert(!api->histo);
         api->xsize_tag = Exactly;
         api->xsize = bk->req_szB;
         if (0) VG_(printf)("api %p --> Exactly(%lu)\n", api, api->xsize);
         // and allocate the histo
         if (bk->histoW) {
            api->histo = VG_(malloc)("dh.main.retire_Block.1",
                                     api->xsize * sizeof(UInt));
            VG_(memset)(api->histo, 0, api->xsize * sizeof(UInt));
         }
         break;

      case Exactly:
         //tl_assert(api->deaths > 1);
         if (bk->req_szB != api->xsize) {
            if (0) VG_(printf)("api %p --> Mixed(%lu -> %lu)\n",
                               api, api->xsize, bk->req_szB);
            api->xsize_tag = Mixed;
            api->xsize = 0;
            // deallocate the histo, if any
            if (api->histo) {
               VG_(free)(api->histo);
               api->histo = NULL;
            }
         }
         break;

      case Mixed:
         //tl_assert(api->deaths > 1);
         break;

      default:
         tl_assert(0);
   }

   // See if we can fold the histo data from this block into
   // the data for the AP
   if (api->xsize_tag == Exactly && api->histo && bk->histoW) {
      tl_assert(api->xsize == bk->req_szB);
      UWord i;
      for (i = 0; i < api->xsize; i++) {
         // FIXME: do something better in case of overflow of api->histo[..]
         // Right now, at least don't let it overflow/wrap around
         if (api->histo[i] <= 0xFFFE0000)
            api->histo[i] += (UInt)bk->histoW[i];
      }
      if (0) VG_(printf)("fold in, AP = %p\n", api);
   }

#if 0
   if (bk->histoW) {
      VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
      UWord i;
      for (i = 0; i < bk->req_szB; i++)
         VG_(printf)("%u ", (UInt)bk->histoW[i]);
      VG_(printf)("\n");
   } else {
      VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
   }
#endif
}

/* This handles block resizing.  When a block with AP 'ec' has a
   size change of 'delta', call here to update the APInfo. */
static void apinfo_change_cur_bytes_live( ExeContext* ec, Long delta )
{
   APInfo* api   = NULL;
   UWord   keyW  = 0;
   UWord   valW  = 0;
   Bool    found = VG_(lookupFM)( apinfo,
                                  &keyW, &valW, (UWord)ec );

   tl_assert(found);
   api = (APInfo*)valW;
   tl_assert(api->ap == ec);

   if (delta < 0) {
      tl_assert(api->cur_bytes_live >= -delta);
      tl_assert(g_cur_bytes_live >= -delta);
   }

   // adjust current live size
   api->cur_bytes_live += delta;
   g_cur_bytes_live += delta;

   if (delta > 0 && api->cur_bytes_live > api->max_bytes_live) {
      api->max_bytes_live  = api->cur_bytes_live;
      api->max_blocks_live = api->cur_blocks_live;
   }

   // update global summary stats
   if (delta > 0 && g_cur_bytes_live > g_max_bytes_live) {
      g_max_bytes_live  = g_cur_bytes_live;
      g_max_blocks_live = g_cur_blocks_live;
   }
   if (delta > 0)
      g_tot_bytes += delta;

   // adjust total allocation size
   if (delta > 0)
      api->tot_bytes += delta;
}
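
/* Note that tot_bytes only ever increases here: growing a block by
   'delta' counts as 'delta' bytes of fresh allocation (turnover), but
   shrinking one does not un-count bytes that were already allocated. */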


//------------------------------------------------------------//
//--- update both Block and APInfos after {m,re}alloc/free ---//
//------------------------------------------------------------//

static
void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
                  Bool is_zeroed )
{
   tl_assert(p == NULL); // don't handle custom allocators right now
   SizeT actual_szB /*, slop_szB*/;

   if ((SSizeT)req_szB < 0) return NULL;

   if (req_szB == 0)
      req_szB = 1;  /* can't allow zero-sized blocks in the interval tree */

   // Allocate and zero if necessary
   if (!p) {
      p = VG_(cli_malloc)( req_alignB, req_szB );
      if (!p) {
         return NULL;
      }
      if (is_zeroed) VG_(memset)(p, 0, req_szB);
      actual_szB = VG_(malloc_usable_size)(p);
      tl_assert(actual_szB >= req_szB);
      /* slop_szB = actual_szB - req_szB; */
   } else {
      /* slop_szB = 0; */
   }

   // Make a new Block node for it, and add it to the interval tree
   Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
   bk->payload   = (Addr)p;
   bk->req_szB   = req_szB;
   bk->ap        = VG_(record_ExeContext)(tid, 0/*first word delta*/);
   bk->allocd_at = g_guest_instrs_executed;
   bk->n_reads   = 0;
   bk->n_writes  = 0;
   // set up histogram array, if the block isn't too large
   bk->histoW = NULL;
   if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
      bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
      VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
   }

   Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
   tl_assert(!present);
   fbc_cache0 = fbc_cache1 = NULL;

   intro_Block(bk);

   if (0) VG_(printf)("ALLOC %lu -> %p\n", req_szB, p);

   return p;
}

static
void die_block ( void* p, Bool custom_free )
{
   tl_assert(!custom_free);  // at least for now

   Block* bk = find_Block_containing( (Addr)p );

   if (!bk) {
      return; // bogus free
   }

   tl_assert(bk->req_szB > 0);
   // assert the block finder is behaving sanely
   tl_assert(bk->payload <= (Addr)p);
   tl_assert( (Addr)p < bk->payload + bk->req_szB );

   if (bk->payload != (Addr)p) {
      return; // bogus free
   }

   if (0) VG_(printf)(" FREE %p %llu\n",
                      p, g_guest_instrs_executed - bk->allocd_at);

   retire_Block(bk, True/*because_freed*/);

   VG_(cli_free)( (void*)bk->payload );
   delete_Block_starting_at( bk->payload );
   if (bk->histoW) {
      VG_(free)( bk->histoW );
      bk->histoW = NULL;
   }
   VG_(free)( bk );
}


static
void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
{
   if (0) VG_(printf)("REALL %p %lu\n", p_old, new_req_szB);
   void* p_new = NULL;

   tl_assert(new_req_szB > 0); // map 0 to 1

   // Find the old block.
   Block* bk = find_Block_containing( (Addr)p_old );
   if (!bk) {
      return NULL; // bogus realloc
   }

   tl_assert(bk->req_szB > 0);
   // assert the block finder is behaving sanely
   tl_assert(bk->payload <= (Addr)p_old);
   tl_assert( (Addr)p_old < bk->payload + bk->req_szB );

   if (bk->payload != (Addr)p_old) {
      return NULL; // bogus realloc
   }

   // Keeping the histogram alive in any meaningful way across
   // block resizing is too darn complicated.  Just throw it away.
   if (bk->histoW) {
      VG_(free)(bk->histoW);
      bk->histoW = NULL;
   }

   // Actually do the allocation, if necessary.
   if (new_req_szB <= bk->req_szB) {

      // New size is smaller or same; block not moved.
      apinfo_change_cur_bytes_live(bk->ap,
                                   (Long)new_req_szB - (Long)bk->req_szB);
      bk->req_szB = new_req_szB;
      return p_old;

   } else {

      // New size is bigger; make new block, copy shared contents, free old.
      p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
      if (!p_new) {
         // Nb: if realloc fails, NULL is returned but the old block is not
         // touched.  What an awful function.
         return NULL;
      }
      tl_assert(p_new != p_old);

      VG_(memcpy)(p_new, p_old, bk->req_szB);
      VG_(cli_free)(p_old);

      // Since the block has moved, we need to re-insert it into the
      // interval tree at the new place.  Do this by removing
      // and re-adding it.
      delete_Block_starting_at( (Addr)p_old );
      // now 'bk' is no longer in the tree, but the Block itself
      // is still alive

      // Update the metadata.
      apinfo_change_cur_bytes_live(bk->ap,
                                   (Long)new_req_szB - (Long)bk->req_szB);
      bk->payload = (Addr)p_new;
      bk->req_szB = new_req_szB;

      // and re-add
      Bool present
         = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
      tl_assert(!present);
      fbc_cache0 = fbc_cache1 = NULL;

      return p_new;
   }
   /*NOTREACHED*/
   tl_assert(0);
}
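
/* Note: the shrink/same-size path above never touches the interval
   tree, so the find_Block_containing cache stays valid; the cached
   Block* is unchanged and its payload does not move.  Only the grow
   path, which moves the block, deletes and re-inserts it, and thereby
   flushes the cache. */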


//------------------------------------------------------------//
//--- malloc() et al replacement wrappers                  ---//
//------------------------------------------------------------//

static void* dh_malloc ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* dh___builtin_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}

static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
{
   return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
}

static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
{
   return new_block( tid, NULL, szB, alignB, False );
}

static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
{
   die_block( p, /*custom_free*/False );
}

static void dh___builtin_delete ( ThreadId tid, void* p )
{
   die_block( p, /*custom_free*/False);
}

static void dh___builtin_vec_delete ( ThreadId tid, void* p )
{
   die_block( p, /*custom_free*/False );
}

static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
{
   if (p_old == NULL) {
      return dh_malloc(tid, new_szB);
   }
   if (new_szB == 0) {
      dh_free(tid, p_old);
      return NULL;
   }
   return renew_block(tid, p_old, new_szB);
}

static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
{
   tl_assert(0);
//zz   HP_Chunk* hc = VG_(HT_lookup)( malloc_list, (UWord)p );
//zz
//zz   return ( hc ? hc->req_szB + hc->slop_szB : 0 );
}
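
/* As written, any client call to malloc_usable_size() hits the
   tl_assert(0) above and aborts the run.  The //zz lines sketch the
   intended implementation, which looks like a leftover from a
   hash-table-based design (HP_Chunk / malloc_list are Massif-style
   names, not structures defined in this file). */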

//------------------------------------------------------------//
//--- memory references                                    ---//
//------------------------------------------------------------//

static
void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
{
   UWord i, offMin, offMax1;
   offMin = addr - bk->payload;
   tl_assert(offMin < bk->req_szB);
   offMax1 = offMin + szB;
   if (offMax1 > bk->req_szB)
      offMax1 = bk->req_szB;
   //VG_(printf)("%lu %lu (size of block %lu)\n", offMin, offMax1, bk->req_szB);
   for (i = offMin; i < offMax1; i++) {
      UShort n = bk->histoW[i];
      if (n < 0xFFFF) n++;
      bk->histoW[i] = n;
   }
}

static VG_REGPARM(2)
void dh_handle_write ( Addr addr, UWord szB )
{
   Block* bk = find_Block_containing(addr);
   if (bk) {
      bk->n_writes += szB;
      if (bk->histoW)
         inc_histo_for_block(bk, addr, szB);
   }
}

static VG_REGPARM(2)
void dh_handle_read ( Addr addr, UWord szB )
{
   Block* bk = find_Block_containing(addr);
   if (bk) {
      bk->n_reads += szB;
      if (bk->histoW)
         inc_histo_for_block(bk, addr, szB);
   }
}


// Handle reads and writes by syscalls (read == kernel
// reads user space, write == kernel writes user space).
// Assumes no such read or write spans a heap block
// boundary and so we can treat it just as one giant
// read or write.
static
void dh_handle_noninsn_read ( CorePart part, ThreadId tid, const HChar* s,
                              Addr base, SizeT size )
{
   switch (part) {
      case Vg_CoreSysCall:
         dh_handle_read(base, size);
         break;
      case Vg_CoreSysCallArgInMem:
         break;
      case Vg_CoreTranslate:
         break;
      default:
         tl_assert(0);
   }
}

static
void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
                               Addr base, SizeT size )
{
   switch (part) {
      case Vg_CoreSysCall:
         dh_handle_write(base, size);
         break;
      case Vg_CoreSignal:
         break;
      default:
         tl_assert(0);
   }
}


//------------------------------------------------------------//
//--- Instrumentation                                      ---//
//------------------------------------------------------------//

#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define assign(_t, _e)           IRStmt_WrTmp((_t), (_e))

static
void add_counter_update(IRSB* sbOut, Int n)
{
   #if defined(VG_BIGENDIAN)
   # define END Iend_BE
   #elif defined(VG_LITTLEENDIAN)
   # define END Iend_LE
   #else
   # error "Unknown endianness"
   #endif
   // Add code to increment 'g_guest_instrs_executed' by 'n', like this:
   //   WrTmp(t1, Load64(&g_guest_instrs_executed))
   //   WrTmp(t2, Add64(RdTmp(t1), Const(n)))
   //   Store(&g_guest_instrs_executed, t2)
   IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
   IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
   IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_guest_instrs_executed );

   IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
   IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
   IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));

   addStmtToIRSB( sbOut, st1 );
   addStmtToIRSB( sbOut, st2 );
   addStmtToIRSB( sbOut, st3 );
}
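
/* The load/add/store sequence above is not atomic, but that is safe
   here: Valgrind serialises guest threads, so at most one thread is
   executing generated code (and hence touching the counter) at any
   moment. */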

static
void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
                 Int goff_sp)
{
   IRType   tyAddr  = Ity_INVALID;
   const HChar* hName = NULL;
   void*    hAddr   = NULL;
   IRExpr** argv    = NULL;
   IRDirty* di      = NULL;

   const Int THRESH = 4096 * 4; // somewhat arbitrary
   const Int rz_szB = VG_STACK_REDZONE_SZB;

   tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);

   if (isWrite) {
      hName = "dh_handle_write";
      hAddr = &dh_handle_write;
   } else {
      hName = "dh_handle_read";
      hAddr = &dh_handle_read;
   }

   argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );

   /* Add the helper. */
   tl_assert(hName);
   tl_assert(hAddr);
   tl_assert(argv);
   di = unsafeIRDirty_0_N( 2/*regparms*/,
                           hName, VG_(fnptr_to_fnentry)( hAddr ),
                           argv );

   /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
      some arbitrary N.  If that fails then addr is in the range (SP -
      RZ .. SP + N - RZ).  If N is smallish (a page?) then we can say
      addr is within a page of SP and so can't possibly be a heap
      access, and so can be skipped. */
   IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));

   IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB(
      sbOut,
      assign(sp_minus_rz,
             tyAddr == Ity_I32
                ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
                : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
   );

   IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
   addStmtToIRSB(
      sbOut,
      assign(diff,
             tyAddr == Ity_I32
                ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
                : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
   );

   IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
   addStmtToIRSB(
      sbOut,
      assign(guard,
             tyAddr == Ity_I32
                ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
                : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
   );
   di->guard = mkexpr(guard);

   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
}
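
/* Worked example of the guard: with SP = 0x1000, RZ = 128 and
   THRESH = 16384, we get diff = addr - 0xF80 (unsigned).  The helper
   is called only when diff >u 16384, so the window
   [0xF80, 0xF80 + 16384) is skipped as "near the stack pointer".
   An access at SP+8 gives diff = 136 and is filtered out without a
   call; an address below SP-RZ wraps to a huge unsigned value and so
   still reaches the helper, since it might be a heap access. */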

static
IRSB* dh_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   Int   i, n = 0;
   IRSB* sbOut;
   IRTypeEnv* tyenv = sbIn->tyenv;

   const Int goff_sp = layout->offset_SP;

   // We increment the instruction count in two places:
   // - just before any Ist_Exit statements;
   // - just before the IRSB's end.
   // In the former case, we zero 'n' and then continue instrumenting.

   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];

      if (!st || st->tag == Ist_NoOp) continue;

      switch (st->tag) {

         case Ist_IMark: {
            n++;
            break;
         }

         case Ist_Exit: {
            if (n > 0) {
               // Add an increment before the Exit statement, then reset 'n'.
               add_counter_update(sbOut, n);
               n = 0;
            }
            break;
         }

         case Ist_WrTmp: {
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(data->Iex.Load.ty),
                            aexpr, goff_sp );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data  = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addMemEvent( sbOut, True/*isWrite*/,
                         sizeofIRType(typeOfIRExpr(tyenv, data)),
                         aexpr, goff_sp );
            break;
         }

         case Ist_Dirty: {
            Int      dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, False/*!isWrite*/,
                               dataSize, d->mAddr, goff_sp );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, True/*isWrite*/,
                               dataSize, d->mAddr, goff_sp );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            addMemEvent( sbOut, False/*!isWrite*/,
                         dataSize, cas->addr, goff_sp );
            addMemEvent( sbOut, True/*isWrite*/,
                         dataSize, cas->addr, goff_sp );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addMemEvent( sbOut, True/*isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            }
            break;
         }

         default:
            break;
      }

      addStmtToIRSB( sbOut, st );
   }

   if (n > 0) {
      // Add an increment before the SB end.
      add_counter_update(sbOut, n);
   }
   return sbOut;
}

#undef binop
#undef mkexpr
#undef mkU32
#undef mkU64
#undef assign


//------------------------------------------------------------//
//--- Command line args                                    ---//
//------------------------------------------------------------//

// FORWARDS
static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
                              /*OUT*/Bool* increasingP,
                              const HChar* metric_name );

static Int clo_show_top_n = 10;
static const HChar* clo_sort_by = "max-bytes-live";

static Bool dh_process_cmd_line_option(const HChar* arg)
{
   if VG_BINT_CLO(arg, "--show-top-n", clo_show_top_n, 1, 100000) {}

   else if VG_STR_CLO(arg, "--sort-by", clo_sort_by) {
      ULong (*dummyFn)(APInfo*);
      Bool dummyB;
      Bool ok = identify_metric( &dummyFn, &dummyB, clo_sort_by);
      if (!ok)
         return False;
      // otherwise it's OK, in which case leave it alone.
      // show_top_n_apinfos will later convert the string by a
      // second call to identify_metric.
   }

   else
      return VG_(replacement_malloc_process_cmd_line_option)(arg);

   return True;
}
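
/* Typical invocation (assuming the tool is launched under the name
   "exp-dhat", as in Valgrind releases of this era):

      valgrind --tool=exp-dhat --show-top-n=100 \
               --sort-by=tot-bytes-allocd ./myprog
*/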


static void dh_print_usage(void)
{
   VG_(printf)(
"    --show-top-n=number       show the top <number> alloc points [10]\n"
"    --sort-by=string\n"
"            sort the allocation points by the metric\n"
"            defined by <string>, thusly:\n"
"               max-bytes-live    maximum live bytes [default]\n"
"               tot-bytes-allocd  total allocation (turnover)\n"
"               max-blocks-live   maximum live blocks\n"
   );
}

static void dh_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}


//------------------------------------------------------------//
//--- Finalisation                                         ---//
//------------------------------------------------------------//

static void show_N_div_100( /*OUT*/HChar* buf, ULong n )
{
   ULong nK = n / 100;
   ULong nR = n % 100;
   VG_(sprintf)(buf, "%llu.%s%llu", nK,
                nR < 10 ? "0" : "",
                nR);
}
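
/* E.g. show_N_div_100(buf, 1234) produces "12.34", and
   show_N_div_100(buf, 105) produces "1.05"; the padding "0" keeps
   two digits after the decimal point. */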

static void show_APInfo ( APInfo* api )
{
   HChar bufA[80];
   VG_(memset)(bufA, 0, sizeof(bufA));
   if (api->tot_blocks > 0) {
      show_N_div_100( bufA, ((ULong)api->tot_bytes * 100ULL)
                            / (ULong)api->tot_blocks );
   } else {
      bufA[0] = 'N'; bufA[1] = 'a'; bufA[2] = 'N';
   }

   VG_(umsg)("max-live:    %'llu in %'llu blocks\n",
             api->max_bytes_live, api->max_blocks_live);
   VG_(umsg)("tot-alloc:   %'llu in %'llu blocks (avg size %s)\n",
             api->tot_bytes, api->tot_blocks, bufA);

   tl_assert(api->tot_blocks >= api->max_blocks_live);
   tl_assert(api->tot_bytes >= api->max_bytes_live);

   if (api->deaths > 0) {
      // Average Age at Death
      ULong aad = api->deaths == 0
                  ? 0 : (api->death_ages_sum / api->deaths);
      // AAD as a fraction of the total program lifetime (so far),
      // measured in ten-thousand-ths (aad_frac_10k == 10000 means the
      // complete lifetime of the program).
      ULong aad_frac_10k
         = g_guest_instrs_executed == 0
           ? 0 : (10000ULL * aad) / g_guest_instrs_executed;
      HChar buf[16];
      show_N_div_100(buf, aad_frac_10k);
      VG_(umsg)("deaths:      %'llu, at avg age %'llu "
                "(%s%% of prog lifetime)\n",
                api->deaths, aad, buf );
   } else {
      VG_(umsg)("deaths:      none (none of these blocks were freed)\n");
   }

   HChar bufR[80], bufW[80];
   VG_(memset)(bufR, 0, sizeof(bufR));
   VG_(memset)(bufW, 0, sizeof(bufW));
   if (api->tot_bytes > 0) {
      show_N_div_100(bufR, (100ULL * api->n_reads) / api->tot_bytes);
      show_N_div_100(bufW, (100ULL * api->n_writes) / api->tot_bytes);
   } else {
      VG_(strcat)(bufR, "Inf");
      VG_(strcat)(bufW, "Inf");
   }

   VG_(umsg)("acc-ratios:  %s rd, %s wr "
             " (%'llu b-read, %'llu b-written)\n",
             bufR, bufW,
             api->n_reads, api->n_writes);

   VG_(pp_ExeContext)(api->ap);

   if (api->histo && api->xsize_tag == Exactly) {
      VG_(umsg)("\nAggregated access counts by offset:\n");
      VG_(umsg)("\n");
      UWord i;
      if (api->xsize > 0)
         VG_(umsg)("[   0] ");
      for (i = 0; i < api->xsize; i++) {
         if (i > 0 && (i % 16) == 0 && i != api->xsize-1) {
            VG_(umsg)("\n");
            VG_(umsg)("[%4lu] ", i);
         }
         VG_(umsg)("%u ", api->histo[i]);
      }
      VG_(umsg)("\n");
   }
}


/* Metric-access functions for APInfos. */
static ULong get_metric__max_bytes_live ( APInfo* api ) {
   return api->max_bytes_live;
}
static ULong get_metric__tot_bytes ( APInfo* api ) {
   return api->tot_bytes;
}
static ULong get_metric__max_blocks_live ( APInfo* api ) {
   return api->max_blocks_live;
}

/* Given a string, return the metric-access function and also a Bool
   indicating whether we want increasing or decreasing values of the
   metric.  This is used twice, once in command line processing, and
   then again in show_top_n_apinfos.  Returns False if the given
   string could not be identified. */
static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
                              /*OUT*/Bool* increasingP,
                              const HChar* metric_name )
{
   if (0 == VG_(strcmp)(metric_name, "max-bytes-live")) {
      *get_metricP = get_metric__max_bytes_live;
      *increasingP = False;
      return True;
   }
   if (0 == VG_(strcmp)(metric_name, "tot-bytes-allocd")) {
      *get_metricP = get_metric__tot_bytes;
      *increasingP = False;
      return True;
   }
   if (0 == VG_(strcmp)(metric_name, "max-blocks-live")) {
      *get_metricP = get_metric__max_blocks_live;
      *increasingP = False;
      return True;
   }
   return False;
}


static void show_top_n_apinfos ( void )
{
   Int   i;
   UWord keyW, valW;
   ULong (*get_metric)(APInfo*);
   Bool  increasing;

   const HChar* metric_name = clo_sort_by;
   tl_assert(metric_name); // ensured by clo processing

   Bool ok = identify_metric( &get_metric, &increasing, metric_name );
   tl_assert(ok); // ensured by clo processing

   VG_(umsg)("\n");
   VG_(umsg)("======== ORDERED BY %s \"%s\": "
             "top %d allocators ========\n",
             increasing ? "increasing" : "decreasing",
             metric_name, clo_show_top_n );

   // Clear all .shown bits
   VG_(initIterFM)( apinfo );
   while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
      APInfo* api = (APInfo*)valW;
      tl_assert(api && api->ap == (ExeContext*)keyW);
      api->shown = False;
   }
   VG_(doneIterFM)( apinfo );

   // Now print the top N entries.  Each one requires a
   // complete scan of the set.  Duh.
   for (i = 0; i < clo_show_top_n; i++) {
      ULong   best_metric = increasing ? ~0ULL : 0ULL;
      APInfo* best_api    = NULL;

      VG_(initIterFM)( apinfo );
      while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
         APInfo* api = (APInfo*)valW;
         if (api->shown)
            continue;
         ULong metric = get_metric(api);
         if (increasing ? (metric < best_metric) : (metric > best_metric)) {
            best_metric = metric;
            best_api    = api;
         }
      }
      VG_(doneIterFM)( apinfo );

      if (!best_api)
         break; // all APs have been shown.  Stop.

      VG_(umsg)("\n");
      VG_(umsg)("-------------------- %d of %d --------------------\n",
                i+1, clo_show_top_n );
      show_APInfo(best_api);
      best_api->shown = True;
   }

   VG_(umsg)("\n");
}


static void dh_fini(Int exit_status)
{
   // Before printing statistics, we must harvest access counts for
   // all the blocks that are still alive.  Not doing so gives
   // access ratios which are too low (zero, in the worst case)
   // for such blocks, since the accesses that do get made will
   // (if we skip this step) not get folded into the AP summaries.
   UWord keyW, valW;
   VG_(initIterFM)( interval_tree );
   while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
      Block* bk = (Block*)keyW;
      tl_assert(valW == 0);
      tl_assert(bk);
      retire_Block(bk, False/*!because_freed*/);
   }
   VG_(doneIterFM)( interval_tree );

   // show results
   VG_(umsg)("======== SUMMARY STATISTICS ========\n");
   VG_(umsg)("\n");
   VG_(umsg)("guest_insns:  %'llu\n", g_guest_instrs_executed);
   VG_(umsg)("\n");
   VG_(umsg)("max_live:     %'llu in %'llu blocks\n",
             g_max_bytes_live, g_max_blocks_live);
   VG_(umsg)("\n");
   VG_(umsg)("tot_alloc:    %'llu in %'llu blocks\n",
             g_tot_bytes, g_tot_blocks);
   VG_(umsg)("\n");
   if (g_tot_bytes > 0) {
      VG_(umsg)("insns per allocated byte: %'llu\n",
                g_guest_instrs_executed / g_tot_bytes);
      VG_(umsg)("\n");
   }

   show_top_n_apinfos();

   VG_(umsg)("\n");
   VG_(umsg)("\n");
   VG_(umsg)("==============================================================\n");
   VG_(umsg)("\n");
   VG_(umsg)("Some hints (see --help for command line option details):\n");
   VG_(umsg)("\n");
   VG_(umsg)("* summary stats for the whole program are at the top of this output\n");
   VG_(umsg)("\n");
   VG_(umsg)("* --show-top-n= controls how many alloc points are shown.\n");
   VG_(umsg)("  You probably want to set it much higher than\n");
   VG_(umsg)("  the default value (10)\n");
   VG_(umsg)("\n");
   VG_(umsg)("* --sort-by= specifies the sort key for output.\n");
   VG_(umsg)("  See --help for details.\n");
   VG_(umsg)("\n");
   VG_(umsg)("* Each allocation stack, by default 12 frames, counts as\n");
   VG_(umsg)("  a separate alloc point.  This causes the data to be spread out\n");
   VG_(umsg)("  over far too many alloc points.  I strongly suggest using\n");
   VG_(umsg)("  --num-callers=4 or some such, to reduce the spreading.\n");
   VG_(umsg)("\n");

   if (VG_(clo_stats)) {
      VG_(dmsg)(" dhat: find_Block_containing:\n");
      VG_(dmsg)("       found: %'lu (%'lu cached + %'lu uncached)\n",
                stats__n_fBc_cached + stats__n_fBc_uncached,
                stats__n_fBc_cached,
                stats__n_fBc_uncached);
      VG_(dmsg)("       notfound: %'lu\n", stats__n_fBc_notfound);
      VG_(dmsg)("\n");
   }
}


//------------------------------------------------------------//
//--- Initialisation                                       ---//
//------------------------------------------------------------//

static void dh_post_clo_init(void)
{
}

static void dh_pre_clo_init(void)
{
   VG_(details_name)            ("DHAT");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a dynamic heap analysis tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2010-2013, and GNU GPL'd, by Mozilla Inc");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);

   // Basic functions.
   VG_(basic_tool_funcs)          (dh_post_clo_init,
                                   dh_instrument,
                                   dh_fini);
//zz
   // Needs.
   VG_(needs_libc_freeres)();
   VG_(needs_command_line_options)(dh_process_cmd_line_option,
                                   dh_print_usage,
                                   dh_print_debug_usage);
//zz   VG_(needs_client_requests)     (dh_handle_client_request);
//zz   VG_(needs_sanity_checks)       (dh_cheap_sanity_check,
//zz                                   dh_expensive_sanity_check);
   VG_(needs_malloc_replacement)  (dh_malloc,
                                   dh___builtin_new,
                                   dh___builtin_vec_new,
                                   dh_memalign,
                                   dh_calloc,
                                   dh_free,
                                   dh___builtin_delete,
                                   dh___builtin_vec_delete,
                                   dh_realloc,
                                   dh_malloc_usable_size,
                                   0 );

   VG_(track_pre_mem_read)        ( dh_handle_noninsn_read );
   //VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   VG_(track_post_mem_write)      ( dh_handle_noninsn_write );

   tl_assert(!interval_tree);
   tl_assert(!fbc_cache0);
   tl_assert(!fbc_cache1);

   interval_tree = VG_(newFM)( VG_(malloc),
                               "dh.main.interval_tree.1",
                               VG_(free),
                               interval_tree_Cmp );

   apinfo = VG_(newFM)( VG_(malloc),
                        "dh.main.apinfo.1",
                        VG_(free),
                        NULL/*unboxedcmp*/ );
}

VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)

//--------------------------------------------------------------------//
//--- end                                                dh_main.c ---//
//--------------------------------------------------------------------//