• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 //--------------------------------------------------------------------*/
3 //--- DHAT: a Dynamic Heap Analysis Tool                 dh_main.c ---*/
4 //--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of DHAT, a Valgrind tool for profiling the
8    heap usage of programs.
9 
10    Copyright (C) 2010-2013 Mozilla Inc
11 
12    This program is free software; you can redistribute it and/or
13    modify it under the terms of the GNU General Public License as
14    published by the Free Software Foundation; either version 2 of the
15    License, or (at your option) any later version.
16 
17    This program is distributed in the hope that it will be useful, but
18    WITHOUT ANY WARRANTY; without even the implied warranty of
19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20    General Public License for more details.
21 
22    You should have received a copy of the GNU General Public License
23    along with this program; if not, write to the Free Software
24    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25    02111-1307, USA.
26 
27    The GNU General Public License is contained in the file COPYING.
28 */
29 
30 /* Contributed by Julian Seward <jseward@acm.org> */
31 
32 
33 #include "pub_tool_basics.h"
34 #include "pub_tool_libcbase.h"
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_machine.h"      // VG_(fnptr_to_fnentry)
38 #include "pub_tool_mallocfree.h"
39 #include "pub_tool_options.h"
40 #include "pub_tool_replacemalloc.h"
41 #include "pub_tool_tooliface.h"
42 #include "pub_tool_wordfm.h"
43 
44 #define HISTOGRAM_SIZE_LIMIT 1024
45 
46 
47 //------------------------------------------------------------//
48 //--- Globals                                              ---//
49 //------------------------------------------------------------//
50 
51 // Number of guest instructions executed so far.  This is
52 // incremented directly from the generated code.
53 static ULong g_guest_instrs_executed = 0;
54 
55 // Summary statistics for the entire run.
56 static ULong g_tot_blocks = 0;   // total blocks allocated
57 static ULong g_tot_bytes  = 0;   // total bytes allocated
58 
59 static ULong g_cur_blocks_live = 0; // curr # blocks live
60 static ULong g_cur_bytes_live  = 0; // curr # bytes live
61 
62 static ULong g_max_blocks_live = 0; // bytes and blocks at
63 static ULong g_max_bytes_live  = 0; // the max residency point
64 
65 
66 //------------------------------------------------------------//
67 //--- an Interval Tree of live blocks                      ---//
68 //------------------------------------------------------------//
69 
70 /* Tracks information about live blocks. */
71 typedef
72    struct {
73       Addr        payload;
74       SizeT       req_szB;
75       ExeContext* ap;  /* allocation ec */
76       ULong       allocd_at; /* instruction number */
77       ULong       n_reads;
78       ULong       n_writes;
79       /* Approx histogram, one byte per payload byte.  Counts latch up
80          therefore at 0xFFFF.  Can be NULL if the block is resized or if
81          the block is larger than HISTOGRAM_SIZE_LIMIT. */
82       UShort*     histoW; /* [0 .. req_szB-1] */
83    }
84    Block;
85 
86 /* May not contain zero-sized blocks.  May not contain
87    overlapping blocks. */
88 static WordFM* interval_tree = NULL;  /* WordFM* Block* void */
89 
90 /* Here's the comparison function.  Since the tree is required
91 to contain non-zero sized, non-overlapping blocks, it's good
92 enough to consider any overlap as a match. */
interval_tree_Cmp(UWord k1,UWord k2)93 static Word interval_tree_Cmp ( UWord k1, UWord k2 )
94 {
95    Block* b1 = (Block*)k1;
96    Block* b2 = (Block*)k2;
97    tl_assert(b1->req_szB > 0);
98    tl_assert(b2->req_szB > 0);
99    if (b1->payload + b1->req_szB <= b2->payload) return -1;
100    if (b2->payload + b2->req_szB <= b1->payload) return  1;
101    return 0;
102 }
103 
104 // 2-entry cache for find_Block_containing
105 static Block* fbc_cache0 = NULL;
106 static Block* fbc_cache1 = NULL;
107 
108 static UWord stats__n_fBc_cached = 0;
109 static UWord stats__n_fBc_uncached = 0;
110 static UWord stats__n_fBc_notfound = 0;
111 
find_Block_containing(Addr a)112 static Block* find_Block_containing ( Addr a )
113 {
114    if (LIKELY(fbc_cache0
115               && fbc_cache0->payload <= a
116               && a < fbc_cache0->payload + fbc_cache0->req_szB)) {
117       // found at 0
118       stats__n_fBc_cached++;
119       return fbc_cache0;
120    }
121    if (LIKELY(fbc_cache1
122               && fbc_cache1->payload <= a
123               && a < fbc_cache1->payload + fbc_cache1->req_szB)) {
124       // found at 1; swap 0 and 1
125       Block* tmp = fbc_cache0;
126       fbc_cache0 = fbc_cache1;
127       fbc_cache1 = tmp;
128       stats__n_fBc_cached++;
129       return fbc_cache0;
130    }
131    Block fake;
132    fake.payload = a;
133    fake.req_szB = 1;
134    UWord foundkey = 1;
135    UWord foundval = 1;
136    Bool found = VG_(lookupFM)( interval_tree,
137                                &foundkey, &foundval, (UWord)&fake );
138    if (!found) {
139       stats__n_fBc_notfound++;
140       return NULL;
141    }
142    tl_assert(foundval == 0); // we don't store vals in the interval tree
143    tl_assert(foundkey != 1);
144    Block* res = (Block*)foundkey;
145    tl_assert(res != &fake);
146    // put at the top position
147    fbc_cache1 = fbc_cache0;
148    fbc_cache0 = res;
149    stats__n_fBc_uncached++;
150    return res;
151 }
152 
153 // delete a block; asserts if not found.  (viz, 'a' must be
154 // known to be present.)
delete_Block_starting_at(Addr a)155 static void delete_Block_starting_at ( Addr a )
156 {
157    Block fake;
158    fake.payload = a;
159    fake.req_szB = 1;
160    Bool found = VG_(delFromFM)( interval_tree,
161                                 NULL, NULL, (Addr)&fake );
162    tl_assert(found);
163    fbc_cache0 = fbc_cache1 = NULL;
164 }
165 
166 
167 //------------------------------------------------------------//
168 //--- a FM of allocation points (APs)                      ---//
169 //------------------------------------------------------------//
170 
171 typedef
172    struct {
173       // the allocation point that we're summarising stats for
174       ExeContext* ap;
175       // used when printing results
176       Bool shown;
177       // The current number of blocks and bytes live for this AP
178       ULong cur_blocks_live;
179       ULong cur_bytes_live;
180       // The number of blocks and bytes live at the max-liveness
181       // point.  Note this is a bit subtle.  max_blocks_live is not
182       // the maximum number of live blocks, but rather the number of
183       // blocks live at the point of maximum byte liveness.  These are
184       // not necessarily the same thing.
185       ULong max_blocks_live;
186       ULong max_bytes_live;
187       // Total number of blocks and bytes allocated by this AP.
188       ULong tot_blocks;
189       ULong tot_bytes;
190       // Sum of death ages for all blocks allocated by this AP,
191       // that have subsequently been freed.
192       ULong death_ages_sum;
193       ULong deaths;
194       // Total number of reads and writes in all blocks allocated
195       // by this AP.
196       ULong n_reads;
197       ULong n_writes;
198       /* Histogram information.  We maintain a histogram aggregated for
199          all retiring Blocks allocated by this AP, but only if:
200          - this AP has only ever allocated objects of one size
201          - that size is <= HISTOGRAM_SIZE_LIMIT
202          What we need therefore is a mechanism to see if this AP
203          has only ever allocated blocks of one size.
204 
205          3 states:
206             Unknown          because no retirement yet
207             Exactly xsize    all retiring blocks are of this size
208             Mixed            multiple different sizes seen
209       */
210       enum { Unknown=999, Exactly, Mixed } xsize_tag;
211       SizeT xsize;
212       UInt* histo; /* [0 .. xsize-1] */
213    }
214    APInfo;
215 
216 /* maps ExeContext*'s to APInfo*'s.  Note that the keys must match the
217    .ap field in the values. */
218 static WordFM* apinfo = NULL;  /* WordFM* ExeContext* APInfo* */
219 
220 
221 /* 'bk' is being introduced (has just been allocated).  Find the
222    relevant APInfo entry for it, or create one, based on the block's
223    allocation EC.  Then, update the APInfo to the extent that we
224    actually can, to reflect the allocation. */
intro_Block(Block * bk)225 static void intro_Block ( Block* bk )
226 {
227    tl_assert(bk);
228    tl_assert(bk->ap);
229 
230    APInfo* api   = NULL;
231    UWord   keyW  = 0;
232    UWord   valW  = 0;
233    Bool    found = VG_(lookupFM)( apinfo,
234                                   &keyW, &valW, (UWord)bk->ap );
235    if (found) {
236       api = (APInfo*)valW;
237       tl_assert(keyW == (UWord)bk->ap);
238    } else {
239       api = VG_(malloc)( "dh.main.intro_Block.1", sizeof(APInfo) );
240       VG_(memset)(api, 0, sizeof(*api));
241       api->ap = bk->ap;
242       Bool present = VG_(addToFM)( apinfo,
243                                    (UWord)bk->ap, (UWord)api );
244       tl_assert(!present);
245       // histo stuff
246       tl_assert(api->deaths == 0);
247       api->xsize_tag = Unknown;
248       api->xsize = 0;
249       if (0) VG_(printf)("api %p   -->  Unknown\n", api);
250    }
251 
252    tl_assert(api->ap == bk->ap);
253 
254    /* So: update stats to reflect an allocation */
255 
256    // # live blocks
257    api->cur_blocks_live++;
258 
259    // # live bytes
260    api->cur_bytes_live += bk->req_szB;
261    if (api->cur_bytes_live > api->max_bytes_live) {
262       api->max_bytes_live  = api->cur_bytes_live;
263       api->max_blocks_live = api->cur_blocks_live;
264    }
265 
266    // total blocks and bytes allocated here
267    api->tot_blocks++;
268    api->tot_bytes += bk->req_szB;
269 
270    // update summary globals
271    g_tot_blocks++;
272    g_tot_bytes += bk->req_szB;
273 
274    g_cur_blocks_live++;
275    g_cur_bytes_live += bk->req_szB;
276    if (g_cur_bytes_live > g_max_bytes_live) {
277       g_max_bytes_live = g_cur_bytes_live;
278       g_max_blocks_live = g_cur_blocks_live;
279    }
280 }
281 
282 
283 /* 'bk' is retiring (being freed).  Find the relevant APInfo entry for
284    it, which must already exist.  Then, fold info from 'bk' into that
285    entry.  'because_freed' is True if the block is retiring because
286    the client has freed it.  If it is False then the block is retiring
287    because the program has finished, in which case we want to skip the
288    updates of the total blocks live etc for this AP, but still fold in
289    the access counts and histo data that have so far accumulated for
290    the block. */
retire_Block(Block * bk,Bool because_freed)291 static void retire_Block ( Block* bk, Bool because_freed )
292 {
293    tl_assert(bk);
294    tl_assert(bk->ap);
295 
296    APInfo* api   = NULL;
297    UWord   keyW  = 0;
298    UWord   valW  = 0;
299    Bool    found = VG_(lookupFM)( apinfo,
300                                   &keyW, &valW, (UWord)bk->ap );
301 
302    tl_assert(found);
303    api = (APInfo*)valW;
304    tl_assert(api->ap == bk->ap);
305 
306    // update stats following this free.
307    if (0)
308    VG_(printf)("ec %p  api->c_by_l %llu  bk->rszB %llu\n",
309                bk->ap, api->cur_bytes_live, (ULong)bk->req_szB);
310 
311    // update total blocks live etc for this AP
312    if (because_freed) {
313       tl_assert(api->cur_blocks_live >= 1);
314       tl_assert(api->cur_bytes_live >= bk->req_szB);
315       api->cur_blocks_live--;
316       api->cur_bytes_live -= bk->req_szB;
317 
318       api->deaths++;
319 
320       tl_assert(bk->allocd_at <= g_guest_instrs_executed);
321       api->death_ages_sum += (g_guest_instrs_executed - bk->allocd_at);
322 
323       // update global summary stats
324       tl_assert(g_cur_blocks_live > 0);
325       g_cur_blocks_live--;
326       tl_assert(g_cur_bytes_live >= bk->req_szB);
327       g_cur_bytes_live -= bk->req_szB;
328    }
329 
330    // access counts
331    api->n_reads  += bk->n_reads;
332    api->n_writes += bk->n_writes;
333 
334    // histo stuff.  First, do state transitions for xsize/xsize_tag.
335    switch (api->xsize_tag) {
336 
337       case Unknown:
338          tl_assert(api->xsize == 0);
339          tl_assert(api->deaths == 1 || api->deaths == 0);
340          tl_assert(!api->histo);
341          api->xsize_tag = Exactly;
342          api->xsize = bk->req_szB;
343          if (0) VG_(printf)("api %p   -->  Exactly(%lu)\n", api, api->xsize);
344          // and allocate the histo
345          if (bk->histoW) {
346             api->histo = VG_(malloc)("dh.main.retire_Block.1",
347                                      api->xsize * sizeof(UInt));
348             VG_(memset)(api->histo, 0, api->xsize * sizeof(UInt));
349          }
350          break;
351 
352       case Exactly:
353          //tl_assert(api->deaths > 1);
354          if (bk->req_szB != api->xsize) {
355             if (0) VG_(printf)("api %p   -->  Mixed(%lu -> %lu)\n",
356                                api, api->xsize, bk->req_szB);
357             api->xsize_tag = Mixed;
358             api->xsize = 0;
359             // deallocate the histo, if any
360             if (api->histo) {
361                VG_(free)(api->histo);
362                api->histo = NULL;
363             }
364          }
365          break;
366 
367       case Mixed:
368          //tl_assert(api->deaths > 1);
369          break;
370 
371       default:
372         tl_assert(0);
373    }
374 
375    // See if we can fold the histo data from this block into
376    // the data for the AP
377    if (api->xsize_tag == Exactly && api->histo && bk->histoW) {
378       tl_assert(api->xsize == bk->req_szB);
379       UWord i;
380       for (i = 0; i < api->xsize; i++) {
381          // FIXME: do something better in case of overflow of api->histo[..]
382          // Right now, at least don't let it overflow/wrap around
383          if (api->histo[i] <= 0xFFFE0000)
384             api->histo[i] += (UInt)bk->histoW[i];
385       }
386       if (0) VG_(printf)("fold in, AP = %p\n", api);
387    }
388 
389 
390 
391 #if 0
392    if (bk->histoB) {
393       VG_(printf)("block retiring, histo %lu: ", bk->req_szB);
394       UWord i;
395       for (i = 0; i < bk->req_szB; i++)
396         VG_(printf)("%u ", (UInt)bk->histoB[i]);
397       VG_(printf)("\n");
398    } else {
399       VG_(printf)("block retiring, no histo %lu\n", bk->req_szB);
400    }
401 #endif
402 }
403 
404 /* This handles block resizing.  When a block with AP 'ec' has a
405    size change of 'delta', call here to update the APInfo. */
apinfo_change_cur_bytes_live(ExeContext * ec,Long delta)406 static void apinfo_change_cur_bytes_live( ExeContext* ec, Long delta )
407 {
408    APInfo* api   = NULL;
409    UWord   keyW  = 0;
410    UWord   valW  = 0;
411    Bool    found = VG_(lookupFM)( apinfo,
412                                   &keyW, &valW, (UWord)ec );
413 
414    tl_assert(found);
415    api = (APInfo*)valW;
416    tl_assert(api->ap == ec);
417 
418    if (delta < 0) {
419       tl_assert(api->cur_bytes_live >= -delta);
420       tl_assert(g_cur_bytes_live >= -delta);
421    }
422 
423    // adjust current live size
424    api->cur_bytes_live += delta;
425    g_cur_bytes_live += delta;
426 
427    if (delta > 0 && api->cur_bytes_live > api->max_bytes_live) {
428       api->max_bytes_live  = api->cur_bytes_live;
429       api->max_blocks_live = api->cur_blocks_live;
430    }
431 
432    // update global summary stats
433    if (delta > 0 && g_cur_bytes_live > g_max_bytes_live) {
434       g_max_bytes_live = g_cur_bytes_live;
435       g_max_blocks_live = g_cur_blocks_live;
436    }
437    if (delta > 0)
438       g_tot_bytes += delta;
439 
440    // adjust total allocation size
441    if (delta > 0)
442       api->tot_bytes += delta;
443 }
444 
445 
446 //------------------------------------------------------------//
447 //--- update both Block and APInfos after {m,re}alloc/free ---//
448 //------------------------------------------------------------//
449 
450 static
new_block(ThreadId tid,void * p,SizeT req_szB,SizeT req_alignB,Bool is_zeroed)451 void* new_block ( ThreadId tid, void* p, SizeT req_szB, SizeT req_alignB,
452                   Bool is_zeroed )
453 {
454    tl_assert(p == NULL); // don't handle custom allocators right now
455    SizeT actual_szB /*, slop_szB*/;
456 
457    if ((SSizeT)req_szB < 0) return NULL;
458 
459    if (req_szB == 0)
460       req_szB = 1;  /* can't allow zero-sized blocks in the interval tree */
461 
462    // Allocate and zero if necessary
463    if (!p) {
464       p = VG_(cli_malloc)( req_alignB, req_szB );
465       if (!p) {
466          return NULL;
467       }
468       if (is_zeroed) VG_(memset)(p, 0, req_szB);
469       actual_szB = VG_(malloc_usable_size)(p);
470       tl_assert(actual_szB >= req_szB);
471       /* slop_szB = actual_szB - req_szB; */
472    } else {
473       /* slop_szB = 0; */
474    }
475 
476    // Make new HP_Chunk node, add to malloc_list
477    Block* bk = VG_(malloc)("dh.new_block.1", sizeof(Block));
478    bk->payload   = (Addr)p;
479    bk->req_szB   = req_szB;
480    bk->ap        = VG_(record_ExeContext)(tid, 0/*first word delta*/);
481    bk->allocd_at = g_guest_instrs_executed;
482    bk->n_reads   = 0;
483    bk->n_writes  = 0;
484    // set up histogram array, if the block isn't too large
485    bk->histoW = NULL;
486    if (req_szB <= HISTOGRAM_SIZE_LIMIT) {
487       bk->histoW = VG_(malloc)("dh.new_block.2", req_szB * sizeof(UShort));
488       VG_(memset)(bk->histoW, 0, req_szB * sizeof(UShort));
489    }
490 
491    Bool present = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
492    tl_assert(!present);
493    fbc_cache0 = fbc_cache1 = NULL;
494 
495    intro_Block(bk);
496 
497    if (0) VG_(printf)("ALLOC %ld -> %p\n", req_szB, p);
498 
499    return p;
500 }
501 
502 static
die_block(void * p,Bool custom_free)503 void die_block ( void* p, Bool custom_free )
504 {
505    tl_assert(!custom_free);  // at least for now
506 
507    Block* bk = find_Block_containing( (Addr)p );
508 
509    if (!bk) {
510      return; // bogus free
511    }
512 
513    tl_assert(bk->req_szB > 0);
514    // assert the block finder is behaving sanely
515    tl_assert(bk->payload <= (Addr)p);
516    tl_assert( (Addr)p < bk->payload + bk->req_szB );
517 
518    if (bk->payload != (Addr)p) {
519       return; // bogus free
520    }
521 
522    if (0) VG_(printf)(" FREE %p %llu\n",
523                       p, g_guest_instrs_executed - bk->allocd_at);
524 
525    retire_Block(bk, True/*because_freed*/);
526 
527    VG_(cli_free)( (void*)bk->payload );
528    delete_Block_starting_at( bk->payload );
529    if (bk->histoW) {
530       VG_(free)( bk->histoW );
531       bk->histoW = NULL;
532    }
533    VG_(free)( bk );
534 }
535 
536 
537 static
renew_block(ThreadId tid,void * p_old,SizeT new_req_szB)538 void* renew_block ( ThreadId tid, void* p_old, SizeT new_req_szB )
539 {
540    if (0) VG_(printf)("REALL %p %ld\n", p_old, new_req_szB);
541    void* p_new = NULL;
542 
543    tl_assert(new_req_szB > 0); // map 0 to 1
544 
545    // Find the old block.
546    Block* bk = find_Block_containing( (Addr)p_old );
547    if (!bk) {
548       return NULL;   // bogus realloc
549    }
550 
551    tl_assert(bk->req_szB > 0);
552    // assert the block finder is behaving sanely
553    tl_assert(bk->payload <= (Addr)p_old);
554    tl_assert( (Addr)p_old < bk->payload + bk->req_szB );
555 
556    if (bk->payload != (Addr)p_old) {
557       return NULL; // bogus realloc
558    }
559 
560    // Keeping the histogram alive in any meaningful way across
561    // block resizing is too darn complicated.  Just throw it away.
562    if (bk->histoW) {
563       VG_(free)(bk->histoW);
564       bk->histoW = NULL;
565    }
566 
567    // Actually do the allocation, if necessary.
568    if (new_req_szB <= bk->req_szB) {
569 
570       // New size is smaller or same; block not moved.
571       apinfo_change_cur_bytes_live(bk->ap,
572                                    (Long)new_req_szB - (Long)bk->req_szB);
573       bk->req_szB = new_req_szB;
574       return p_old;
575 
576    } else {
577 
578       // New size is bigger;  make new block, copy shared contents, free old.
579       p_new = VG_(cli_malloc)(VG_(clo_alignment), new_req_szB);
580       if (!p_new) {
581          // Nb: if realloc fails, NULL is returned but the old block is not
582          // touched.  What an awful function.
583          return NULL;
584       }
585       tl_assert(p_new != p_old);
586 
587       VG_(memcpy)(p_new, p_old, bk->req_szB);
588       VG_(cli_free)(p_old);
589 
590       // Since the block has moved, we need to re-insert it into the
591       // interval tree at the new place.  Do this by removing
592       // and re-adding it.
593       delete_Block_starting_at( (Addr)p_old );
594       // now 'bk' is no longer in the tree, but the Block itself
595       // is still alive
596 
597       // Update the metadata.
598       apinfo_change_cur_bytes_live(bk->ap,
599                                    (Long)new_req_szB - (Long)bk->req_szB);
600       bk->payload = (Addr)p_new;
601       bk->req_szB = new_req_szB;
602 
603       // and re-add
604       Bool present
605          = VG_(addToFM)( interval_tree, (UWord)bk, (UWord)0/*no val*/);
606       tl_assert(!present);
607       fbc_cache0 = fbc_cache1 = NULL;
608 
609       return p_new;
610    }
611    /*NOTREACHED*/
612    tl_assert(0);
613 }
614 
615 
616 //------------------------------------------------------------//
617 //--- malloc() et al replacement wrappers                  ---//
618 //------------------------------------------------------------//
619 
/* malloc() replacement: plain, non-zeroed allocation. */
static void* dh_malloc ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}
624 
/* operator new replacement: same behaviour as malloc here. */
static void* dh___builtin_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}
629 
/* operator new[] replacement: same behaviour as malloc here. */
static void* dh___builtin_vec_new ( ThreadId tid, SizeT szB )
{
   return new_block( tid, NULL, szB, VG_(clo_alignment), /*is_zeroed*/False );
}
634 
/* calloc() replacement: zeroed allocation of m * szB bytes.
   Rejects requests where m * szB would overflow SizeT -- otherwise
   the wrapped (too-small) product would be allocated and the client
   would over-read/write it. */
static void* dh_calloc ( ThreadId tid, SizeT m, SizeT szB )
{
   if (szB != 0 && m > ~(SizeT)0 / szB)
      return NULL; // m * szB overflows SizeT
   return new_block( tid, NULL, m*szB, VG_(clo_alignment), /*is_zeroed*/True );
}
639 
/* memalign() replacement: caller-specified alignment, not zeroed. */
static void *dh_memalign ( ThreadId tid, SizeT alignB, SizeT szB )
{
   return new_block( tid, NULL, szB, alignB, /*is_zeroed*/False );
}
644 
/* free() replacement.  The thread id is not needed. */
static void dh_free ( ThreadId tid __attribute__((unused)), void* p )
{
   die_block( p, /*custom_free*/False );
}
649 
/* operator delete replacement: same behaviour as free here. */
static void dh___builtin_delete ( ThreadId tid, void* p )
{
   die_block( p, /*custom_free*/False );
}
654 
/* operator delete[] replacement: same behaviour as free here. */
static void dh___builtin_vec_delete ( ThreadId tid, void* p )
{
   die_block( p, /*custom_free*/False );
}
659 
/* realloc() replacement, with the standard edge cases:
   realloc(NULL, n) acts as malloc(n); realloc(p, 0) acts as
   free(p) and returns NULL. */
static void* dh_realloc ( ThreadId tid, void* p_old, SizeT new_szB )
{
   if (!p_old)
      return dh_malloc(tid, new_szB);
   if (new_szB == 0) {
      dh_free(tid, p_old);
      return NULL;
   }
   return renew_block(tid, p_old, new_szB);
}
671 
dh_malloc_usable_size(ThreadId tid,void * p)672 static SizeT dh_malloc_usable_size ( ThreadId tid, void* p )
673 {
674    tl_assert(0);
675 //zz   HP_Chunk* hc = VG_(HT_lookup)( malloc_list, (UWord)p );
676 //zz
677 //zz   return ( hc ? hc->req_szB + hc->slop_szB : 0 );
678 }
679 
680 //------------------------------------------------------------//
681 //--- memory references                                    ---//
682 //------------------------------------------------------------//
683 
684 static
inc_histo_for_block(Block * bk,Addr addr,UWord szB)685 void inc_histo_for_block ( Block* bk, Addr addr, UWord szB )
686 {
687    UWord i, offMin, offMax1;
688    offMin = addr - bk->payload;
689    tl_assert(offMin < bk->req_szB);
690    offMax1 = offMin + szB;
691    if (offMax1 > bk->req_szB)
692       offMax1 = bk->req_szB;
693    //VG_(printf)("%lu %lu   (size of block %lu)\n", offMin, offMax1, bk->req_szB);
694    for (i = offMin; i < offMax1; i++) {
695       UShort n = bk->histoW[i];
696       if (n < 0xFFFF) n++;
697       bk->histoW[i] = n;
698    }
699 }
700 
701 static VG_REGPARM(2)
dh_handle_write(Addr addr,UWord szB)702 void dh_handle_write ( Addr addr, UWord szB )
703 {
704    Block* bk = find_Block_containing(addr);
705    if (bk) {
706       bk->n_writes += szB;
707       if (bk->histoW)
708          inc_histo_for_block(bk, addr, szB);
709    }
710 }
711 
712 static VG_REGPARM(2)
dh_handle_read(Addr addr,UWord szB)713 void dh_handle_read ( Addr addr, UWord szB )
714 {
715    Block* bk = find_Block_containing(addr);
716    if (bk) {
717       bk->n_reads += szB;
718       if (bk->histoW)
719          inc_histo_for_block(bk, addr, szB);
720    }
721 }
722 
723 
724 // Handle reads and writes by syscalls (read == kernel
725 // reads user space, write == kernel writes user space).
726 // Assumes no such read or write spans a heap block
727 // boundary and so we can treat it just as one giant
728 // read or write.
729 static
dh_handle_noninsn_read(CorePart part,ThreadId tid,const HChar * s,Addr base,SizeT size)730 void dh_handle_noninsn_read ( CorePart part, ThreadId tid, const HChar* s,
731                               Addr base, SizeT size )
732 {
733    switch (part) {
734       case Vg_CoreSysCall:
735          dh_handle_read(base, size);
736          break;
737       case Vg_CoreSysCallArgInMem:
738          break;
739       case Vg_CoreTranslate:
740          break;
741       default:
742          tl_assert(0);
743    }
744 }
745 
746 static
dh_handle_noninsn_write(CorePart part,ThreadId tid,Addr base,SizeT size)747 void dh_handle_noninsn_write ( CorePart part, ThreadId tid,
748                                Addr base, SizeT size )
749 {
750    switch (part) {
751       case Vg_CoreSysCall:
752          dh_handle_write(base, size);
753          break;
754       case Vg_CoreSignal:
755          break;
756       default:
757          tl_assert(0);
758    }
759 }
760 
761 
762 //------------------------------------------------------------//
763 //--- Instrumentation                                      ---//
764 //------------------------------------------------------------//
765 
766 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
767 #define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
768 #define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
769 #define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
770 #define assign(_t, _e)           IRStmt_WrTmp((_t), (_e))
771 
772 static
add_counter_update(IRSB * sbOut,Int n)773 void add_counter_update(IRSB* sbOut, Int n)
774 {
775    #if defined(VG_BIGENDIAN)
776    # define END Iend_BE
777    #elif defined(VG_LITTLEENDIAN)
778    # define END Iend_LE
779    #else
780    # error "Unknown endianness"
781    #endif
782    // Add code to increment 'g_guest_instrs_executed' by 'n', like this:
783    //   WrTmp(t1, Load64(&g_guest_instrs_executed))
784    //   WrTmp(t2, Add64(RdTmp(t1), Const(n)))
785    //   Store(&g_guest_instrs_executed, t2)
786    IRTemp t1 = newIRTemp(sbOut->tyenv, Ity_I64);
787    IRTemp t2 = newIRTemp(sbOut->tyenv, Ity_I64);
788    IRExpr* counter_addr = mkIRExpr_HWord( (HWord)&g_guest_instrs_executed );
789 
790    IRStmt* st1 = assign(t1, IRExpr_Load(END, Ity_I64, counter_addr));
791    IRStmt* st2 = assign(t2, binop(Iop_Add64, mkexpr(t1), mkU64(n)));
792    IRStmt* st3 = IRStmt_Store(END, counter_addr, mkexpr(t2));
793 
794    addStmtToIRSB( sbOut, st1 );
795    addStmtToIRSB( sbOut, st2 );
796    addStmtToIRSB( sbOut, st3 );
797 }
798 
799 static
addMemEvent(IRSB * sbOut,Bool isWrite,Int szB,IRExpr * addr,Int goff_sp)800 void addMemEvent(IRSB* sbOut, Bool isWrite, Int szB, IRExpr* addr,
801                  Int goff_sp)
802 {
803    IRType   tyAddr   = Ity_INVALID;
804    const HChar* hName= NULL;
805    void*    hAddr    = NULL;
806    IRExpr** argv     = NULL;
807    IRDirty* di       = NULL;
808 
809    const Int THRESH = 4096 * 4; // somewhat arbitrary
810    const Int rz_szB = VG_STACK_REDZONE_SZB;
811 
812    tyAddr = typeOfIRExpr( sbOut->tyenv, addr );
813    tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
814 
815    if (isWrite) {
816       hName = "dh_handle_write";
817       hAddr = &dh_handle_write;
818    } else {
819       hName = "dh_handle_read";
820       hAddr = &dh_handle_read;
821    }
822 
823    argv = mkIRExprVec_2( addr, mkIRExpr_HWord(szB) );
824 
825    /* Add the helper. */
826    tl_assert(hName);
827    tl_assert(hAddr);
828    tl_assert(argv);
829    di = unsafeIRDirty_0_N( 2/*regparms*/,
830                            hName, VG_(fnptr_to_fnentry)( hAddr ),
831                            argv );
832 
833    /* Generate the guard condition: "(addr - (SP - RZ)) >u N", for
834       some arbitrary N.  If that fails then addr is in the range (SP -
835       RZ .. SP + N - RZ).  If N is smallish (a page?) then we can say
836       addr is within a page of SP and so can't possibly be a heap
837       access, and so can be skipped. */
838    IRTemp sp = newIRTemp(sbOut->tyenv, tyAddr);
839    addStmtToIRSB( sbOut, assign(sp, IRExpr_Get(goff_sp, tyAddr)));
840 
841    IRTemp sp_minus_rz = newIRTemp(sbOut->tyenv, tyAddr);
842    addStmtToIRSB(
843       sbOut,
844       assign(sp_minus_rz,
845              tyAddr == Ity_I32
846                 ? binop(Iop_Sub32, mkexpr(sp), mkU32(rz_szB))
847                 : binop(Iop_Sub64, mkexpr(sp), mkU64(rz_szB)))
848    );
849 
850    IRTemp diff = newIRTemp(sbOut->tyenv, tyAddr);
851    addStmtToIRSB(
852       sbOut,
853       assign(diff,
854              tyAddr == Ity_I32
855                 ? binop(Iop_Sub32, addr, mkexpr(sp_minus_rz))
856                 : binop(Iop_Sub64, addr, mkexpr(sp_minus_rz)))
857    );
858 
859    IRTemp guard = newIRTemp(sbOut->tyenv, Ity_I1);
860    addStmtToIRSB(
861       sbOut,
862       assign(guard,
863              tyAddr == Ity_I32
864                 ? binop(Iop_CmpLT32U, mkU32(THRESH), mkexpr(diff))
865                 : binop(Iop_CmpLT64U, mkU64(THRESH), mkexpr(diff)))
866    );
867    di->guard = mkexpr(guard);
868 
869    addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
870 }
871 
/* Instrument one superblock (SB) of IR.  Two kinds of instrumentation
   are added to a fresh copy of the SB:
   - an update of the global instruction counter (add_counter_update),
     emitted just before each Ist_Exit and before the SB's end, so the
     count is correct on every path out of the SB; and
   - a call to a memory-event helper (addMemEvent) for every guest
     load/store, emitted just before the statement it describes.
   Returns the instrumented SB. */
static
IRSB* dh_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      VexArchInfo* archinfo_host,
                      IRType gWordTy, IRType hWordTy )
{
   Int   i, n = 0;          // n = guest insns seen since last counter flush
   IRSB* sbOut;
   IRTypeEnv* tyenv = sbIn->tyenv;

   // Guest-state offset of the stack pointer; addMemEvent uses it to
   // filter out accesses close below SP.
   const Int goff_sp = layout->offset_SP;

   // We increment the instruction count in two places:
   // - just before any Ist_Exit statements;
   // - just before the IRSB's end.
   // In the former case, we zero 'n' and then continue instrumenting.

   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];

      if (!st || st->tag == Ist_NoOp) continue;

      switch (st->tag) {

         case Ist_IMark: {
            // One more guest instruction in this SB.
            n++;
            break;
         }

         case Ist_Exit: {
            if (n > 0) {
               // Add an increment before the Exit statement, then reset 'n'.
               add_counter_update(sbOut, n);
               n = 0;
            }
            break;
         }

         case Ist_WrTmp: {
            // A load shows up as a WrTmp whose rhs is an Iex_Load.
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(data->Iex.Load.ty),
                            aexpr, goff_sp );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data  = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addMemEvent( sbOut, True/*isWrite*/,
                         sizeofIRType(typeOfIRExpr(tyenv, data)),
                         aexpr, goff_sp );
            break;
         }

         case Ist_Dirty: {
            Int      dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, False/*!isWrite*/,
                               dataSize, d->mAddr, goff_sp );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addMemEvent( sbOut, True/*isWrite*/,
                               dataSize, d->mAddr, goff_sp );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            addMemEvent( sbOut, False/*!isWrite*/,
                         dataSize, cas->addr, goff_sp );
            addMemEvent( sbOut, True/*isWrite*/,
                         dataSize, cas->addr, goff_sp );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL: counted as a read of the location. */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               addMemEvent( sbOut, False/*!isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            } else {
               /* SC: counted as a write (even though the store may
                  fail at run time). */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               addMemEvent( sbOut, True/*isWrite*/,
                            sizeofIRType(dataTy),
                            st->Ist.LLSC.addr, goff_sp );
            }
            break;
         }

         default:
            // Other statement kinds get no memory-access instrumentation.
            break;
      }

      // The instrumentation above was emitted *before* the statement
      // it describes; now copy the original statement itself.
      addStmtToIRSB( sbOut, st );
   }

   if (n > 0) {
      // Add an increment before the SB end.
      add_counter_update(sbOut, n);
   }
   return sbOut;
}
1019 
1020 #undef binop
1021 #undef mkexpr
1022 #undef mkU32
1023 #undef mkU64
1024 #undef assign
1025 
1026 
1027 //------------------------------------------------------------//
1028 //--- Command line args                                    ---//
1029 //------------------------------------------------------------//
1030 
// FORWARDS
static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
                              /*OUT*/Bool* increasingP,
                              const HChar* metric_name );

// --show-top-n: how many allocation points to print at exit.
static Int    clo_show_top_n = 10;
// --sort-by: name of the sort metric; validated by identify_metric().
static const HChar *clo_sort_by = "max-bytes-live";
1038 
/* Process one DHAT-specific command line option.  Returns True if the
   option was accepted (here or by the malloc-replacement machinery),
   False if it was malformed.  The 'if VG_*_CLO(...)' forms are the
   standard Valgrind option-parsing macros, which expand to a
   parenthesised condition. */
static Bool dh_process_cmd_line_option(const HChar* arg)
{
   if VG_BINT_CLO(arg, "--show-top-n", clo_show_top_n, 1, 100000) {}

   else if VG_STR_CLO(arg, "--sort-by", clo_sort_by) {
       ULong (*dummyFn)(APInfo*);
       Bool dummyB;
       // Validate the metric name now, so a bad --sort-by is rejected
       // at startup rather than discovered at exit time.
       Bool ok = identify_metric( &dummyFn, &dummyB, clo_sort_by);
       if (!ok)
          return False;
       // otherwise it's OK, in which case leave it alone.
       // show_top_n_apinfos will later convert the string by a
       // second call to identify_metric.
   }

   else
      return VG_(replacement_malloc_process_cmd_line_option)(arg);

   return True;
}
1059 
1060 
// Print DHAT's command-line option summary (shown by --help).
static void dh_print_usage(void)
{
   VG_(printf)(
"    --show-top-n=number       show the top <number> alloc points [10]\n"
"    --sort-by=string\n"
"            sort the allocation points by the metric\n"
"            defined by <string>, thusly:\n"
"                max-bytes-live    maximum live bytes [default]\n"
"                tot-bytes-allocd  total allocation (turnover)\n"
"                max-blocks-live   maximum live blocks\n"
   );
}
1073 
// Print DHAT's debug-option summary (shown by --help-debug); there are none.
static void dh_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
1080 
1081 
1082 //------------------------------------------------------------//
1083 //--- Finalisation                                         ---//
1084 //------------------------------------------------------------//
1085 
/* Render n/100 into 'buf' with exactly two decimal places,
   e.g. n == 1234 gives "12.34", n == 105 gives "1.05".  'buf' must be
   large enough for the result (callers pass >= 16 bytes). */
static void show_N_div_100( /*OUT*/HChar* buf, ULong n )
{
   // %02llu zero-pads the fractional part, replacing the original
   // hand-rolled 'nR < 10 ? "0" : ""' insertion.
   VG_(sprintf)(buf, "%llu.%02llu", n / 100, n % 100);
}
1094 
/* Print the report for one allocation point: live/total byte and
   block counts, death statistics, read/write access ratios, the
   allocation stack, and (when collected) the per-offset access
   histogram. */
static void show_APInfo ( APInfo* api )
{
   // Average block size (tot_bytes / tot_blocks) to 2 decimal places;
   // "NaN" if this AP never allocated any blocks.
   HChar bufA[80];
   VG_(memset)(bufA, 0, sizeof(bufA));
   if (api->tot_blocks > 0) {
      show_N_div_100( bufA, ((ULong)api->tot_bytes * 100ULL)
                              / (ULong)api->tot_blocks );
   } else {
      bufA[0] = 'N'; bufA[1] = 'a'; bufA[2] = 'N';
   }

   VG_(umsg)("max-live:    %'llu in %'llu blocks\n",
             api->max_bytes_live, api->max_blocks_live);
   VG_(umsg)("tot-alloc:   %'llu in %'llu blocks (avg size %s)\n",
             api->tot_bytes, api->tot_blocks, bufA);

   tl_assert(api->tot_blocks >= api->max_blocks_live);
   tl_assert(api->tot_bytes >= api->max_bytes_live);

   if (api->deaths > 0) {
      // Average Age at Death.  The enclosing 'deaths > 0' guard makes
      // the division safe; the original additionally re-tested
      // 'deaths == 0' here, which was dead code and has been removed.
      ULong aad = api->death_ages_sum / api->deaths;
      // AAD as a fraction of the total program lifetime (so far)
      // measured in ten-thousand-ths (aad_frac_10k == 10000 means the
      // complete lifetime of the program).
      ULong aad_frac_10k
         = g_guest_instrs_executed == 0
           ? 0 : (10000ULL * aad) / g_guest_instrs_executed;
      HChar buf[16];
      show_N_div_100(buf, aad_frac_10k);
      VG_(umsg)("deaths:      %'llu, at avg age %'llu "
                "(%s%% of prog lifetime)\n",
                api->deaths, aad, buf );
   } else {
      VG_(umsg)("deaths:      none (none of these blocks were freed)\n");
   }

   // Access ratios: bytes read (written) per 100 bytes allocated,
   // shown as a 2-decimal-place percentage; "Inf" if nothing was
   // ever allocated here.
   HChar bufR[80], bufW[80];
   VG_(memset)(bufR, 0, sizeof(bufR));
   VG_(memset)(bufW, 0, sizeof(bufW));
   if (api->tot_bytes > 0) {
      show_N_div_100(bufR, (100ULL * api->n_reads) / api->tot_bytes);
      show_N_div_100(bufW, (100ULL * api->n_writes) / api->tot_bytes);
   } else {
      VG_(strcat)(bufR, "Inf");
      VG_(strcat)(bufW, "Inf");
   }

   VG_(umsg)("acc-ratios:  %s rd, %s wr "
             " (%'llu b-read, %'llu b-written)\n",
             bufR, bufW,
             api->n_reads, api->n_writes);

   VG_(pp_ExeContext)(api->ap);

   // The histogram is only present when every block from this AP had
   // the same size (xsize_tag == Exactly); print 16 counts per row,
   // each row prefixed with its starting offset.
   if (api->histo && api->xsize_tag == Exactly) {
      VG_(umsg)("\nAggregated access counts by offset:\n");
      VG_(umsg)("\n");
      UWord i;
      if (api->xsize > 0)
         VG_(umsg)("[   0]  ");
      for (i = 0; i < api->xsize; i++) {
         if (i > 0 && (i % 16) == 0 && i != api->xsize-1) {
            VG_(umsg)("\n");
            VG_(umsg)("[%4lu]  ", i);
         }
         VG_(umsg)("%u ", api->histo[i]);
      }
      VG_(umsg)("\n");
   }
}
1167 
1168 
/* Metric-access functions for APInfos.  Each returns one sortable
   metric of an allocation point; they are used via function pointer,
   selected by identify_metric() below. */
static ULong get_metric__max_bytes_live ( APInfo* api ) {
   return api->max_bytes_live;
}
static ULong get_metric__tot_bytes ( APInfo* api ) {
   return api->tot_bytes;
}
static ULong get_metric__max_blocks_live ( APInfo* api ) {
   return api->max_blocks_live;
}
1179 
1180 /* Given a string, return the metric-access function and also a Bool
1181    indicating whether we want increasing or decreasing values of the
1182    metric.  This is used twice, once in command line processing, and
1183    then again in show_top_n_apinfos.  Returns False if the given
1184    string could not be identified.*/
identify_metric(ULong (** get_metricP)(APInfo *),Bool * increasingP,const HChar * metric_name)1185 static Bool identify_metric ( /*OUT*/ULong(**get_metricP)(APInfo*),
1186                               /*OUT*/Bool* increasingP,
1187                               const HChar* metric_name )
1188 {
1189    if (0 == VG_(strcmp)(metric_name, "max-bytes-live")) {
1190       *get_metricP = get_metric__max_bytes_live;
1191       *increasingP = False;
1192       return True;
1193    }
1194    if (0 == VG_(strcmp)(metric_name, "tot-bytes-allocd")) {
1195       *get_metricP = get_metric__tot_bytes;
1196       *increasingP = False;
1197       return True;
1198    }
1199    if (0 == VG_(strcmp)(metric_name, "max-blocks-live")) {
1200       *get_metricP = get_metric__max_blocks_live;
1201       *increasingP = False;
1202       return True;
1203    }
1204    return False;
1205 }
1206 
1207 
/* Print the top clo_show_top_n allocation points, ordered by the
   metric chosen with --sort-by.  Selection is by repeated full scans
   of the 'apinfo' map (O(top_n * #APs)), using each APInfo's .shown
   flag to skip entries already printed. */
static void show_top_n_apinfos ( void )
{
   Int   i;
   UWord keyW, valW;
   ULong (*get_metric)(APInfo*);
   Bool  increasing;

   const HChar* metric_name = clo_sort_by;
   tl_assert(metric_name); // ensured by clo processing

   Bool ok = identify_metric( &get_metric, &increasing, metric_name );
   tl_assert(ok); // ensured by clo processing

   VG_(umsg)("\n");
   VG_(umsg)("======== ORDERED BY %s \"%s\": "
             "top %d allocators ========\n",
             increasing ? "increasing" : "decreasing",
             metric_name, clo_show_top_n );

   // Clear all .shown bits
   VG_(initIterFM)( apinfo );
   while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
      APInfo* api = (APInfo*)valW;
      tl_assert(api && api->ap == (ExeContext*)keyW);
      api->shown = False;
   }
   VG_(doneIterFM)( apinfo );

   // Now print the top N entries.  Each one requires a
   // complete scan of the set.  Duh.
   for (i = 0; i < clo_show_top_n; i++) {
      // Sentinel is the worst possible value for the chosen direction,
      // so any not-yet-shown entry can beat it.
      ULong   best_metric = increasing ? ~0ULL : 0ULL;
      APInfo* best_api    = NULL;

      VG_(initIterFM)( apinfo );
      while (VG_(nextIterFM)( apinfo, &keyW, &valW )) {
         APInfo* api = (APInfo*)valW;
         if (api->shown)
            continue;
         ULong metric = get_metric(api);
         if (increasing ? (metric < best_metric) : (metric > best_metric)) {
            best_metric = metric;
            best_api = api;
         }
      }
      VG_(doneIterFM)( apinfo );

      if (!best_api)
         break; // all APIs have been shown.  Stop.

      VG_(umsg)("\n");
      VG_(umsg)("-------------------- %d of %d --------------------\n",
                i+1, clo_show_top_n );
      show_APInfo(best_api);
      best_api->shown = True;
   }

   VG_(umsg)("\n");
}
1267 
1268 
/* Tool finalisation: fold the access counts of still-live blocks into
   their AP summaries, then print the whole-program statistics, the
   per-AP reports, usage hints, and (with --stats) internal counters.
   'exit_status' is unused. */
static void dh_fini(Int exit_status)
{
   // Before printing statistics, we must harvest access counts for
   // all the blocks that are still alive.  Not doing so gives
   // access ratios which are too low (zero, in the worst case)
   // for such blocks, since the accesses that do get made will
   // (if we skip this step) not get folded into the AP summaries.
   UWord keyW, valW;
   VG_(initIterFM)( interval_tree );
   while (VG_(nextIterFM)( interval_tree, &keyW, &valW )) {
      Block* bk = (Block*)keyW;
      tl_assert(valW == 0);
      tl_assert(bk);
      retire_Block(bk, False/*!because_freed*/);
   }
   VG_(doneIterFM)( interval_tree );

   // show results
   VG_(umsg)("======== SUMMARY STATISTICS ========\n");
   VG_(umsg)("\n");
   VG_(umsg)("guest_insns:  %'llu\n", g_guest_instrs_executed);
   VG_(umsg)("\n");
   VG_(umsg)("max_live:     %'llu in %'llu blocks\n",
             g_max_bytes_live, g_max_blocks_live);
   VG_(umsg)("\n");
   VG_(umsg)("tot_alloc:    %'llu in %'llu blocks\n",
             g_tot_bytes, g_tot_blocks);
   VG_(umsg)("\n");
   // Guard against division by zero when nothing was allocated.
   if (g_tot_bytes > 0) {
      VG_(umsg)("insns per allocated byte: %'llu\n",
                g_guest_instrs_executed / g_tot_bytes);
      VG_(umsg)("\n");
   }

   show_top_n_apinfos();

   VG_(umsg)("\n");
   VG_(umsg)("\n");
   VG_(umsg)("==============================================================\n");
   VG_(umsg)("\n");
   VG_(umsg)("Some hints: (see --help for command line option details):\n");
   VG_(umsg)("\n");
   VG_(umsg)("* summary stats for whole program are at the top of this output\n");
   VG_(umsg)("\n");
   VG_(umsg)("* --show-top-n=  controls how many alloc points are shown.\n");
   VG_(umsg)("                 You probably want to set it much higher than\n");
   VG_(umsg)("                 the default value (10)\n");
   VG_(umsg)("\n");
   VG_(umsg)("* --sort-by=     specifies the sort key for output.\n");
   VG_(umsg)("                 See --help for details.\n");
   VG_(umsg)("\n");
   VG_(umsg)("* Each allocation stack, by default 12 frames, counts as\n");
   VG_(umsg)("  a separate alloc point.  This causes the data to be spread out\n");
   VG_(umsg)("  over far too many alloc points.  I strongly suggest using\n");
   VG_(umsg)("  --num-callers=4 or some such, to reduce the spreading.\n");
   VG_(umsg)("\n");

   // Internal statistics, only with --stats: hit rates of the
   // find_Block_containing cache.
   if (VG_(clo_stats)) {
      VG_(dmsg)(" dhat: find_Block_containing:\n");
      VG_(dmsg)("             found: %'lu (%'lu cached + %'lu uncached)\n",
                stats__n_fBc_cached + stats__n_fBc_uncached,
                stats__n_fBc_cached,
                stats__n_fBc_uncached);
      VG_(dmsg)("          notfound: %'lu\n", stats__n_fBc_notfound);
      VG_(dmsg)("\n");
   }
}
1336 
1337 
1338 //------------------------------------------------------------//
1339 //--- Initialisation                                       ---//
1340 //------------------------------------------------------------//
1341 
// Called after command line processing; DHAT needs no work here.
static void dh_post_clo_init(void)
{
}
1345 
/* Register DHAT with the Valgrind core: tool details, the basic
   callbacks (init/instrument/fini), command-line handling, the full
   set of malloc-replacement functions, and trackers for memory
   accesses made by the core rather than by guest instructions.
   Finally create the two central (initially empty) maps. */
static void dh_pre_clo_init(void)
{
   VG_(details_name)            ("DHAT");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a dynamic heap analysis tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2010-2013, and GNU GPL'd, by Mozilla Inc");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);

   // Basic functions.
   VG_(basic_tool_funcs)          (dh_post_clo_init,
                                   dh_instrument,
                                   dh_fini);
//zz
   // Needs.
   VG_(needs_libc_freeres)();
   VG_(needs_command_line_options)(dh_process_cmd_line_option,
                                   dh_print_usage,
                                   dh_print_debug_usage);
//zz   VG_(needs_client_requests)     (dh_handle_client_request);
//zz   VG_(needs_sanity_checks)       (dh_cheap_sanity_check,
//zz                                   dh_expensive_sanity_check);
   VG_(needs_malloc_replacement)  (dh_malloc,
                                   dh___builtin_new,
                                   dh___builtin_vec_new,
                                   dh_memalign,
                                   dh_calloc,
                                   dh_free,
                                   dh___builtin_delete,
                                   dh___builtin_vec_delete,
                                   dh_realloc,
                                   dh_malloc_usable_size,
                                   0 );

   // Reads/writes performed by the core itself (e.g. syscall
   // argument handling) rather than by instrumented guest code.
   VG_(track_pre_mem_read)        ( dh_handle_noninsn_read );
   //VG_(track_pre_mem_read_asciiz) ( check_mem_is_defined_asciiz );
   VG_(track_post_mem_write)      ( dh_handle_noninsn_write );

   tl_assert(!interval_tree);
   tl_assert(!fbc_cache0);
   tl_assert(!fbc_cache1);

   // Map of live Blocks, keyed/ordered by address interval.
   interval_tree = VG_(newFM)( VG_(malloc),
                               "dh.main.interval_tree.1",
                               VG_(free),
                               interval_tree_Cmp );

   // Map of allocation-point summaries, keyed by ExeContext pointer
   // (compared as unboxed words).
   apinfo = VG_(newFM)( VG_(malloc),
                        "dh.main.apinfo.1",
                        VG_(free),
                        NULL/*unboxedcmp*/ );
}
1398 
1399 VG_DETERMINE_INTERFACE_VERSION(dh_pre_clo_init)
1400 
1401 //--------------------------------------------------------------------//
1402 //--- end                                                dh_main.c ---//
1403 //--------------------------------------------------------------------//
1404