1 
2 /*--------------------------------------------------------------------*/
3 /*--- An xtree, tree of stacktraces with data            m_xtree.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2016-2017 Philippe Waroquiers
11 
12    This code generalises the XTree idea that was implemented in
13    the massif tool in Valgrind versions <= 3.12, which is
14       Copyright (C) 2005-2017 Nicholas Nethercote
15        njn@valgrind.org
16 
17    The XTree in this file is, however, implemented completely
18    differently. Some code has been re-used to produce the
19    massif file header (e.g. the FP_cmd function).
20 
21    This program is free software; you can redistribute it and/or
22    modify it under the terms of the GNU General Public License as
23    published by the Free Software Foundation; either version 2 of the
24    License, or (at your option) any later version.
25 
26    This program is distributed in the hope that it will be useful, but
27    WITHOUT ANY WARRANTY; without even the implied warranty of
28    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
29    General Public License for more details.
30 
31    You should have received a copy of the GNU General Public License
32    along with this program; if not, write to the Free Software
33    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
34    02111-1307, USA.
35 
36    The GNU General Public License is contained in the file COPYING.
37 */
38 
39 #include "pub_core_basics.h"
40 #include "pub_core_debuglog.h"
41 #include "pub_core_clientstate.h"
42 #include "pub_core_stacktrace.h"
43 #include "pub_core_execontext.h"
44 #include "pub_core_libcbase.h"
45 #include "pub_core_libcassert.h"
46 #include "pub_core_libcfile.h"
47 #include "pub_core_libcprint.h"
48 #include "pub_core_libcproc.h"
49 #include "pub_core_hashtable.h"
50 #include "pub_core_mallocfree.h"
51 #include "pub_core_options.h"
52 #include "pub_core_debuginfo.h"
53 #include "pub_core_deduppoolalloc.h"
54 #include "pub_core_xtree.h"    /* self */
55 
56 #define DMSG(level, ...) (level <= VG_(debugLog_getLevel)() ?         \
57                           VG_(dmsg)(__VA_ARGS__)                      \
58                           : 0)
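/* Example (illustrative): DMSG(1, "n_groups %u\n", n) calls VG_(dmsg) only
   when the debug log level, as returned by VG_(debugLog_getLevel)(), is at
   least 1; otherwise the whole expression is a no-op. */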
59 
60 /* Defines the relevant part of an ec. This is shared between an xt
61    and its snapshots (see XT_shared XArray of xec). */
62 typedef
63    struct _xec {
64      ExeContext* ec;
65      UShort top;        // Index of the first ip of ec to take into account.
66      UShort n_ips_sel;  // The nr of ips, starting at top, to take into account.
67    }
68    xec;
69 
70 /* XT_shared maintains the information shared between an XT and all
71    its snapshots. */
72 typedef
73    struct _XT_shared {
74       UWord nrRef; /* nr of XTrees referencing this shared memory. */
75 
76       Alloc_Fn_t alloc_fn;                /* alloc fn (nofail) */
77       const HChar* cc;                    /* cost centre for alloc */
78       Free_Fn_t free_fn;                  /* free fn */
79 
80       /* The data associated with each ec is stored in 2 arrays:
81            an xec array, shared between an xt and all its snapshots.
82            a  data array, private to each XTree.
83          For an ec with an ECU ecu, d4ecu2xecu[ecu/4] gives the offset in
84          the xec and data arrays where the ec information is located (this
85          indirection is used to avoid huge xec and data arrays in
86          case an XTree contains data only for a small number of ec).
87          The offset in the xec and data arrays is used as the xtree ec
88          unique id, i.e. an xecu. */
89 
90       UInt  d4ecu2xecu_sz; /* size of d4ecu2xecu (in nr of elements). */
91       UInt* d4ecu2xecu;
92 
93       /* ec information common to an xt and its snapshots. */
94       XArray* xec; /* XArray of xec, indexed by xecu (== d4ecu2xecu[ecu/4]). */
95 
96       /* XArray of xecu, sorted by StackTrace ips[top..top+n_ips_sel-1].
97          See ips_order_cmp. */
98       XArray* ips_order_xecu;
99    } XT_shared;
100 
101 /* NO_OFFSET in d4ecu2xecu indicates that there is no data (yet) for this
102    ec (i.e. for the entry with index ecu/4). */
103 #define NO_OFFSET 0xffffffff
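/* Illustrative example (made-up ECU value): ECUs are multiples of 4, hence
   the division by 4.  For an ExeContext whose ECU is 0x28, its entry is
   xecu = d4ecu2xecu[0x28/4] = d4ecu2xecu[10]; that xecu then indexes both
   shared->xec (the shared ExeContext info) and xt->data (the per-XTree
   data).  A d4ecu2xecu slot still equal to NO_OFFSET means no data has
   been recorded yet for the corresponding ec. */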
104 
105 static XT_shared* new_XT_shared (Alloc_Fn_t alloc_fn,
106                                  const  HChar* cc,
107                                  void   (*free_fn)(void*))
108 {
109    XT_shared* shared;
110 
111    vg_assert(alloc_fn);
112    vg_assert(cc);
113    vg_assert(free_fn);
114    shared = alloc_fn(cc, sizeof(*shared));
115    shared->nrRef = 0;
116    shared->alloc_fn = alloc_fn;
117    shared->cc = cc;
118    shared->free_fn = free_fn;
119 
120    shared->d4ecu2xecu_sz = 0;
121    shared->d4ecu2xecu = NULL;
122    shared->xec = VG_(newXA)(alloc_fn, cc, free_fn, sizeof(xec));
123    shared->ips_order_xecu = NULL; // Allocated when needed.
124 
125    return shared;
126 }
127 
128 static void delete_XT_shared (XT_shared* shared)
129 {
130    vg_assert(shared->nrRef == 0);
131    shared->free_fn(shared->d4ecu2xecu);
132    VG_(deleteXA)(shared->xec);
133    if (shared->ips_order_xecu != NULL)
134       VG_(deleteXA)(shared->ips_order_xecu);
135    shared->free_fn(shared);
136 }
137 
138 /* Compare 2 entries in ips_order_xecu by StackTrace elements.
139    In case stack traces are of different length, an 'absent' ips is
140    considered smaller than any other address. */
141 static XArray* xec_data_for_sort; // Needed to translate an xecu into an xec
142 static Int ips_order_cmp(const void* vleft, const void* vright)
143 {
144    const Xecu left_xecu  = *(const Xecu*)vleft;
145    const Xecu right_xecu = *(const Xecu*)vright;
146    const xec* left  = VG_(indexXA)(xec_data_for_sort, left_xecu);
147    const xec* right = VG_(indexXA)(xec_data_for_sort, right_xecu);
148    const StackTrace left_ips  = VG_(get_ExeContext_StackTrace)(left->ec)
149       + left->top;
150    const StackTrace right_ips = VG_(get_ExeContext_StackTrace)(right->ec)
151       + right->top;
152    UInt i;
153 
154    const UInt c_n_ips_sel = left->n_ips_sel < right->n_ips_sel
155       ? left->n_ips_sel : right->n_ips_sel;
156 
157    // First see if we have a difference on the common nr of ips selected
158    for (i = 0; i < c_n_ips_sel; i++) {
159       if (left_ips[i] == right_ips[i]) continue;
160       if (left_ips[i] < right_ips[i]) return -1;
161       return  1;
162    }
163    // Common part is equal => compare lengths.
164    if (left->n_ips_sel < right->n_ips_sel) return -1;
165    if (left->n_ips_sel > right->n_ips_sel) return  1;
166    return 0;
167 }
168 
169 // If needed, build or refresh shared->ips_order_xecu
170 static void ensure_ips_order_xecu_valid(XT_shared* shared)
171 {
172    UInt i;
173    UInt n_xecu;
174 
175    if (shared->ips_order_xecu == NULL) {
176       shared->ips_order_xecu = VG_(newXA)(shared->alloc_fn, shared->cc,
177                                           shared->free_fn, sizeof(Xecu));
178       VG_(hintSizeXA)(shared->ips_order_xecu, VG_(sizeXA)(shared->xec));
179       VG_(setCmpFnXA)(shared->ips_order_xecu, ips_order_cmp);
180    }
181 
182    if (VG_(sizeXA)(shared->xec) == VG_(sizeXA)(shared->ips_order_xecu))
183       return;
184 
185    n_xecu = VG_(sizeXA)(shared->xec);
186    for (i = VG_(sizeXA)(shared->ips_order_xecu); i < n_xecu; i++)
187       VG_(addToXA)(shared->ips_order_xecu, &i);
188 
189    xec_data_for_sort = shared->xec;
190    VG_(sortXA)(shared->ips_order_xecu);
191 }
192 
193 static void addRef_XT_shared (XT_shared* shared)
194 {
195    shared->nrRef++;
196 }
197 
198 static UWord release_XT_shared(XT_shared* shared)
199 {
200    UWord nrRef;
201 
202    vg_assert(shared->nrRef > 0);
203    nrRef = --shared->nrRef;
204    if (nrRef == 0)
205       delete_XT_shared(shared);
206    return nrRef;
207 }
208 
209 
210 struct _XTree {
211    Alloc_Fn_t alloc_fn;                /* alloc fn (nofail) */
212    const HChar* cc;                    /* cost centre for alloc */
213    Free_Fn_t free_fn;                  /* free fn */
214    Word  dataSzB;   /* data size in bytes */
215    XT_init_data_t init_data_fn;
216    XT_add_data_t add_data_fn;
217    XT_sub_data_t sub_data_fn;
218    XT_filter_IPs_t filter_IPs_fn;
219 
220    XT_shared* shared;
221 
222    HChar* tmp_data; /* temporary buffer, to insert new elements. */
223    XArray* data; /* of elements of size dataSzB */
224 };
225 
226 
227 XTree* VG_(XT_create) ( Alloc_Fn_t alloc_fn,
228                         const HChar* cc,
229                         Free_Fn_t free_fn,
230                         Word dataSzB,
231                         XT_init_data_t init_data_fn,
232                         XT_add_data_t add_data_fn,
233                         XT_sub_data_t sub_data_fn,
234                         XT_filter_IPs_t filter_IPs_fn)
235 {
236    XTree* xt;
237 
238    /* check user-supplied info .. */
239    vg_assert(alloc_fn);
240    vg_assert(free_fn);
241    vg_assert(dataSzB >= 0);
242    vg_assert(init_data_fn);
243    vg_assert(add_data_fn);
244    vg_assert(sub_data_fn);
245 
246    xt = alloc_fn(cc, sizeof(struct _XTree) );
247    xt->alloc_fn  = alloc_fn;
248    xt->cc        = cc;
249    xt->free_fn   = free_fn;
250    xt->dataSzB   = dataSzB;
251    xt->init_data_fn = init_data_fn;
252    xt->add_data_fn = add_data_fn;
253    xt->sub_data_fn = sub_data_fn;
254    xt->filter_IPs_fn = filter_IPs_fn;
255 
256    xt->shared = new_XT_shared(alloc_fn, cc, free_fn);
257    addRef_XT_shared(xt->shared);
258    xt->tmp_data = alloc_fn(cc, xt->dataSzB);
259    xt->data =  VG_(newXA)(alloc_fn, cc, free_fn, dataSzB);
260 
261    return xt;
262 }
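/* Usage sketch (illustrative only): a tool typically creates one XTree up
   front and then accumulates a value per allocation context, e.g.

      static XTree* xt;
      ...
      xt = VG_(XT_create)(VG_(malloc), "tool.xtree.cc", VG_(free),
                          sizeof(SizeT),
                          init_szB, add_szB, sub_szB,
                          VG_(XT_filter_1top_and_maybe_below_main));
      ...
      VG_(XT_add_to_ec)(xt, VG_(record_ExeContext)(tid, 0), &szB);

   where init_szB/add_szB/sub_szB are hypothetical tool-provided callbacks
   matching XT_init_data_t/XT_add_data_t/XT_sub_data_t, and the cost centre
   string "tool.xtree.cc" is made up for the example. */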
263 
264 XTree* VG_(XT_snapshot)(XTree* xt)
265 {
266    XTree* nxt;
267 
268    vg_assert(xt);
269 
270    nxt = xt->alloc_fn(xt->cc, sizeof(struct _XTree) );
271 
272    *nxt = *xt;
273    addRef_XT_shared(nxt->shared);
274    nxt->tmp_data = nxt->alloc_fn(nxt->cc, nxt->dataSzB);
275    nxt->data = VG_(cloneXA)(nxt->cc, xt->data);
276 
277    return nxt;
278 }
279 
280 void VG_(XT_delete) ( XTree* xt )
281 {
282    vg_assert(xt);
283 
284    release_XT_shared(xt->shared);
285    xt->free_fn(xt->tmp_data);
286    VG_(deleteXA)(xt->data);
287    xt->free_fn(xt);
288 }
289 
290 static Xecu find_or_insert (XTree* xt, ExeContext* ec)
291 {
292 
293    const UInt d4ecu = VG_(get_ECU_from_ExeContext)(ec) / 4;
294    XT_shared* shared = xt->shared;
295 
296    /* First grow the d4ecu2xecu array if needed. */
297    if (d4ecu >= shared->d4ecu2xecu_sz) {
298       UInt old_sz = shared->d4ecu2xecu_sz;
299       UInt new_sz = (3 * d4ecu) / 2;
300 
301       if (new_sz < 1000)
302          new_sz = 1000;
303       shared->d4ecu2xecu = VG_(realloc)(xt->cc, shared->d4ecu2xecu,
304                                         new_sz * sizeof(UInt));
305       shared->d4ecu2xecu_sz = new_sz;
306       for (UInt i = old_sz; i < new_sz; i++)
307          shared->d4ecu2xecu[i] = NO_OFFSET;
308    }
309 
310    if (shared->d4ecu2xecu[d4ecu] == NO_OFFSET) {
311       xec xe;
312 
313       xe.ec = ec;
314       if (xt->filter_IPs_fn == NULL) {
315          xe.top = 0;
316          xe.n_ips_sel = (UShort)VG_(get_ExeContext_n_ips)(xe.ec);
317       } else {
318          UInt top;
319          UInt n_ips_sel = VG_(get_ExeContext_n_ips)(xe.ec);
320          xt->filter_IPs_fn(VG_(get_ExeContext_StackTrace)(xe.ec), n_ips_sel,
321                            &top, &n_ips_sel);
322          xe.top = (UShort)top;
323          xe.n_ips_sel = (UShort)n_ips_sel;
324       }
325       xt->init_data_fn(xt->tmp_data);
326       VG_(addToXA)(shared->xec, &xe);
327       shared->d4ecu2xecu[d4ecu] = (UInt)VG_(addToXA)(xt->data, xt->tmp_data);
328    }
329 
330    return shared->d4ecu2xecu[d4ecu];
331 }
332 
333 Xecu VG_(XT_add_to_ec) (XTree* xt, ExeContext* ec, const void* value)
334 {
335    Xecu xecu = find_or_insert(xt, ec);
336    void* data = VG_(indexXA)(xt->data, xecu);
337 
338    xt->add_data_fn(data, value);
339    return xecu;
340 }
341 
342 Xecu VG_(XT_sub_from_ec) (XTree* xt, ExeContext* ec, const void* value)
343 {
344    Xecu xecu = find_or_insert(xt, ec);
345    void* data = VG_(indexXA)(xt->data, xecu);
346 
347    xt->sub_data_fn(data, value);
348    return xecu;
349 }
350 
351 void VG_(XT_add_to_xecu) (XTree* xt, Xecu xecu, const void* value)
352 {
353    void* data = VG_(indexXA)(xt->data, xecu);
354    xt->add_data_fn(data, value);
355 }
356 
357 void VG_(XT_sub_from_xecu) (XTree* xt, Xecu xecu, const void* value)
358 {
359    void* data = VG_(indexXA)(xt->data, xecu);
360    xt->sub_data_fn(data, value);
361 }
362 
363 UInt VG_(XT_n_ips_sel) (XTree* xt, Xecu xecu)
364 {
365    xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
366    return (UInt)xe->n_ips_sel;
367 }
368 
369 ExeContext* VG_(XT_get_ec_from_xecu) (XTree* xt, Xecu xecu)
370 {
371    xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
372    return xe->ec;
373 }
374 
375 static VgFile* xt_open (const HChar* outfilename)
376 {
377    VgFile* fp;
378 
379    fp = VG_(fopen)(outfilename, VKI_O_CREAT|VKI_O_WRONLY|VKI_O_TRUNC,
380                    VKI_S_IRUSR|VKI_S_IWUSR);
381    if (fp == NULL) {
382       VG_(message)(Vg_UserMsg,
383                    "Error: can not open xtree output file `%s'\n",
384                    outfilename);
385    }
386    return fp;
387 }
388 
389 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
390 
391 // Print "cmd:" line.
392 static void FP_cmd(VgFile* fp)
393 {
394    UInt i;
395 
396    FP("cmd: ");
397    FP("%s", VG_(args_the_exename));
398    for (i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++) {
399       HChar* arg = * (HChar**) VG_(indexXA)(VG_(args_for_client), i);
400       FP(" %s", arg);
401    }
402    FP("\n");
403 }
404 
405 /* ----------- Callgrind output ------------------------------------------- */
406 
407 /* Output a callgrind format element either in compressed format:
408      "name=(pos)" or "name=(pos) value" (if value_new)
409    or in non-compressed format: "name=value".
410    VG_(clo_xtree_compress_strings) indicates if the compressed format is used.
411    name is the format element (e.g. fl, fn, cfi, cfn, ...).
412    pos is the value dictionary position, used for the compressed format.
413    value_new is True if this is the first usage of value. */
414 static void FP_pos_str(VgFile* fp, const HChar* name, UInt pos,
415                        const HChar* value, Bool value_new)
416 {
417    if (!VG_(clo_xtree_compress_strings))
418       FP("%s=%s\n", name, value);
419    else if (value_new)
420       FP("%s=(%d) %s\n", name, pos, value);
421    else
422       FP("%s=(%d)\n", name, pos);
423 }
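/* Illustrative output (values made up): with compression enabled
   (VG_(clo_xtree_compress_strings) is True), the first occurrence of a
   function name produces e.g. "fn=(2) main\n", and later references to the
   same name produce just "fn=(2)\n".  With compression disabled, every
   line carries the full string, e.g. "fn=main\n". */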
424 
425 void VG_(XT_callgrind_print)
426      (XTree* xt,
427       const HChar* outfilename,
428       const HChar* events,
429       const HChar* (*img_value)(const void* value))
430 {
431    UInt n_xecu;
432    XT_shared* shared = xt->shared;
433    VgFile* fp = xt_open(outfilename);
434    DedupPoolAlloc* fnname_ddpa;
435    DedupPoolAlloc* filename_ddpa;
436    HChar* filename_buf = NULL;
437    UInt filename_buf_size = 0;
438    const HChar* filename_dir;
439    const HChar* filename_name;
440 
441    if (fp == NULL)
442       return;
443 
444    fnname_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
445                                  "XT_callgrind_print.fn", xt->free_fn);
446    filename_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
447                                    "XT_callgrind_print.fl", xt->free_fn);
448 
449    FP("# callgrind format\n");
450    FP("version: 1\n");
451    FP("creator: xtree-1\n");
452    FP("pid: %d\n", VG_(getpid)());
453    FP_cmd(fp);
454 
455    /* Currently, we only need/support line positions. */
456    FP("\npositions:%s\n", " line");
457 
458    /* Produce one "event:" line for each event, and the "events:" line. */
459    {
460       HChar strtok_events[VG_(strlen)(events)+1];
461       HChar* e;
462       HChar* ssaveptr;
463       HChar* p;
464 
465       VG_(strcpy)(strtok_events, events);
466       for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
467            e != NULL;
468            e = VG_(strtok_r)(NULL, ",", &ssaveptr))
469          FP("event: %s\n", e);
470       FP("events:");
471       VG_(strcpy)(strtok_events, events);
472       for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
473            e != NULL;
474            e = VG_(strtok_r)(NULL, ",", &ssaveptr)) {
475          p = e;
476          while (*p) {
477             if (*p == ':')
478                *p = 0;
479             p++;
480          }
481          FP(" %s", e);
482       }
483       FP("\n");
484    }
485    xt->init_data_fn(xt->tmp_data); // to compute totals
486 
487    n_xecu = VG_(sizeXA)(xt->data);
488    vg_assert (n_xecu <= VG_(sizeXA)(shared->xec));
489    for (Xecu xecu = 0; xecu < n_xecu; xecu++) {
490       xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
491       if (xe->n_ips_sel == 0)
492          continue;
493 
494       const HChar* img = img_value(VG_(indexXA)(xt->data, xecu));
495 
496       // CALLED_FLF gets the Dir+Filename/Line number/Function name for ips[n]
497       // in the variables called_filename/called_linenum/called_fnname.
498       // The booleans called_filename_new/called_fnname_new are set to True
499       // the first time the called_filename/called_fnname are encountered.
500       // The called_filename_nr/called_fnname_nr are numbers identifying
501       // the strings  called_filename/called_fnname.
502 #define CALLED_FLF(n)                                                   \
503       if ((n) < 0                                                       \
504           || !VG_(get_filename_linenum)(ips[(n)],                       \
505                                         &filename_name,                 \
506                                         &filename_dir,                  \
507                                         &called_linenum)) {             \
508          filename_name = "UnknownFile???";                              \
509          called_linenum = 0;                                            \
510       }                                                                 \
511       if ((n) < 0                                                       \
512           || !VG_(get_fnname)(ips[(n)], &called_fnname)) {              \
513          called_fnname = "UnknownFn???";                                \
514       }                                                                 \
515       {                                                                 \
516          UInt needed_size = VG_(strlen)(filename_dir) + 1               \
517             + VG_(strlen)(filename_name) + 1;                           \
518          if (filename_buf_size < needed_size) {                         \
519             filename_buf_size = needed_size;                            \
520             filename_buf = VG_(realloc)(xt->cc, filename_buf,           \
521                                         filename_buf_size);             \
522          }                                                              \
523       }                                                                 \
524       VG_(strcpy)(filename_buf, filename_dir);                          \
525       if (filename_buf[0] != '\0') {                                    \
526          VG_(strcat)(filename_buf, "/");                                \
527       }                                                                 \
528       VG_(strcat)(filename_buf, filename_name);                         \
529       called_filename_nr = VG_(allocStrDedupPA)(filename_ddpa,          \
530                                                 filename_buf,           \
531                                                 &called_filename_new);  \
532       called_filename = filename_buf;                                   \
533       called_fnname_nr = VG_(allocStrDedupPA)(fnname_ddpa,              \
534                                               called_fnname,            \
535                                               &called_fnname_new);
536 
537       /* Instead of the unknown fnname ???, CALLED_FLF could use:
538          VG_(sprintf)(unknown_fn, "%p", (void*)ips[(n)]);
539          but that creates a lot of (useless) nodes, at least for
540          valgrind self-hosting. */
541 
542       if (img) {
543          const HChar* called_filename;
544          UInt called_filename_nr;
545          Bool called_filename_new; // True the first time we see this filename.
546          const HChar* called_fnname;
547          UInt called_fnname_nr;
548          Bool called_fnname_new; // True the first time we see this fnname.
549          UInt called_linenum;
550          UInt prev_linenum;
551 
552          const Addr* ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
553          Int ips_idx = xe->n_ips_sel - 1;
554 
555          if (0) {
556             VG_(printf)("entry img %s\n", img);
557             VG_(pp_ExeContext)(xe->ec);
558             VG_(printf)("\n");
559          }
560          xt->add_data_fn(xt->tmp_data, VG_(indexXA)(xt->data, xecu));
561          CALLED_FLF(ips_idx);
562          for (;
563               ips_idx >= 0;
564               ips_idx--) {
565             FP_pos_str(fp, "fl", called_filename_nr,
566                        called_filename, called_filename_new);
567             FP_pos_str(fp, "fn", called_fnname_nr,
568                        called_fnname, called_fnname_new);
569             if (ips_idx == 0)
570                FP("%d %s\n", called_linenum, img);
571             else
572                FP("%d\n", called_linenum); //no self cost.
573             prev_linenum = called_linenum;
574             if (ips_idx >= 1) {
575                CALLED_FLF(ips_idx-1);
576                FP_pos_str(fp, "cfi", called_filename_nr,
577                           called_filename, called_filename_new);
578                FP_pos_str(fp, "cfn", called_fnname_nr,
579                           called_fnname, called_fnname_new);
580                called_filename_new = False;
581                called_fnname_new = False;
582                /* Giving a call count of 0 allows kcachegrind to hide the calls
583                   column. A call count of 1 means kcachegrind would show in the
584                   calls column the nr of stacktraces containing this arc, which
585                   is very confusing. So, the least bad option is to give a 0
586                   call count. */
587                FP("calls=0 %d\n", called_linenum);
588                FP("%d %s\n", prev_linenum, img);
589             }
590          }
591          FP("\n");
592       }
593    }
594    /* The callgrind format does not really fully support (yet?) execution
595       trees: in an execution tree, self and inclusive costs are identical,
596       and cannot be added together.
597       If no totals: line is given, callgrind_annotate adds up all the
598       costs, and so produces wrong totals.
599       Giving a totals: line solves this, but the user must give the option
600       --inclusive=yes (kind of a hack) to have all the functions shown
601       in the output file. */
602    FP("totals: %s\n", img_value(xt->tmp_data));
603    VG_(fclose)(fp);
604    VG_(deleteDedupPA)(fnname_ddpa);
605    VG_(deleteDedupPA)(filename_ddpa);
606    VG_(free)(filename_buf);
607 }
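/* Illustrative fragment of what the loop above emits for one stacktrace
   "main -> foo" with a cost of 100 (all names, line numbers and values are
   made up; string compression disabled for readability):

      fl=/home/me/prog.c
      fn=main
      5
      cfi=/home/me/prog.c
      cfn=foo
      calls=0 10
      5 100
      fl=/home/me/prog.c
      fn=foo
      10 100

   i.e. each frame gets an fl/fn pair, the deepest frame carries the cost,
   and each caller->callee arc is described by cfi/cfn plus a "calls=0"
   line, as explained in the comment inside the loop. */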
608 
609 
610 /* ----------- Massif output ---------------------------------------------- */
611 
612 /* For Massif output, some functions from the execontext are not output,
613    notably the allocation functions at the top of the stack and the functions
614    below main. So, the StackTrace of the execontexts in the xtree must be filtered.
615    Ms_Ec defines the subset of the stacktrace relevant for the report. */
616 typedef
617    struct {
618       StackTrace ips; // ips and n_ips provide the subset of the xtree ec
619       UInt n_ips;     // to use for a massif report.
620 
621       SizeT report_value; // The value to report for this stack trace.
622    } Ms_Ec;
623 
624 /* Ms_Group defines (at a certain depth) a group of ec contexts that
625    have the same IP at the given depth, and have the same 'parent'.
626    total is the sum of the values of all group elements.
627    A Ms_Group can also represent a set of ec contexts that do not
628    have the same IP, but that each have a total which is below the
629    significant size. Such a group has a NULL ms_ec and a zero group_ip.
630    n_ec is the nr of insignificant ec that have been collected inside this
631    insignificant group, and total is the sum of all non-significant ec
632    at the given depth. */
633 typedef
634    struct {
635       Ms_Ec* ms_ec; // The group consists of ms_ec[0 .. n_ec-1]
636       Addr group_ip;
637       UInt n_ec;
638       SizeT total;
639    } Ms_Group;
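/* Illustrative example (made-up values): at depth 0, three Ms_Ec whose
   ips[0] is the same address A form one Ms_Group with group_ip == A,
   n_ec == 3 and total equal to the sum of their report_value.  Groups
   whose total is below the significance threshold are merged by
   ms_make_groups into a single "insignificant" group with ms_ec == NULL
   and group_ip == 0, whose total accumulates all the insignificant
   totals. */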
640 
641 /* Compare 2 groups by total, to have bigger total first. */
642 static Int ms_group_revcmp_total(const void* vleft, const void* vright)
643 {
644    const Ms_Group* left = (const Ms_Group*)vleft;
645    const Ms_Group* right = (const Ms_Group*)vright;
646 
647    // First reverse compare total
648    if (left->total > right->total) return -1;
649    if (left->total < right->total) return  1;
650 
651    /* Equal size => compare IPs.
652       This (somewhat?) helps to get deterministic test results.
653       If this turns out to differ between platforms, then we should compare
654       function names/filenames/line numbers instead. */
655    if (left->group_ip < right->group_ip) return -1;
656    if (left->group_ip > right->group_ip) return  1;
657    return 0;
658 }
659 
660 /* Scan the addresses in ms_ec at the given depth.
661    On return,
662       *groups points to an array of Ms_Group sorted by total.
663       *n_groups is the nr of groups.
664    The caller is responsible for freeing the allocated group array. */
665 static void ms_make_groups (UInt depth, Ms_Ec* ms_ec, UInt n_ec, SizeT sig_sz,
666                             UInt* n_groups, Ms_Group** groups)
667 {
668    UInt i, g;
669    Addr cur_group_ip = 0;
670 
671    *n_groups = 0;
672 
673    /* Handle special case somewhat more efficiently */
674    if (n_ec == 0) {
675       *groups = NULL;
676       return;
677    }
678 
679    /* Compute how many groups we have. */
680    for (i = 0; i < n_ec; i++) {
681       if (ms_ec[i].n_ips > depth
682           && (*n_groups == 0 || cur_group_ip != ms_ec[i].ips[depth])) {
683          (*n_groups)++;
684          cur_group_ip = ms_ec[i].ips[depth];
685       }
686    }
687 
688    /* make the group array. */
689    *groups = VG_(malloc)("ms_make_groups", *n_groups * sizeof(Ms_Group));
690    i = 0;
691    for (g = 0; g < *n_groups; g++) {
692       while (ms_ec[i].n_ips <= depth)
693          i++;
694       cur_group_ip = ms_ec[i].ips[depth];
695       (*groups)[g].group_ip = cur_group_ip;
696       (*groups)[g].ms_ec = &ms_ec[i];
697       (*groups)[g].n_ec = 1;
698       (*groups)[g].total = ms_ec[i].report_value;
699       i++;
700       while (i < n_ec
701              && ms_ec[i].n_ips > depth
702              && cur_group_ip == ms_ec[i].ips[depth]) {
703          (*groups)[g].total += ms_ec[i].report_value;
704          i++;
705          (*groups)[g].n_ec++;
706       }
707    }
708 
709    /* Search for insignificant groups, collect them all together
710       in the first insignificant group, and compact the group array. */
711    {
712       UInt insig1; // Position of first insignificant group.
713       UInt n_insig = 0; // Nr of insignificant groups found.
714 
715       for (g = 0; g < *n_groups; g++) {
716          if ((*groups)[g].total < sig_sz) {
717             if (n_insig == 0) {
718                // First insig group => transform it into the special group
719                (*groups)[g].ms_ec = NULL;
720                (*groups)[g].group_ip = 0;
721                (*groups)[g].n_ec = 0;
722                // Start summing the insignificant totals in this group's total.
723                insig1 = g;
724             } else {
725                // Add this insig group total into insig1 first group
726                (*groups)[insig1].total += (*groups)[g].total;
727             }
728             n_insig++;
729          } else {
730             if (n_insig > 1)
731                (*groups)[g - n_insig + 1] = (*groups)[g];
732          }
733       }
734       if (n_insig > 0) {
735          (*groups)[insig1].n_ec = n_insig;
736          *n_groups -= n_insig - 1;
737       }
738       DMSG(1, "depth %u n_groups %u n_insig %u\n", depth, *n_groups, n_insig);
739    }
740 
741    /* Sort on total size, bigger size first. */
742    VG_(ssort)(*groups, *n_groups, sizeof(Ms_Group), ms_group_revcmp_total);
743 }
744 
745 static void ms_output_group (VgFile* fp, UInt depth, Ms_Group* group,
746                              SizeT sig_sz, double sig_pct_threshold)
747 {
748    UInt i;
749    Ms_Group* groups;
750    UInt n_groups;
751 
752    // If this is an insignificant group, handle it specially
753    if (group->ms_ec == NULL) {
754       const HChar* s = ( 1 ==  group->n_ec? "," : "s, all" );
755       vg_assert(group->group_ip == 0);
756       FP("%*sn0: %lu in %d place%s below massif's threshold (%.2f%%)\n",
757          depth+1, "", group->total, group->n_ec, s, sig_pct_threshold);
758       return;
759    }
760 
761    // Normal group => output the group and its subgroups.
762    ms_make_groups(depth+1, group->ms_ec, group->n_ec, sig_sz,
763                   &n_groups, &groups);
764 
765    FP("%*s" "n%u: %ld %s\n",
766       depth + 1, "",
767       n_groups,
768       group->total,
769       VG_(describe_IP)(group->ms_ec->ips[depth] - 1, NULL));
770    /* XTREE??? The original Massif code subtracts 1 to get the IP description.
771       I wonder whether this predates revision r8818, which introduced a -1
772       in the stack unwind (see m_stacktrace.c).
773       Kept for the moment to allow exact comparison with massif output, but
774       we should probably remove this, as we very probably end up 2 bytes
775       before the RA (Return Address). */
776 
777    /* Output sub groups of this group. */
778    for (i = 0; i < n_groups; i++)
779       ms_output_group(fp, depth+1, &groups[i], sig_sz, sig_pct_threshold);
780 
781    VG_(free)(groups);
782 }
783 
784 /* Allocate and build an array of Ms_Ec sorted by addresses in the
785    Ms_Ec StackTrace. */
786 static void prepare_ms_ec (XTree* xt,
787                            ULong (*report_value)(const void* value),
788                            ULong* top_total, Ms_Ec** vms_ec, UInt* vn_ec)
789 {
790    XT_shared* shared = xt->shared;
791    const UInt n_xecu = VG_(sizeXA)(shared->xec);
792    const UInt n_data_xecu = VG_(sizeXA)(xt->data);
793    Ms_Ec* ms_ec = VG_(malloc)("XT_massif_print.ms_ec", n_xecu * sizeof(Ms_Ec));
794    UInt n_xecu_sel = 0; // Nr of xecu that are selected for output.
795 
796    vg_assert(n_data_xecu <= n_xecu);
797 
798    // Ensure we have in shared->ips_order_xecu our xecu sorted by StackTrace.
799    ensure_ips_order_xecu_valid(shared);
800 
801    *top_total = 0;
802    DMSG(1, "iteration %u\n", n_xecu);
803    for (UInt i = 0; i < n_xecu; i++) {
804       Xecu xecu = *(Xecu*)VG_(indexXA)(shared->ips_order_xecu, i);
805       xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
806 
807       if (xecu >= n_data_xecu)
808          continue; // No data for this xecu in xt->data.
809       ms_ec[n_xecu_sel].n_ips = xe->n_ips_sel;
810       if (ms_ec[n_xecu_sel].n_ips == 0)
811          continue;
812 
813       ms_ec[n_xecu_sel].ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
814       ms_ec[n_xecu_sel].report_value
815          = (*report_value)(VG_(indexXA)(xt->data, xecu));
816       *top_total += ms_ec[n_xecu_sel].report_value;
817 
818       n_xecu_sel++;
819    }
820    vg_assert(n_xecu_sel <= n_xecu);
821 
822    *vms_ec = ms_ec;
823    *vn_ec = n_xecu_sel;
824 }
825 
826 MsFile* VG_(XT_massif_open)
827      (const HChar* outfilename,
828       const HChar* desc,
829       const XArray* desc_args,
830       const HChar* time_unit)
831 {
832    UInt i;
833    VgFile* fp = xt_open(outfilename);
834 
835    if (fp == NULL)
836       return NULL; // xt_open reported the error.
837 
838    /* ------ file header ------------------------------- */
839    FP("desc:");
840    if (desc)
841       FP(" %s", desc);
842    i = 0;
843    if (desc_args) {
844       for (i = 0; i < VG_(sizeXA)(desc_args); i++) {
845          HChar* arg = *(HChar**)VG_(indexXA)(desc_args, i);
846          FP(" %s", arg);
847       }
848    }
849    if (0 == i && desc == NULL) FP(" (none)");
850    FP("\n");
851 
852    FP_cmd(fp);
853 
854    FP("time_unit: %s\n", time_unit);
855 
856    return fp;
857 }
858 
859 void VG_(XT_massif_close)(MsFile* fp)
860 {
861    if (fp == NULL)
862       return; // Error should have been reported by  VG_(XT_massif_open)
863 
864    VG_(fclose)(fp);
865 }
866 
867 void VG_(XT_massif_print)
868      (MsFile* fp,
869       XTree* xt,
870       const Massif_Header* header,
871       ULong (*report_value)(const void* value))
872 {
873    UInt i;
874 
875    if (fp == NULL)
876       return; // Normally  VG_(XT_massif_open) already reported an error.
877 
878    /* Compute/prepare Snapshot totals/data/... */
879    ULong top_total;
880 
881    /* Following variables only used for detailed snapshot. */
882    UInt n_ec = 0;
883    Ms_Ec* ms_ec = NULL;
884    const HChar* kind =
885       header->detailed ? (header->peak ? "peak" : "detailed") : "empty";
886 
887    DMSG(1, "XT_massif_print %s\n", kind);
888    if (header->detailed) {
889       /* Prepare the Ms_Ec sorted array of stacktraces and the groups
890          at level 0. */
891       prepare_ms_ec(xt, report_value, &top_total, &ms_ec, &n_ec);
892       DMSG(1, "XT_print_massif ms_ec n_ec %u\n", n_ec);
893    } else if (xt == NULL) {
894       /* Non detailed, no xt => use the sz provided in the header. */
895       top_total = header->sz_B;
896    } else {
897       /* For non detailed snapshot, compute total directly from the xec. */
898       const XT_shared* shared = xt->shared;
899       const UInt n_xecu = VG_(sizeXA)(xt->data);
900       top_total = 0;
901 
902       for (UInt xecu = 0; xecu < n_xecu; xecu++) {
903          xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
904          if (xe->n_ips_sel == 0)
905             continue;
906          top_total += (*report_value)(VG_(indexXA)(xt->data, xecu));
907       }
908    }
909 
910    /* ------ snapshot header --------------------------- */
911    FP("#-----------\n");
912    FP("snapshot=%d\n", header->snapshot_n);
913    FP("#-----------\n");
914    FP("time=%lld\n", header->time);
915 
916    FP("mem_heap_B=%llu\n", top_total); // without extra_B and without stacks_B
917    FP("mem_heap_extra_B=%llu\n", header->extra_B);
918    FP("mem_stacks_B=%llu\n", header->stacks_B);
919    FP("heap_tree=%s\n", kind);
920 
921    /* ------ detailed snapshot data ----------------------------- */
922    if (header->detailed) {
923       UInt n_groups;
924       Ms_Group* groups;
925 
926       ULong sig_sz;
927       // Work out how big a child must be to be significant.  If the current
928       // top_total is zero, then we set it to 1, which means everything will be
929       // judged insignificant -- this is sensible, as there's no point showing
930       // any detail for this case.  Unless they used threshold=0, in which
931       // case we show them everything because that's what they asked for.
932       //
933       // Nb: We do this once now, rather than once per child, because if we do
934       // that the cost of all the divisions adds up to something significant.
935       if (0 == top_total && 0 != header->sig_threshold)
936          sig_sz = 1;
937       else
938          sig_sz = ((top_total + header->extra_B + header->stacks_B)
939                    * header->sig_threshold) / 100;
940 
941       /* Produce the groups at depth 0 */
942       DMSG(1, "XT_massif_print producing depth 0 groups\n");
943       ms_make_groups(0, ms_ec, n_ec, sig_sz, &n_groups, &groups);
944 
945       /* Output the top node. */
946       FP("n%u: %llu %s\n", n_groups, top_total, header->top_node_desc);
947 
948       /* Output depth 0 groups. */
949       DMSG(1, "XT_massif_print outputting %u depth 0 groups\n", n_groups);
950       for (i = 0; i < n_groups; i++)
951          ms_output_group(fp, 0, &groups[i], sig_sz, header->sig_threshold);
952 
953       VG_(free)(groups);
954       VG_(free)(ms_ec);
955    }
956 }
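/* Illustrative detailed snapshot produced by the code above (all numbers,
   addresses, names and the top node description are made up):

      #-----------
      snapshot=3
      #-----------
      time=1000
      mem_heap_B=4096
      mem_heap_extra_B=48
      mem_stacks_B=0
      heap_tree=detailed
      n2: 4096 <top_node_desc from the Massif_Header>
       n1: 3000 0x10A0B4: foo (prog.c:10)
        n0: 3000 0x10A200: main (prog.c:42)
       n0: 1096 in 2 places, all below massif's threshold (1.00%)
*/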
957 
958 Int VG_(XT_offset_main_or_below_main)(Addr* ips, Int n_ips)
959 {
960    /* Search for the main or below-main function.
961       To limit the nr of ips to examine, we maintain the deepest
962       offset where main was found, and we first search for main
963       from there.
964       If no main is found, we then search for a main or
965       below-main function up to the top. */
966    static Int deepest_main = 0;
967    Vg_FnNameKind kind = Vg_FnNameNormal;
968    Int mbm = n_ips - 1; // Position of deepest main or below main.
969    Vg_FnNameKind mbmkind = Vg_FnNameNormal;
970    Int i;
971 
972    for (i = n_ips - 1 - deepest_main;
973         i < n_ips;
974         i++) {
975       mbmkind = VG_(get_fnname_kind_from_IP)(ips[i]);
976       if (mbmkind != Vg_FnNameNormal) {
977          mbm = i;
978          break;
979       }
980    }
981 
982    /* Search for main or below main function till top. */
983    for (i = mbm - 1;
984         i >= 0 && mbmkind != Vg_FnNameMain;
985         i--) {
986       kind = VG_(get_fnname_kind_from_IP)(ips[i]);
987       if (kind != Vg_FnNameNormal) {
988          mbm = i;
989          mbmkind = kind;
990       }
991    }
992    if (Vg_FnNameMain == mbmkind || Vg_FnNameBelowMain == mbmkind) {
993       if (mbmkind == Vg_FnNameMain && (n_ips - 1 - mbm) > deepest_main)
994          deepest_main = n_ips - 1 - mbm;
995       return mbm;
996    } else
997       return n_ips-1;
998 }
999 
1000 void VG_(XT_filter_1top_and_maybe_below_main)
1001      (Addr* ips, Int n_ips,
1002       UInt* top, UInt* n_ips_sel)
1003 {
1004    Int mbm;
1005 
1006    *n_ips_sel = n_ips;
1007    if (n_ips == 0) {
1008       *top = 0;
1009       return;
1010    }
1011 
1012    /* Filter top function. */
1013    *top = 1;
1014 
1015    if (VG_(clo_show_below_main))
1016       mbm = n_ips - 1;
1017    else
1018       mbm = VG_(XT_offset_main_or_below_main)(ips, n_ips);
1019 
1020    *n_ips_sel = mbm - *top + 1;
1021 }
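/* Illustrative example (made-up stacktrace): for
      ips = { malloc, foo, main, (below main) }, n_ips = 4,
   VG_(XT_filter_1top_and_maybe_below_main) sets *top = 1 (skipping the
   top allocation function) and, unless --show-below-main=yes is given,
   *n_ips_sel = 2, so only "foo" and "main" are kept.  With
   --show-below-main=yes all frames except the top one are kept. */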
1022 
1023 void VG_(XT_filter_maybe_below_main)
1024      (Addr* ips, Int n_ips,
1025       UInt* top, UInt* n_ips_sel)
1026 {
1027    Int mbm;
1028 
1029    *n_ips_sel = n_ips;
1030    *top = 0;
1031    if (n_ips == 0)
1032       return;
1033 
1034    if (VG_(clo_show_below_main))
1035       mbm = n_ips - 1;
1036    else
1037       mbm = VG_(XT_offset_main_or_below_main)(ips, n_ips);
1038 
1039    *n_ips_sel = mbm - *top + 1;
1040 }
1041 
1042 /*--------------------------------------------------------------------*/
1043 /*--- end                                                m_xtree.c ---*/
1044 /*--------------------------------------------------------------------*/
1045