1
2 /*--------------------------------------------------------------------*/
3 /*--- An xtree, tree of stacktraces with data m_xtree.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2016-2017 Philippe Waroquiers
11
12 This code generalises the XTree idea that was implemented in
13 the massif tool in Valgrind versions <= 3.12, which is
14 Copyright (C) 2005-2017 Nicholas Nethercote
15 njn@valgrind.org
16
17 The XTree implementation in this file is however implemented completely
18 differently. Some code has been re-used for the production of the
19 massif file header (e.g. FP_cmd function).
20
21 This program is free software; you can redistribute it and/or
22 modify it under the terms of the GNU General Public License as
23 published by the Free Software Foundation; either version 2 of the
24 License, or (at your option) any later version.
25
26 This program is distributed in the hope that it will be useful, but
27 WITHOUT ANY WARRANTY; without even the implied warranty of
28 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 General Public License for more details.
30
31 You should have received a copy of the GNU General Public License
32 along with this program; if not, write to the Free Software
33 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
34 02111-1307, USA.
35
36 The GNU General Public License is contained in the file COPYING.
37 */
38
39 #include "pub_core_basics.h"
40 #include "pub_core_debuglog.h"
41 #include "pub_core_clientstate.h"
42 #include "pub_core_stacktrace.h"
43 #include "pub_core_execontext.h"
44 #include "pub_core_libcbase.h"
45 #include "pub_core_libcassert.h"
46 #include "pub_core_libcfile.h"
47 #include "pub_core_libcprint.h"
48 #include "pub_core_libcproc.h"
49 #include "pub_core_hashtable.h"
50 #include "pub_core_mallocfree.h"
51 #include "pub_core_options.h"
52 #include "pub_core_debuginfo.h"
53 #include "pub_core_deduppoolalloc.h"
54 #include "pub_core_xtree.h" /* self */
55
56 #define DMSG(level, ...) (level <= VG_(debugLog_getLevel)() ? \
57 VG_(dmsg)(__VA_ARGS__) \
58 : 0)
59
60 /* Defines the relevant part of an ec. This is shared between an xt
61 and its snapshots (see XT_shared XArray of xec). */
typedef
   struct _xec {
      ExeContext* ec;   // Full execontext; only a slice of its ips is used.
      UShort top;       // The first ips of ec to take into account.
      UShort n_ips_sel; // The nr of ips from top to take into account.
   }
   xec;
69
70 /* XT_shared maintains the information shared between an XT and all
71 its snapshots. */
typedef
   struct _XT_shared {
      UWord nrRef; /* nr of XTrees referencing this shared memory,
                      maintained by addRef_XT_shared/release_XT_shared. */

      Alloc_Fn_t alloc_fn; /* alloc fn (nofail) */
      const HChar* cc;     /* cost centre for alloc */
      Free_Fn_t free_fn;   /* free fn */

      /* The data associated to each ec is stored in 2 arrays:
           an xec array, shared between an xt and all its snapshots.
           a data array, private to each XTree.
         For an ec with an ECU ecu, d4ecu2xecu[ecu/4] gives the offset in
         xec and data arrays where the ec information is located (this
         indirection is used to avoid huge xec and data arrays, in
         case an XTree contains data only for a small number of ec.
         The offset in the xec and data array is used as xtree ec unique
         id i.e. an xecu. */

      UInt d4ecu2xecu_sz; /* size of d4ecu2xecu (in nr of elements). */
      UInt* d4ecu2xecu;   /* slots not yet assigned hold NO_OFFSET. */

      /* ec information common to an xt and its snapshots. */
      XArray* xec; /* XArray of xec, indexed by xecu (== d4ecu2xecu[ecu/4]). */

      /* XArray of xecu, sorted by StackTrace ips[top..top+n_ips_sel-1].
         See ips_order_cmp. Lazily built by ensure_ips_order_xecu_valid. */
      XArray* ips_order_xecu;
   } XT_shared;
100
101 /* NO_OFFSET indicates in d4ecu2xecu there is no data (yet) for this ec
102 (with the index ecu/4). */
103 #define NO_OFFSET 0xffffffff
104
new_XT_shared(Alloc_Fn_t alloc_fn,const HChar * cc,void (* free_fn)(void *))105 static XT_shared* new_XT_shared (Alloc_Fn_t alloc_fn,
106 const HChar* cc,
107 void (*free_fn)(void*))
108 {
109 XT_shared* shared;
110
111 vg_assert(alloc_fn);
112 vg_assert(cc);
113 vg_assert(free_fn);
114 shared = alloc_fn(cc, sizeof(*shared));
115 shared->nrRef = 0;
116 shared->alloc_fn = alloc_fn;
117 shared->cc = cc;
118 shared->free_fn = free_fn;
119
120 shared->d4ecu2xecu_sz = 0;
121 shared->d4ecu2xecu = NULL;
122 shared->xec = VG_(newXA)(alloc_fn, cc, free_fn, sizeof(xec));
123 shared->ips_order_xecu = NULL; // Allocated when needed.
124
125 return shared;
126 }
127
/* Free a shared part. Must only be called once no XTree references it. */
static void delete_XT_shared (XT_shared* shared)
{
   Free_Fn_t free_fn = shared->free_fn;

   vg_assert(shared->nrRef == 0);
   free_fn(shared->d4ecu2xecu);
   VG_(deleteXA)(shared->xec);
   if (shared->ips_order_xecu != NULL)
      VG_(deleteXA)(shared->ips_order_xecu);
   free_fn(shared);
}
137
138 /* Compare 2 entries in ips_order_xecu by StackTrace elements.
139 In case stack traces are of different length, an 'absent' ips is
140 considered smaller than any other address. */
141 static XArray* xec_data_for_sort; // Needed to translate an xecu into an xec
ips_order_cmp(const void * vleft,const void * vright)142 static Int ips_order_cmp(const void* vleft, const void* vright)
143 {
144 const Xecu left_xecu = *(const Xecu*)vleft;
145 const Xecu right_xecu = *(const Xecu*)vright;
146 const xec* left = VG_(indexXA)(xec_data_for_sort, left_xecu);
147 const xec* right = VG_(indexXA)(xec_data_for_sort, right_xecu);
148 const StackTrace left_ips = VG_(get_ExeContext_StackTrace)(left->ec)
149 + left->top;
150 const StackTrace right_ips = VG_(get_ExeContext_StackTrace)(right->ec)
151 + right->top;
152 UInt i;
153
154 const UInt c_n_ips_sel = left->n_ips_sel < right->n_ips_sel
155 ? left->n_ips_sel : right->n_ips_sel;
156
157 // First see if we have a difference on the common nr of ips selected
158 for (i = 0; i < c_n_ips_sel; i++) {
159 if (left_ips[i] == right_ips[i]) continue;
160 if (left_ips[i] < right_ips[i]) return -1;
161 return 1;
162 }
163 // Common part is equal => compare lengths.
164 if (left->n_ips_sel < right->n_ips_sel) return -1;
165 if (left->n_ips_sel > right->n_ips_sel) return 1;
166 return 0;
167 }
168
169 // If needed, build or refresh shared->ips_order_xecu
static void ensure_ips_order_xecu_valid(XT_shared* shared)
{
   /* Lazily create the ordering array the first time it is needed. */
   if (shared->ips_order_xecu == NULL) {
      shared->ips_order_xecu = VG_(newXA)(shared->alloc_fn, shared->cc,
                                          shared->free_fn, sizeof(Xecu));
      VG_(hintSizeXA)(shared->ips_order_xecu, VG_(sizeXA)(shared->xec));
      VG_(setCmpFnXA)(shared->ips_order_xecu, ips_order_cmp);
   }

   const UInt n_xecu = VG_(sizeXA)(shared->xec);
   if (n_xecu == VG_(sizeXA)(shared->ips_order_xecu))
      return; // Already in sync with shared->xec.

   /* Append the xecu created since the last (re-)sort, then sort fully. */
   for (UInt i = VG_(sizeXA)(shared->ips_order_xecu); i < n_xecu; i++)
      VG_(addToXA)(shared->ips_order_xecu, &i);

   xec_data_for_sort = shared->xec;
   VG_(sortXA)(shared->ips_order_xecu);
}
192
/* Record one more XTree referencing shared. */
static void addRef_XT_shared (XT_shared* shared)
{
   shared->nrRef += 1;
}
197
release_XT_shared(XT_shared * shared)198 static UWord release_XT_shared(XT_shared* shared)
199 {
200 UWord nrRef;
201
202 vg_assert(shared->nrRef > 0);
203 nrRef = --shared->nrRef;
204 if (nrRef == 0)
205 delete_XT_shared(shared);
206 return nrRef;
207 }
208
209
struct _XTree {
   Alloc_Fn_t alloc_fn; /* alloc fn (nofail) */
   const HChar* cc;     /* cost centre for alloc */
   Free_Fn_t free_fn;   /* free fn */
   Word dataSzB;        /* data size in bytes */
   XT_init_data_t init_data_fn;     /* initialises one data element. */
   XT_add_data_t add_data_fn;       /* data += value. */
   XT_sub_data_t sub_data_fn;       /* data -= value. */
   XT_filter_IPs_t filter_IPs_fn;   /* optional (may be NULL): selects the
                                       slice of a stacktrace to keep. */

   XT_shared* shared;   /* part shared with all snapshots of this xt. */

   HChar* tmp_data; /* temporary buffer, to insert new elements. */
   XArray* data;    /* of elements of size dataSzB, indexed by xecu;
                       private to this XTree (not shared). */
};
225
226
VG_(XT_create)227 XTree* VG_(XT_create) ( Alloc_Fn_t alloc_fn,
228 const HChar* cc,
229 Free_Fn_t free_fn,
230 Word dataSzB,
231 XT_init_data_t init_data_fn,
232 XT_add_data_t add_data_fn,
233 XT_sub_data_t sub_data_fn,
234 XT_filter_IPs_t filter_IPs_fn)
235 {
236 XTree* xt;
237
238 /* check user-supplied info .. */
239 vg_assert(alloc_fn);
240 vg_assert(free_fn);
241 vg_assert(dataSzB >= 0);
242 vg_assert(init_data_fn);
243 vg_assert(add_data_fn);
244 vg_assert(sub_data_fn);
245
246 xt = alloc_fn(cc, sizeof(struct _XTree) );
247 xt->alloc_fn = alloc_fn;
248 xt->cc = cc;
249 xt->free_fn = free_fn;
250 xt->dataSzB = dataSzB;
251 xt->init_data_fn = init_data_fn;
252 xt->add_data_fn = add_data_fn;
253 xt->sub_data_fn = sub_data_fn;
254 xt->filter_IPs_fn = filter_IPs_fn;
255
256 xt->shared = new_XT_shared(alloc_fn, cc, free_fn);
257 addRef_XT_shared(xt->shared);
258 xt->tmp_data = alloc_fn(cc, xt->dataSzB);
259 xt->data = VG_(newXA)(alloc_fn, cc, free_fn, dataSzB);
260
261 return xt;
262 }
263
VG_(XT_snapshot)264 XTree* VG_(XT_snapshot)(XTree* xt)
265 {
266 XTree* nxt;
267
268 vg_assert(xt);
269
270 nxt = xt->alloc_fn(xt->cc, sizeof(struct _XTree) );
271
272 *nxt = *xt;
273 addRef_XT_shared(nxt->shared);
274 nxt->tmp_data = nxt->alloc_fn(nxt->cc, nxt->dataSzB);
275 nxt->data = VG_(cloneXA)(nxt->cc, xt->data);
276
277 return nxt;
278 }
279
VG_(XT_delete)280 void VG_(XT_delete) ( XTree* xt )
281 {
282 vg_assert(xt);
283
284 release_XT_shared(xt->shared);
285 xt->free_fn(xt->tmp_data);
286 VG_(deleteXA)(xt->data);
287 xt->free_fn(xt);
288 }
289
/* Return the xecu of ec in xt, inserting a new (init-ed) entry in the
   shared xec array and in xt->data if this ec was not yet seen. */
static Xecu find_or_insert (XTree* xt, ExeContext* ec)
{

   const UInt d4ecu = VG_(get_ECU_from_ExeContext)(ec) / 4;
   XT_shared* shared = xt->shared;

   /* First grow the d4ecu2xecu array if needed. */
   if (d4ecu >= shared->d4ecu2xecu_sz) {
      UInt old_sz = shared->d4ecu2xecu_sz;
      UInt new_sz = (3 * d4ecu) / 2;   // grow by ~1.5x of the needed index

      if (new_sz < 1000)
         new_sz = 1000;
      // NOTE(review): growth uses VG_(realloc) while delete_XT_shared frees
      // d4ecu2xecu with shared->free_fn — this is only consistent if free_fn
      // is VG_(free)-compatible; confirm for tools passing a custom free_fn.
      shared->d4ecu2xecu = VG_(realloc)(xt->cc, shared->d4ecu2xecu,
                                        new_sz * sizeof(UInt));
      shared->d4ecu2xecu_sz = new_sz;
      // Mark all newly created slots as "no data yet".
      for (UInt i = old_sz; i < new_sz; i++)
         shared->d4ecu2xecu[i] = NO_OFFSET;
   }

   if (shared->d4ecu2xecu[d4ecu] == NO_OFFSET) {
      xec xe;

      xe.ec = ec;
      if (xt->filter_IPs_fn == NULL) {
         // No filtering: select the full stacktrace of ec.
         xe.top = 0;
         xe.n_ips_sel = (UShort)VG_(get_ExeContext_n_ips)(xe.ec);
      } else {
         // Let the tool select the slice [top, top+n_ips_sel) to keep.
         UInt top;
         UInt n_ips_sel = VG_(get_ExeContext_n_ips)(xe.ec);
         xt->filter_IPs_fn(VG_(get_ExeContext_StackTrace)(xe.ec), n_ips_sel,
                           &top, &n_ips_sel);
         xe.top = (UShort)top;
         xe.n_ips_sel = (UShort)n_ips_sel;
      }
      xt->init_data_fn(xt->tmp_data);
      // Both arrays grow in lock step, so the xec index == the data index,
      // and that common index is the xecu recorded in d4ecu2xecu.
      VG_(addToXA)(shared->xec, &xe);
      shared->d4ecu2xecu[d4ecu] = (UInt)VG_(addToXA)(xt->data, xt->tmp_data);
   }

   return shared->d4ecu2xecu[d4ecu];
}
332
VG_(XT_add_to_ec)333 Xecu VG_(XT_add_to_ec) (XTree* xt, ExeContext* ec, const void* value)
334 {
335 Xecu xecu = find_or_insert(xt, ec);
336 void* data = VG_(indexXA)(xt->data, xecu);
337
338 xt->add_data_fn(data, value);
339 return xecu;
340 }
341
VG_(XT_sub_from_ec)342 Xecu VG_(XT_sub_from_ec) (XTree* xt, ExeContext* ec, const void* value)
343 {
344 Xecu xecu = find_or_insert(xt, ec);
345 void* data = VG_(indexXA)(xt->data, xecu);
346
347 xt->sub_data_fn(data, value);
348 return xecu;
349 }
350
VG_(XT_add_to_xecu)351 void VG_(XT_add_to_xecu) (XTree* xt, Xecu xecu, const void* value)
352 {
353 void* data = VG_(indexXA)(xt->data, xecu);
354 xt->add_data_fn(data, value);
355 }
356
VG_(XT_sub_from_xecu)357 void VG_(XT_sub_from_xecu) (XTree* xt, Xecu xecu, const void* value)
358 {
359 void* data = VG_(indexXA)(xt->data, xecu);
360 xt->sub_data_fn(data, value);
361 }
362
VG_(XT_n_ips_sel)363 UInt VG_(XT_n_ips_sel) (XTree* xt, Xecu xecu)
364 {
365 xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
366 return (UInt)xe->n_ips_sel;
367 }
368
VG_(XT_get_ec_from_xecu)369 ExeContext* VG_(XT_get_ec_from_xecu) (XTree* xt, Xecu xecu)
370 {
371 xec* xe = (xec*)VG_(indexXA)(xt->shared->xec, xecu);
372 return xe->ec;
373 }
374
xt_open(const HChar * outfilename)375 static VgFile* xt_open (const HChar* outfilename)
376 {
377 VgFile* fp;
378
379 fp = VG_(fopen)(outfilename, VKI_O_CREAT|VKI_O_WRONLY|VKI_O_TRUNC,
380 VKI_S_IRUSR|VKI_S_IWUSR);
381 if (fp == NULL) {
382 VG_(message)(Vg_UserMsg,
383 "Error: can not open xtree output file `%s'\n",
384 outfilename);
385 }
386 return fp;
387 }
388
389 #define FP(format, args...) ({ VG_(fprintf)(fp, format, ##args); })
390
391 // Print "cmd:" line.
/* Print the "cmd:" line: executable name followed by the client args. */
static void FP_cmd(VgFile* fp)
{
   FP("cmd: ");
   FP("%s", VG_(args_the_exename));
   for (UInt i = 0; i < VG_(sizeXA)(VG_(args_for_client)); i++)
      FP(" %s", *(HChar**)VG_(indexXA)(VG_(args_for_client), i));
   FP("\n");
}
404
405 /* ----------- Callgrind output ------------------------------------------- */
406
407 /* Output a callgrind format element in compressed format:
408 "name=(pos)" or "name=(pos) value" (if value_new)
409 or not compressed format: "name=value"
410 VG_(clo_xtree_compress_strings) indicates if the compressed format is used.
411 name is the format element (e.g. fl, fn, cfi, cfn, ...).
412 pos is the value dictionary position, used for compressed format.
413 value_new is True if this is the first usage of value. */
static void FP_pos_str(VgFile* fp, const HChar* name, UInt pos,
                       const HChar* value, Bool value_new)
{
   if (!VG_(clo_xtree_compress_strings)) {
      /* Uncompressed format: always repeat the value. */
      FP("%s=%s\n", name, value);
      return;
   }

   /* Compressed format: give the value only on its first usage. */
   if (value_new)
      FP("%s=(%d) %s\n", name, pos, value);
   else
      FP("%s=(%d)\n", name, pos);
}
424
/* Print xt to outfilename in callgrind format.
   events is a comma-separated list of event names; a name may carry a
   description after a ':' (stripped for the "events:" line).
   img_value produces the (event values) image of one data element;
   a NULL return means the corresponding xecu is skipped. */
void VG_(XT_callgrind_print)
     (XTree* xt,
      const HChar* outfilename,
      const HChar* events,
      const HChar* (*img_value)(const void* value))
{
   UInt n_xecu;
   XT_shared* shared = xt->shared;
   VgFile* fp = xt_open(outfilename);
   DedupPoolAlloc* fnname_ddpa;     // dedup pool giving fn name positions
   DedupPoolAlloc* filename_ddpa;   // dedup pool giving file name positions
   HChar* filename_buf = NULL;      // grown on demand by CALLED_FLF
   UInt filename_buf_size = 0;
   const HChar* filename_dir;
   const HChar* filename_name;

   if (fp == NULL)
      return;  // xt_open already reported the error.

   fnname_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
                                 "XT_callgrind_print.fn", xt->free_fn);
   filename_ddpa = VG_(newDedupPA)(16000, 1, xt->alloc_fn,
                                   "XT_callgrind_print.fl", xt->free_fn);

   /* ------ file header ------------------------------- */
   FP("# callgrind format\n");
   FP("version: 1\n");
   FP("creator: xtree-1\n");
   FP("pid: %d\n", VG_(getpid)());
   FP_cmd(fp);

   /* Currently, we only need/support line positions. */
   FP("\npositions:%s\n", " line");

   /* Produce one "event:" line for each event, and the "events:" line. */
   {
      HChar strtok_events[VG_(strlen)(events)+1];
      HChar* e;
      HChar* ssaveptr;
      HChar* p;

      VG_(strcpy)(strtok_events, events);
      for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
           e != NULL;
           e = VG_(strtok_r)(NULL, ",", &ssaveptr))
         FP("event: %s\n", e);
      FP("events:");
      VG_(strcpy)(strtok_events, events);
      for (e = VG_(strtok_r)(strtok_events, ",", &ssaveptr);
           e != NULL;
           e = VG_(strtok_r)(NULL, ",", &ssaveptr)) {
         // Replace ':' with NUL, truncating the event at its description.
         p = e;
         while (*p) {
            if (*p == ':')
               *p = 0;
            p++;
         }
         FP(" %s", e);
      }
      FP("\n");
   }
   xt->init_data_fn(xt->tmp_data); // to compute totals

   n_xecu = VG_(sizeXA)(xt->data);
   vg_assert (n_xecu <= VG_(sizeXA)(shared->xec));
   for (Xecu xecu = 0; xecu < n_xecu; xecu++) {
      xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
      if (xe->n_ips_sel == 0)
         continue;  // Fully filtered stacktrace => nothing to output.

      const HChar* img = img_value(VG_(indexXA)(xt->data, xecu));

      // CALLED_FLF gets the Dir+Filename/Line number/Function name for ips[n]
      // in the variables called_filename/called_linenum/called_fnname.
      // The booleans called_filename_new/called_fnname_new are set to True
      // the first time the called_filename/called_fnname are encountered.
      // The called_filename_nr/called_fnname_nr are numbers identifying
      // the strings  called_filename/called_fnname.
#define CALLED_FLF(n)                                                  \
      if ((n) < 0                                                      \
          || !VG_(get_filename_linenum)(ips[(n)],                      \
                                        &filename_name,                \
                                        &filename_dir,                 \
                                        &called_linenum)) {            \
         filename_name = "UnknownFile???";                             \
         called_linenum = 0;                                           \
      }                                                                \
      if ((n) < 0                                                      \
          || !VG_(get_fnname)(ips[(n)], &called_fnname)) {             \
         called_fnname = "UnknownFn???";                               \
      }                                                                \
      {                                                                \
         UInt needed_size = VG_(strlen)(filename_dir) + 1              \
            + VG_(strlen)(filename_name) + 1;                          \
         if (filename_buf_size < needed_size) {                        \
            filename_buf_size = needed_size;                           \
            filename_buf = VG_(realloc)(xt->cc, filename_buf,          \
                                        filename_buf_size);            \
         }                                                             \
      }                                                                \
      VG_(strcpy)(filename_buf, filename_dir);                         \
      if (filename_buf[0] != '\0') {                                   \
         VG_(strcat)(filename_buf, "/");                               \
      }                                                                \
      VG_(strcat)(filename_buf, filename_name);                        \
      called_filename_nr = VG_(allocStrDedupPA)(filename_ddpa,         \
                                                filename_buf,          \
                                                &called_filename_new); \
      called_filename = filename_buf;                                  \
      called_fnname_nr = VG_(allocStrDedupPA)(fnname_ddpa,             \
                                              called_fnname,           \
                                              &called_fnname_new);

      /* Instead of unknown fnname ???, CALLED_FLF could use instead:
         VG_(sprintf)(unknown_fn, "%p", (void*)ips[(n)]);
         but that creates a lot of (useless) nodes at least for
         valgrind self-hosting. */

      if (img) {
         const HChar* called_filename;
         UInt called_filename_nr;
         Bool called_filename_new; // True the first time we see this filename.
         const HChar* called_fnname;
         UInt called_fnname_nr;
         Bool called_fnname_new; // True the first time we see this fnname.
         UInt called_linenum;
         UInt prev_linenum;

         const Addr* ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
         Int ips_idx = xe->n_ips_sel - 1;

         if (0) {
            VG_(printf)("entry img %s\n", img);
            VG_(pp_ExeContext)(xe->ec);
            VG_(printf)("\n");
         }
         xt->add_data_fn(xt->tmp_data, VG_(indexXA)(xt->data, xecu));
         // Walk the stacktrace from the bottom (deepest caller) to the top:
         // each ips_idx calls ips_idx-1.
         CALLED_FLF(ips_idx);
         for (;
              ips_idx >= 0;
              ips_idx--) {
            FP_pos_str(fp, "fl", called_filename_nr,
                       called_filename, called_filename_new);
            FP_pos_str(fp, "fn", called_fnname_nr,
                       called_fnname, called_fnname_new);
            if (ips_idx == 0)
               FP("%d %s\n", called_linenum, img); // all cost on the top entry
            else
               FP("%d\n", called_linenum); //no self cost.
            prev_linenum = called_linenum;
            if (ips_idx >= 1) {
               CALLED_FLF(ips_idx-1);
               FP_pos_str(fp, "cfi", called_filename_nr,
                          called_filename, called_filename_new);
               FP_pos_str(fp, "cfn", called_fnname_nr,
                          called_fnname, called_fnname_new);
               called_filename_new = False;
               called_fnname_new = False;
               /* Giving a call count of 0 allows kcachegrind to hide the calls
                  column. A call count of 1 means kcachegrind would show in the
                  calls column the nr of stacktrace containing this arc, which
                  is very confusing. So, the less bad is to give a 0 call
                  count. */
               FP("calls=0 %d\n", called_linenum);
               FP("%d %s\n", prev_linenum, img);
            }
         }
         FP("\n");
      }
   }
   /* callgrind format is not really fully supporting (yet?) execution trees:
      in an execution tree, self and inclusive costs are identical, and
      cannot be added together.
      If no totals: line is given, callgrind_annotate calculates the addition
      of all costs, and so gives a wrong totals.
      Giving a totals: line solves this, but the user must give the option
      --inclusive=yes (kind of hack) to have all the functions given
      in the output file. */
   FP("totals: %s\n", img_value(xt->tmp_data));
   VG_(fclose)(fp);
   VG_(deleteDedupPA)(fnname_ddpa);
   VG_(deleteDedupPA)(filename_ddpa);
   VG_(free)(filename_buf);
}
608
609
610 /* ----------- Massif output ---------------------------------------------- */
611
612 /* For Massif output, some functions from the execontext are not output, a.o.
613 the allocation functions at the top of the stack and the functions below
614 main. So, the StackTrace of the execontexts in the xtree must be filtered.
615 Ms_Ec defines the subset of the stacktrace relevant for the report. */
typedef
   struct {
      StackTrace ips; // ips and n_ips provides the subset of the xtree ec
      UInt n_ips;     // to use for a massif report.

      SizeT report_value; // The value to report for this stack trace.
   } Ms_Ec;
623
624 /* Ms_Group defines (at a certain depth) a group of ec context that
625 have the same IPs at the given depth, and have the same 'parent'.
626 total is the sum of the values of all group elements.
627 A Ms_Group can also represent a set of ec contexts that do not
628 have the same IP, but that have each a total which is below the
629 significant size. Such a group has a NULL ms_ec, a zero group_io.
630 n_ec is the nr of insignificant ec that have been collected inside this
631 insignificant group, and total is the sum of all non significant ec
632 at the given depth. */
typedef
   struct {
      Ms_Ec* ms_ec;  // The group consists in ms_ec[0 .. n_ec-1];
                     // NULL for the special "insignificant" group.
      Addr group_ip; // Common ips[depth] of the group (0 if insignificant).
      UInt n_ec;     // For an insignificant group: nr of insig ec collected.
      SizeT total;   // Sum of report_value over the group's elements.
   } Ms_Group;
640
641 /* Compare 2 groups by total, to have bigger total first. */
ms_group_revcmp_total(const void * vleft,const void * vright)642 static Int ms_group_revcmp_total(const void* vleft, const void* vright)
643 {
644 const Ms_Group* left = (const Ms_Group*)vleft;
645 const Ms_Group* right = (const Ms_Group*)vright;
646
647 // First reverse compare total
648 if (left->total > right->total) return -1;
649 if (left->total < right->total) return 1;
650
651 /* Equal size => compare IPs.
652 This (somewhat?) helps to have deterministic test results.
653 If this would change between platforms, then we should compare
654 function names/filename/linenr */
655 if (left->group_ip < right->group_ip) return -1;
656 if (left->group_ip > right->group_ip) return 1;
657 return 0;
658 }
659
660 /* Scan the addresses in ms_ec at the given depth.
661 On return,
662 *groups points to an array of Ms_Group sorted by total.
663 *n_groups is the nr of groups
664 The caller is responsible to free the allocated group array. */
static void ms_make_groups (UInt depth, Ms_Ec* ms_ec, UInt n_ec, SizeT sig_sz,
                            UInt* n_groups, Ms_Group** groups)
{
   UInt i, g;
   Addr cur_group_ip = 0;

   *n_groups = 0;

   /* Handle special case somewhat more efficiently */
   if (n_ec == 0) {
      *groups = NULL;
      return;
   }

   /* Compute how many groups we have.
      ms_ec is sorted by stacktrace, so equal ips[depth] are contiguous;
      ec too short to have an entry at this depth are not counted. */
   for (i = 0; i < n_ec; i++) {
      if (ms_ec[i].n_ips > depth
          && (*n_groups == 0 || cur_group_ip != ms_ec[i].ips[depth])) {
         (*n_groups)++;
         cur_group_ip = ms_ec[i].ips[depth];
      }
   }

   /* make the group array. */
   *groups = VG_(malloc)("ms_make_groups", *n_groups * sizeof(Ms_Group));
   i = 0;
   for (g = 0; g < *n_groups; g++) {
      // Skip the ec that have no ip at this depth.
      while (ms_ec[i].n_ips <= depth)
         i++;
      cur_group_ip = ms_ec[i].ips[depth];
      (*groups)[g].group_ip = cur_group_ip;
      (*groups)[g].ms_ec = &ms_ec[i];
      (*groups)[g].n_ec = 1;
      (*groups)[g].total = ms_ec[i].report_value;
      i++;
      // Accumulate the following ec sharing the same ip at this depth.
      while (i < n_ec
             && ms_ec[i].n_ips > depth
             && cur_group_ip == ms_ec[i].ips[depth]) {
         (*groups)[g].total += ms_ec[i].report_value;
         i++;
         (*groups)[g].n_ec++;
      }
   }

   /* Search for insignificant groups, collect them all together
      in the first insignificant group, and compact the group array. */
   {
      UInt insig1; // Position of first insignificant group.
      UInt n_insig = 0; // Nr of insignificant groups found.

      for (g = 0; g < *n_groups; g++) {
         if ((*groups)[g].total < sig_sz) {
            if (n_insig == 0) {
               // First insig group => transform it into the special group
               (*groups)[g].ms_ec = NULL;
               (*groups)[g].group_ip = 0;
               (*groups)[g].n_ec = 0;
               // start the sum of insig total as total
               insig1 = g;
            } else {
               // Add this insig group total into insig1 first group
               (*groups)[insig1].total += (*groups)[g].total;
            }
            n_insig++;
         } else {
            // Shift significant groups down over the merged insig slots
            // (no shift needed while at most 1 insig group was seen).
            if (n_insig > 1)
               (*groups)[g - n_insig + 1] = (*groups)[g];
         }
      }
      if (n_insig > 0) {
         (*groups)[insig1].n_ec = n_insig;
         *n_groups -= n_insig - 1;
      }
      DMSG(1, "depth %u n_groups %u n_insig %u\n", depth, *n_groups, n_insig);
   }

   /* Sort on total size, bigger size first. */
   VG_(ssort)(*groups, *n_groups, sizeof(Ms_Group), ms_group_revcmp_total);
}
744
/* Output group (and, recursively, its subgroups at depth+1) to fp,
   in massif heap-tree format, indented by depth+1 spaces. */
static void ms_output_group (VgFile* fp, UInt depth, Ms_Group* group,
                             SizeT sig_sz, double sig_pct_threshold)
{
   UInt i;
   Ms_Group* groups;
   UInt n_groups;

   // If this is an insignificant group, handle it specially
   if (group->ms_ec == NULL) {
      const HChar* s = ( 1 == group->n_ec? "," : "s, all" );
      vg_assert(group->group_ip == 0);
      FP("%*sn0: %lu in %d place%s below massif's threshold (%.2f%%)\n",
         depth+1, "", group->total, group->n_ec, s, sig_pct_threshold);
      return;
   }

   // Normal group => output the group and its subgroups.
   ms_make_groups(depth+1, group->ms_ec, group->n_ec, sig_sz,
                  &n_groups, &groups);

   // NOTE(review): total (a SizeT) is printed with %ld here but %lu above,
   // and n_ec (a UInt) with %d above — confirm whether these should all be
   // unsigned conversions; kept as-is to match massif output exactly.
   FP("%*s" "n%u: %ld %s\n",
      depth + 1, "",
      n_groups,
      group->total,
      VG_(describe_IP)(group->ms_ec->ips[depth] - 1, NULL));
   /* XTREE??? Massif original code removes 1 to get the IP description. I am
      wondering if this is not something that predates revision r8818,
      which introduced a -1 in the stack unwind (see m_stacktrace.c)
      Kept for the moment to allow exact comparison with massif output, but
      probably we should remove this, as we very probably end up 2 bytes before
      the RA Return Address. */

   /* Output sub groups of this group. */
   for (i = 0; i < n_groups; i++)
      ms_output_group(fp, depth+1, &groups[i], sig_sz, sig_pct_threshold);

   VG_(free)(groups);
}
783
784 /* Allocate and build an array of Ms_Ec sorted by addresses in the
785 Ms_Ec StackTrace. */
/* Build in *vms_ec/*vn_ec an array of Ms_Ec (sorted by stacktrace) for
   the xecu of xt having data and a non-empty selected stacktrace, and
   compute in *top_total the sum of their report values.
   Ownership of the array is transferred to the caller (VG_(free) it);
   the ips fields point into the execontexts, not into fresh memory. */
static void prepare_ms_ec (XTree* xt,
                           ULong (*report_value)(const void* value),
                           ULong* top_total, Ms_Ec** vms_ec, UInt* vn_ec)
{
   XT_shared* shared = xt->shared;
   const UInt n_xecu = VG_(sizeXA)(shared->xec);      // All shared xecu.
   const UInt n_data_xecu = VG_(sizeXA)(xt->data);    // Those with data in xt.
   Ms_Ec* ms_ec = VG_(malloc)("XT_massif_print.ms_ec", n_xecu * sizeof(Ms_Ec));
   UInt n_xecu_sel = 0; // Nr of xecu that are selected for output.

   vg_assert(n_data_xecu <= n_xecu);

   // Ensure we have in shared->ips_order_xecu our xecu sorted by StackTrace.
   ensure_ips_order_xecu_valid(shared);

   *top_total = 0;
   DMSG(1, "iteration %u\n", n_xecu);
   for (UInt i = 0; i < n_xecu; i++) {
      // Visit the xecu in stacktrace order, so ms_ec comes out sorted.
      Xecu xecu = *(Xecu*)VG_(indexXA)(shared->ips_order_xecu, i);
      xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);

      if (xecu >= n_data_xecu)
         continue; // No data for this xecu in xt->data.
      ms_ec[n_xecu_sel].n_ips = xe->n_ips_sel;
      if (ms_ec[n_xecu_sel].n_ips == 0)
         continue; // Fully filtered stacktrace => not reported.

      ms_ec[n_xecu_sel].ips = VG_(get_ExeContext_StackTrace)(xe->ec) + xe->top;
      ms_ec[n_xecu_sel].report_value
         = (*report_value)(VG_(indexXA)(xt->data, xecu));
      *top_total += ms_ec[n_xecu_sel].report_value;

      n_xecu_sel++;
   }
   vg_assert(n_xecu_sel <= n_xecu);

   *vms_ec = ms_ec;
   *vn_ec = n_xecu_sel;
}
825
VG_(XT_massif_open)826 MsFile* VG_(XT_massif_open)
827 (const HChar* outfilename,
828 const HChar* desc,
829 const XArray* desc_args,
830 const HChar* time_unit)
831 {
832 UInt i;
833 VgFile* fp = xt_open(outfilename);
834
835 if (fp == NULL)
836 return NULL; // xt_open reported the error.
837
838 /* ------ file header ------------------------------- */
839 FP("desc:");
840 if (desc)
841 FP(" %s", desc);
842 i = 0;
843 if (desc_args) {
844 for (i = 0; i < VG_(sizeXA)(desc_args); i++) {
845 HChar* arg = *(HChar**)VG_(indexXA)(desc_args, i);
846 FP(" %s", arg);
847 }
848 }
849 if (0 == i && desc == NULL) FP(" (none)");
850 FP("\n");
851
852 FP_cmd(fp);
853
854 FP("time_unit: %s\n", time_unit);
855
856 return fp;
857 }
858
VG_(XT_massif_close)859 void VG_(XT_massif_close)(MsFile* fp)
860 {
861 if (fp == NULL)
862 return; // Error should have been reported by VG_(XT_massif_open)
863
864 VG_(fclose)(fp);
865 }
866
/* Print one massif snapshot to fp, as described by header.
   For detailed snapshots, the heap tree is built from xt; for
   non-detailed snapshots the total comes from xt (if non-NULL)
   or from header->sz_B. */
void VG_(XT_massif_print)
     (MsFile* fp,
      XTree* xt,
      const Massif_Header* header,
      ULong (*report_value)(const void* value))
{
   UInt i;

   if (fp == NULL)
      return; // Normally VG_(XT_massif_open) already reported an error.

   /* Compute/prepare Snapshot totals/data/... */
   ULong top_total;

   /* Following variables only used for detailed snapshot. */
   UInt n_ec = 0;
   Ms_Ec* ms_ec = NULL;
   const HChar* kind =
      header->detailed ? (header->peak ? "peak" : "detailed") : "empty";

   DMSG(1, "XT_massif_print %s\n", kind);
   if (header->detailed) {
      /* Prepare the Ms_Ec sorted array of stacktraces and the groups
         at level 0. */
      prepare_ms_ec(xt, report_value, &top_total, &ms_ec, &n_ec);
      DMSG(1, "XT_print_massif ms_ec n_ec %u\n", n_ec);
   } else if (xt == NULL) {
      /* Non detailed, no xt => use the sz provided in the header. */
      top_total = header->sz_B;
   } else {
      /* For non detailed snapshot, compute total directly from the xec. */
      const XT_shared* shared = xt->shared;
      const UInt n_xecu = VG_(sizeXA)(xt->data);
      top_total = 0;

      for (UInt xecu = 0; xecu < n_xecu; xecu++) {
         xec* xe = (xec*)VG_(indexXA)(shared->xec, xecu);
         if (xe->n_ips_sel == 0)
            continue; // Fully filtered stacktrace => not counted.
         top_total += (*report_value)(VG_(indexXA)(xt->data, xecu));
      }
   }

   /* ------ snapshot header --------------------------- */
   FP("#-----------\n");
   FP("snapshot=%d\n", header->snapshot_n);
   FP("#-----------\n");
   FP("time=%lld\n", header->time);

   FP("mem_heap_B=%llu\n", top_total); // without extra_B and without stacks_B
   FP("mem_heap_extra_B=%llu\n", header->extra_B);
   FP("mem_stacks_B=%llu\n", header->stacks_B);
   FP("heap_tree=%s\n", kind);

   /* ------ detailed snapshot data ----------------------------- */
   if (header->detailed) {
      UInt n_groups;
      Ms_Group* groups;

      ULong sig_sz;
      // Work out how big a child must be to be significant.  If the current
      // top_total is zero, then we set it to 1, which means everything will be
      // judged insignificant -- this is sensible, as there's no point showing
      // any detail for this case.  Unless they used threshold=0, in which
      // case we show them everything because that's what they asked for.
      //
      // Nb: We do this once now, rather than once per child, because if we do
      // that the cost of all the divisions adds up to something significant.
      if (0 == top_total && 0 != header->sig_threshold)
         sig_sz = 1;
      else
         sig_sz = ((top_total + header->extra_B + header->stacks_B)
                   * header->sig_threshold) / 100;

      /* Produce the groups at depth 0 */
      DMSG(1, "XT_massif_print producing depth 0 groups\n");
      ms_make_groups(0, ms_ec, n_ec, sig_sz, &n_groups, &groups);

      /* Output the top node. */
      FP("n%u: %llu %s\n", n_groups, top_total, header->top_node_desc);

      /* Output depth 0 groups. */
      DMSG(1, "XT_massif_print outputing %u depth 0 groups\n", n_groups);
      for (i = 0; i < n_groups; i++)
         ms_output_group(fp, 0, &groups[i], sig_sz, header->sig_threshold);

      VG_(free)(groups);
      VG_(free)(ms_ec);
   }
}
957
VG_(XT_offset_main_or_below_main)958 Int VG_(XT_offset_main_or_below_main)(Addr* ips, Int n_ips)
959 {
960 /* Search for main or below main function.
961 To limit the nr of ips to examine, we maintain the deepest
962 offset where main was found, and we first search main
963 from there.
964 If no main is found, we will then do a search for main or
965 below main function till the top. */
966 static Int deepest_main = 0;
967 Vg_FnNameKind kind = Vg_FnNameNormal;
968 Int mbm = n_ips - 1; // Position of deepest main or below main.
969 Vg_FnNameKind mbmkind = Vg_FnNameNormal;
970 Int i;
971
972 for (i = n_ips - 1 - deepest_main;
973 i < n_ips;
974 i++) {
975 mbmkind = VG_(get_fnname_kind_from_IP)(ips[i]);
976 if (mbmkind != Vg_FnNameNormal) {
977 mbm = i;
978 break;
979 }
980 }
981
982 /* Search for main or below main function till top. */
983 for (i = mbm - 1;
984 i >= 0 && mbmkind != Vg_FnNameMain;
985 i--) {
986 kind = VG_(get_fnname_kind_from_IP)(ips[i]);
987 if (kind != Vg_FnNameNormal) {
988 mbm = i;
989 mbmkind = kind;
990 }
991 }
992 if (Vg_FnNameMain == mbmkind || Vg_FnNameBelowMain == mbmkind) {
993 if (mbmkind == Vg_FnNameMain && (n_ips - 1 - mbm) > deepest_main)
994 deepest_main = n_ips - 1 - mbm;
995 return mbm;
996 } else
997 return n_ips-1;
998 }
999
VG_(XT_filter_1top_and_maybe_below_main)1000 void VG_(XT_filter_1top_and_maybe_below_main)
1001 (Addr* ips, Int n_ips,
1002 UInt* top, UInt* n_ips_sel)
1003 {
1004 Int mbm;
1005
1006 *n_ips_sel = n_ips;
1007 if (n_ips == 0) {
1008 *top = 0;
1009 return;
1010 }
1011
1012 /* Filter top function. */
1013 *top = 1;
1014
1015 if (VG_(clo_show_below_main))
1016 mbm = n_ips - 1;
1017 else
1018 mbm = VG_(XT_offset_main_or_below_main)(ips, n_ips);
1019
1020 *n_ips_sel = mbm - *top + 1;
1021 }
1022
VG_(XT_filter_maybe_below_main)1023 void VG_(XT_filter_maybe_below_main)
1024 (Addr* ips, Int n_ips,
1025 UInt* top, UInt* n_ips_sel)
1026 {
1027 Int mbm;
1028
1029 *n_ips_sel = n_ips;
1030 *top = 0;
1031 if (n_ips == 0)
1032 return;
1033
1034 if (VG_(clo_show_below_main))
1035 mbm = n_ips - 1;
1036 else
1037 mbm = VG_(XT_offset_main_or_below_main)(ips, n_ips);
1038
1039 *n_ips_sel = mbm - *top + 1;
1040 }
1041
1042 /*--------------------------------------------------------------------*/
1043 /*--- end m_xtree.c ---*/
1044 /*--------------------------------------------------------------------*/
1045