/*--------------------------------------------------------------------*/
/*--- Callgrind                                                    ---*/
/*--- main.c                                                       ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call graph
   profiling programs.

   Copyright (C) 2002-2012, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)

   This tool is derived from and contains code from Cachegrind
   Copyright (C) 2002-2012 Nicholas Nethercote (njn@valgrind.org)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "config.h"
#include "callgrind.h"
#include "global.h"

#include "pub_tool_threadstate.h"
#include "pub_tool_gdbserver.h"

#include "cg_branchpred.c"

/*------------------------------------------------------------*/
/*--- Global variables                                      ---*/
/*------------------------------------------------------------*/

/* for all threads */
CommandLineOptions CLG_(clo);
Statistics CLG_(stat);
Bool CLG_(instrument_state) = True; /* Instrumentation on ? */

/* thread and signal handler specific */
exec_state CLG_(current_state);

/* min of L1 and LL cache line sizes.  This only gets set to a
   non-zero value if we are doing cache simulation. */
Int CLG_(min_line_size) = 0;


/*------------------------------------------------------------*/
/*--- Statistics                                            ---*/
/*------------------------------------------------------------*/

static void CLG_(init_statistics)(Statistics* s)
{
   s->call_counter        = 0;
   s->jcnd_counter        = 0;
   s->jump_counter        = 0;
   s->rec_call_counter    = 0;
   s->ret_counter         = 0;
   s->bb_executions       = 0;

   s->context_counter     = 0;
   s->bb_retranslations   = 0;

   s->distinct_objs       = 0;
   s->distinct_files      = 0;
   s->distinct_fns        = 0;
   s->distinct_contexts   = 0;
   s->distinct_bbs        = 0;
   s->distinct_bbccs      = 0;
   s->distinct_instrs     = 0;
   s->distinct_skips      = 0;

   s->bb_hash_resizes     = 0;
   s->bbcc_hash_resizes   = 0;
   s->jcc_hash_resizes    = 0;
   s->cxt_hash_resizes    = 0;
   s->fn_array_resizes    = 0;
   s->call_stack_resizes  = 0;
   s->fn_stack_resizes    = 0;

   s->full_debug_BBs      = 0;
   s->file_line_debug_BBs = 0;
   s->fn_name_debug_BBs   = 0;
   s->no_debug_BBs        = 0;
   s->bbcc_lru_misses     = 0;
   s->jcc_lru_misses      = 0;
   s->cxt_lru_misses      = 0;
   s->bbcc_clones         = 0;
}

/*------------------------------------------------------------*/
/*--- Simple callbacks (not cache simulator)                ---*/
/*------------------------------------------------------------*/

VG_REGPARM(1)
static void log_global_event(InstrInfo* ii)
{
   ULong* cost_Bus;

   CLG_DEBUG(6, "log_global_event:  Ir %#lx/%u\n",
             CLG_(bb_base) + ii->instr_offset, ii->instr_size);

   if (!CLG_(current_state).collect) return;

   CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 );

   CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;

   if (CLG_(current_state).nonskipped)
      cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
   else
      cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
   cost_Bus[0]++;
}


/* For branches, we consult two different predictors, one which
   predicts taken/untaken for conditional branches, and the other
   which predicts the branch target address for indirect branches
   (jump-to-register style ones). */
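
/* Cost layout shared by the two predictor helpers below: each branch
   event group occupies two cost slots, slot 0 counting executed
   branches and slot 1 counting mispredictions (hence the "+1"
   accesses). */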

static VG_REGPARM(2)
void log_cond_branch(InstrInfo* ii, Word taken)
{
   Bool   miss;
   Int    fullOffset_Bc;
   ULong* cost_Bc;

   CLG_DEBUG(6, "log_cond_branch:  Ir %#lx, taken %lu\n",
             CLG_(bb_base) + ii->instr_offset, taken);

   miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken);

   if (!CLG_(current_state).collect) return;

   CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 );

   if (CLG_(current_state).nonskipped)
      cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC);
   else
      cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC];

   fullOffset_Bc = fullOffset(EG_BC);
   CLG_(current_state).cost[ fullOffset_Bc ]++;
   cost_Bc[0]++;
   if (miss) {
      CLG_(current_state).cost[ fullOffset_Bc+1 ]++;
      cost_Bc[1]++;
   }
}

static VG_REGPARM(2)
void log_ind_branch(InstrInfo* ii, UWord actual_dst)
{
   Bool   miss;
   Int    fullOffset_Bi;
   ULong* cost_Bi;

   CLG_DEBUG(6, "log_ind_branch:  Ir %#lx, dst %#lx\n",
             CLG_(bb_base) + ii->instr_offset, actual_dst);

   miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst);

   if (!CLG_(current_state).collect) return;

   CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 );

   if (CLG_(current_state).nonskipped)
      cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI);
   else
      cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI];

   fullOffset_Bi = fullOffset(EG_BI);
   CLG_(current_state).cost[ fullOffset_Bi ]++;
   cost_Bi[0]++;
   if (miss) {
      CLG_(current_state).cost[ fullOffset_Bi+1 ]++;
      cost_Bi[1]++;
   }
}

/*------------------------------------------------------------*/
/*--- Instrumentation structures and event queue handling  ---*/
/*------------------------------------------------------------*/

/* Maintain an ordered list of memory events which are outstanding, in
   the sense that no IR has yet been generated to do the relevant
   helper calls.  The BB is scanned top to bottom and memory events
   are added to the end of the list, merging with the most recent
   notified event where possible (Dw immediately following Dr and
   having the same size and EA can be merged).

   This merging is done so that for architectures which have
   load-op-store instructions (x86, amd64), the insn is treated as if
   it makes just one memory reference (a modify), rather than two (a
   read followed by a write at the same address).

   At various points the list will need to be flushed, that is, IR
   generated from it.  That must happen before any possible exit from
   the block (the end, or an IRStmt_Exit).  Flushing also takes place
   when there is no space to add a new event.

   If we require the simulation statistics to be up to date with
   respect to possible memory exceptions, then the list would have to
   be flushed before each memory reference.  That would however lose
   performance by inhibiting event-merging during flushing.

   Flushing the list consists of walking it start to end and emitting
   instrumentation IR for each event, in the order in which they
   appear.  It may be possible to emit a single call for two adjacent
   events in order to reduce the number of helper function calls made.
   For example, it could well be profitable to handle two adjacent Ir
   events with a single helper call. */
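
/* Illustrative walk-through (not part of the build): for an x86
   load-op-store instruction such as "addl %eax,(%ebx)", the scan first
   queues
      Ev_Ir                    (from the IMark)
      Ev_Dr  EA=(%ebx), szB=4  (the load)
   and when the store is reached, addEvent_Dw() notices that the
   preceding event is a Dr of the same size and EA from the same
   instruction, and rewrites it in place to
      Ev_Dm  EA=(%ebx), szB=4
   so the flushed IR reports one modify access instead of a read/write
   pair. */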

typedef
   IRExpr
   IRAtom;

typedef
   enum {
      Ev_Ir,  // Instruction read
      Ev_Dr,  // Data read
      Ev_Dw,  // Data write
      Ev_Dm,  // Data modify (read then write)
      Ev_Bc,  // branch conditional
      Ev_Bi,  // branch indirect (to unknown destination)
      Ev_G    // Global bus event
   }
   EventTag;

typedef
   struct {
      EventTag   tag;
      InstrInfo* inode;
      union {
         struct {
         } Ir;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dr;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dw;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dm;
         struct {
            IRAtom* taken; /* :: Ity_I1 */
         } Bc;
         struct {
            IRAtom* dst;
         } Bi;
         struct {
         } G;
      } Ev;
   }
   Event;

static void init_Event ( Event* ev ) {
   VG_(memset)(ev, 0, sizeof(Event));
}

static IRAtom* get_Event_dea ( Event* ev ) {
   switch (ev->tag) {
   case Ev_Dr: return ev->Ev.Dr.ea;
   case Ev_Dw: return ev->Ev.Dw.ea;
   case Ev_Dm: return ev->Ev.Dm.ea;
   default:    tl_assert(0);
   }
}

static Int get_Event_dszB ( Event* ev ) {
   switch (ev->tag) {
   case Ev_Dr: return ev->Ev.Dr.szB;
   case Ev_Dw: return ev->Ev.Dw.szB;
   case Ev_Dm: return ev->Ev.Dm.szB;
   default:    tl_assert(0);
   }
}


/* Up to this many unnotified events are allowed.  Number is
   arbitrary.  Larger numbers allow more event merging to occur, but
   potentially induce more spilling due to extending live ranges of
   address temporaries. */
#define N_EVENTS 16


/* A struct which holds all the running state during instrumentation.
   Mostly to avoid passing loads of parameters everywhere. */
typedef struct {
   /* The current outstanding-memory-event list. */
   Event events[N_EVENTS];
   Int   events_used;

   /* The array of InstrInfo's is part of BB struct. */
   BB* bb;

   /* BB seen before (ie. re-instrumentation) */
   Bool seen_before;

   /* Number of InstrInfo bins 'used' so far. */
   UInt ii_index;

   // current offset of guest instructions from BB start
   UInt instr_offset;

   /* The output SB being constructed. */
   IRSB* sbOut;
} ClgState;


static void showEvent ( Event* ev )
{
   switch (ev->tag) {
   case Ev_Ir:
      VG_(printf)("Ir (InstrInfo %p) at +%d\n",
                  ev->inode, ev->inode->instr_offset);
      break;
   case Ev_Dr:
      VG_(printf)("Dr (InstrInfo %p) at +%d %d EA=",
                  ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB);
      ppIRExpr(ev->Ev.Dr.ea);
      VG_(printf)("\n");
      break;
   case Ev_Dw:
      VG_(printf)("Dw (InstrInfo %p) at +%d %d EA=",
                  ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB);
      ppIRExpr(ev->Ev.Dw.ea);
      VG_(printf)("\n");
      break;
   case Ev_Dm:
      VG_(printf)("Dm (InstrInfo %p) at +%d %d EA=",
                  ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB);
      ppIRExpr(ev->Ev.Dm.ea);
      VG_(printf)("\n");
      break;
   case Ev_Bc:
      VG_(printf)("Bc %p   GA=", ev->inode);
      ppIRExpr(ev->Ev.Bc.taken);
      VG_(printf)("\n");
      break;
   case Ev_Bi:
      VG_(printf)("Bi %p  DST=", ev->inode);
      ppIRExpr(ev->Ev.Bi.dst);
      VG_(printf)("\n");
      break;
   case Ev_G:
      VG_(printf)("G  %p\n", ev->inode);
      break;
   default:
      tl_assert(0);
      break;
   }
}

/* Generate code for all outstanding memory events, and mark the queue
   empty.  Code is generated into cgs->sbOut, and this activity
   'consumes' slots in cgs->bb. */

static void flushEvents ( ClgState* clgs )
{
   Int      i, regparms, inew;
   Char*    helperName;
   void*    helperAddr;
   IRExpr** argv;
   IRExpr*  i_node_expr;
   IRDirty* di;
   Event*   ev;
   Event*   ev2;
   Event*   ev3;

   if (!clgs->seen_before) {
      // extend event sets as needed
      // available sets: D0 Dr
      for(i=0; i<clgs->events_used; i++) {
         ev = &clgs->events[i];
         switch(ev->tag) {
         case Ev_Ir:
            // An Ir event always comes first for a guest instruction
            CLG_ASSERT(ev->inode->eventset == 0);
            ev->inode->eventset = CLG_(sets).base;
            break;
         case Ev_Dr:
            // extend event set by Dr counters
            ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                        EG_DR);
            break;
         case Ev_Dw:
         case Ev_Dm:
            // extend event set by Dw counters
            ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                        EG_DW);
            break;
         case Ev_Bc:
            // extend event set by Bc counters
            ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                        EG_BC);
            break;
         case Ev_Bi:
            // extend event set by Bi counters
            ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                        EG_BI);
            break;
         case Ev_G:
            // extend event set by Bus counter
            ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                        EG_BUS);
            break;
         default:
            tl_assert(0);
         }
      }
   }

   for(i = 0; i < clgs->events_used; i = inew) {

      helperName = NULL;
      helperAddr = NULL;
      argv       = NULL;
      regparms   = 0;

      /* generate IR to notify event i and possibly the ones
         immediately following it. */
      tl_assert(i >= 0 && i < clgs->events_used);

      ev  = &clgs->events[i];
      ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
      ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );

      CLG_DEBUGIF(5) {
         VG_(printf)("   flush ");
         showEvent( ev );
      }

      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );

      /* Decide on helper fn to call and args to pass it, and advance
         i appropriately.
         Dm events have same effect as Dw events */
      switch (ev->tag) {
      case Ev_Ir:
         /* Merge an Ir with a following Dr. */
         if (ev2 && ev2->tag == Ev_Dr) {
            /* Why is this true?  It's because we're merging an Ir
               with a following Dr.  The Ir derives from the
               instruction's IMark and the Dr from data
               references which follow it.  In short it holds
               because each insn starts with an IMark, hence an
               Ev_Ir, and so these Dr must pertain to the
               immediately preceding Ir.  Same applies to analogous
               assertions in the subsequent cases. */
            tl_assert(ev2->inode == ev->inode);
            helperName = CLG_(cachesim).log_1I1Dr_name;
            helperAddr = CLG_(cachesim).log_1I1Dr;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev2),
                                  mkIRExpr_HWord( get_Event_dszB(ev2) ) );
            regparms = 3;
            inew = i+2;
         }
         /* Merge an Ir with a following Dw/Dm. */
         else if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
            tl_assert(ev2->inode == ev->inode);
            helperName = CLG_(cachesim).log_1I1Dw_name;
            helperAddr = CLG_(cachesim).log_1I1Dw;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev2),
                                  mkIRExpr_HWord( get_Event_dszB(ev2) ) );
            regparms = 3;
            inew = i+2;
         }
         /* Merge an Ir with two following Irs. */
         else if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
            helperName = CLG_(cachesim).log_3I0D_name;
            helperAddr = CLG_(cachesim).log_3I0D;
            argv = mkIRExprVec_3( i_node_expr,
                                  mkIRExpr_HWord( (HWord)ev2->inode ),
                                  mkIRExpr_HWord( (HWord)ev3->inode ) );
            regparms = 3;
            inew = i+3;
         }
         /* Merge an Ir with one following Ir. */
         else if (ev2 && ev2->tag == Ev_Ir) {
            helperName = CLG_(cachesim).log_2I0D_name;
            helperAddr = CLG_(cachesim).log_2I0D;
            argv = mkIRExprVec_2( i_node_expr,
                                  mkIRExpr_HWord( (HWord)ev2->inode ) );
            regparms = 2;
            inew = i+2;
         }
         /* No merging possible; emit as-is. */
         else {
            helperName = CLG_(cachesim).log_1I0D_name;
            helperAddr = CLG_(cachesim).log_1I0D;
            argv = mkIRExprVec_1( i_node_expr );
            regparms = 1;
            inew = i+1;
         }
         break;
      case Ev_Dr:
         /* Data read or modify */
         helperName = CLG_(cachesim).log_0I1Dr_name;
         helperAddr = CLG_(cachesim).log_0I1Dr;
         argv = mkIRExprVec_3( i_node_expr,
                               get_Event_dea(ev),
                               mkIRExpr_HWord( get_Event_dszB(ev) ) );
         regparms = 3;
         inew = i+1;
         break;
      case Ev_Dw:
      case Ev_Dm:
         /* Data write */
         helperName = CLG_(cachesim).log_0I1Dw_name;
         helperAddr = CLG_(cachesim).log_0I1Dw;
         argv = mkIRExprVec_3( i_node_expr,
                               get_Event_dea(ev),
                               mkIRExpr_HWord( get_Event_dszB(ev) ) );
         regparms = 3;
         inew = i+1;
         break;
      case Ev_Bc:
         /* Conditional branch */
         helperName = "log_cond_branch";
         helperAddr = &log_cond_branch;
         argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
         regparms = 2;
         inew = i+1;
         break;
      case Ev_Bi:
         /* Branch to an unknown destination */
         helperName = "log_ind_branch";
         helperAddr = &log_ind_branch;
         argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
         regparms = 2;
         inew = i+1;
         break;
      case Ev_G:
         /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
         helperName = "log_global_event";
         helperAddr = &log_global_event;
         argv = mkIRExprVec_1( i_node_expr );
         regparms = 1;
         inew = i+1;
         break;
      default:
         tl_assert(0);
      }

      CLG_DEBUGIF(5) {
         if (inew > i+1) {
            VG_(printf)("   merge ");
            showEvent( ev2 );
         }
         if (inew > i+2) {
            VG_(printf)("   merge ");
            showEvent( ev3 );
         }
         if (helperAddr)
            VG_(printf)("   call  %s (%p)\n",
                        helperName, helperAddr);
      }

      /* helper could be unset depending on the simulator used */
      if (helperAddr == 0) continue;

      /* Add the helper. */
      tl_assert(helperName);
      tl_assert(helperAddr);
      tl_assert(argv);
      di = unsafeIRDirty_0_N( regparms,
                              helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                              argv );
      addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
   }

   clgs->events_used = 0;
}

static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
{
   Event* evt;
   tl_assert(clgs->seen_before || (inode->eventset == 0));
   if (!CLG_(clo).simulate_cache) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag   = Ev_Ir;
   evt->inode = inode;
   clgs->events_used++;
}

static
void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
   Event* evt;
   tl_assert(isIRAtom(ea));
   tl_assert(datasize >= 1);
   if (!CLG_(clo).simulate_cache) return;
   tl_assert(datasize <= CLG_(min_line_size));

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Dr;
   evt->inode     = inode;
   evt->Ev.Dr.szB = datasize;
   evt->Ev.Dr.ea  = ea;
   clgs->events_used++;
}

static
void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
   Event* lastEvt;
   Event* evt;
   tl_assert(isIRAtom(ea));
   tl_assert(datasize >= 1);
   if (!CLG_(clo).simulate_cache) return;
   tl_assert(datasize <= CLG_(min_line_size));

   /* Is it possible to merge this write with the preceding read?
      Only form a pointer to the last event after checking that one
      exists: taking the address of events[-1] would be undefined. */
   lastEvt = (clgs->events_used > 0)
                ? &clgs->events[clgs->events_used-1] : NULL;
   if (lastEvt
       && lastEvt->tag == Ev_Dr
       && lastEvt->Ev.Dr.szB == datasize
       && lastEvt->inode == inode
       && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
   {
      lastEvt->tag = Ev_Dm;
      return;
   }

   /* No.  Add as normal. */
   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Dw;
   evt->inode     = inode;
   evt->Ev.Dw.szB = datasize;
   evt->Ev.Dw.ea  = ea;
   clgs->events_used++;
}

static
void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard )
{
   Event* evt;
   tl_assert(isIRAtom(guard));
   tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard)
             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
   if (!CLG_(clo).simulate_branch) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag         = Ev_Bc;
   evt->inode       = inode;
   evt->Ev.Bc.taken = guard;
   clgs->events_used++;
}

static
void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo )
{
   Event* evt;
   tl_assert(isIRAtom(whereTo));
   tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo)
             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
   if (!CLG_(clo).simulate_branch) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Bi;
   evt->inode     = inode;
   evt->Ev.Bi.dst = whereTo;
   clgs->events_used++;
}

static
void addEvent_G ( ClgState* clgs, InstrInfo* inode )
{
   Event* evt;
   if (!CLG_(clo).collect_bus) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag   = Ev_G;
   evt->inode = inode;
   clgs->events_used++;
}

/* Initialise or check (if already seen before) an InstrInfo for the next
   insn.  We can only set instr_offset/instr_size here.  The required event
   set and the resulting cost offset depend on the events (Ir/Dr/Dw/Dm) of
   the guest instruction.  The event set is extended as required on flush
   of the event queue (once Dm events have been determined); cost offsets
   are determined at the end of BB instrumentation. */
static
InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
{
   InstrInfo* ii;
   tl_assert(clgs->ii_index >= 0);
   tl_assert(clgs->ii_index < clgs->bb->instr_count);
   ii = &clgs->bb->instr[ clgs->ii_index ];

   if (clgs->seen_before) {
      CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
      CLG_ASSERT(ii->instr_size == instr_size);
   }
   else {
      ii->instr_offset = clgs->instr_offset;
      ii->instr_size   = instr_size;
      ii->cost_offset  = 0;
      ii->eventset     = 0;
   }

   clgs->ii_index++;
   clgs->instr_offset += instr_size;
   CLG_(stat).distinct_instrs++;

   return ii;
}

// return total number of cost values needed for this BB
static
UInt update_cost_offsets( ClgState* clgs )
{
   Int i;
   InstrInfo* ii;
   UInt cost_offset = 0;

   CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
   for(i=0; i<clgs->ii_index; i++) {
      ii = &clgs->bb->instr[i];
      if (clgs->seen_before) {
         CLG_ASSERT(ii->cost_offset == cost_offset);
      } else
         ii->cost_offset = cost_offset;
      cost_offset += ii->eventset ? ii->eventset->size : 0;
   }

   return cost_offset;
}
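
/* Worked example (illustrative only, assuming the base event set counts
   just Ir): for a BB of three instructions where the second also reads
   data, update_cost_offsets() produces

      instr[0]: eventset = base     cost_offset = 0
      instr[1]: eventset = base+Dr  cost_offset = size(base)
      instr[2]: eventset = base     cost_offset = size(base)+size(base+Dr)

   and returns the sum of the three event set sizes, i.e. the number of
   cost slots to allocate for this BB. */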

/*------------------------------------------------------------*/
/*--- Instrumentation                                       ---*/
/*------------------------------------------------------------*/

#if defined(VG_BIGENDIAN)
# define CLGEndness Iend_BE
#elif defined(VG_LITTLEENDIAN)
# define CLGEndness Iend_LE
#else
# error "Unknown endianness"
#endif

static
Addr IRConst2Addr(IRConst* con)
{
   Addr addr;

   if (sizeof(Addr) == 4) {
      CLG_ASSERT( con->tag == Ico_U32 );
      addr = con->Ico.U32;
   }
   else if (sizeof(Addr) == 8) {
      CLG_ASSERT( con->tag == Ico_U64 );
      addr = con->Ico.U64;
   }
   else
      VG_(tool_panic)("Callgrind: invalid Addr type");

   return addr;
}

/* First pass over a BB to instrument, counting instructions and jumps.
 * This is needed to know the size of the BB struct to allocate.
 *
 * Called from CLG_(get_bb)
 */
void CLG_(collectBlockInfo)(IRSB* sbIn,
                            /*INOUT*/ UInt* instrs,
                            /*INOUT*/ UInt* cjmps,
                            /*INOUT*/ Bool* cjmp_inverted)
{
   Int     i;
   IRStmt* st;
   Addr    instrAddr = 0, jumpDst;
   UInt    instrLen = 0;
   Bool    toNextInstr = False;

   // Ist_Exit has to be ignored in preamble code, before first IMark:
   // preamble code is added by VEX for self modifying code, and has
   // nothing to do with client code
   Bool inPreamble = True;

   if (!sbIn) return;

   for (i = 0; i < sbIn->stmts_used; i++) {
      st = sbIn->stmts[i];
      if (Ist_IMark == st->tag) {
         inPreamble = False;

         instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
         instrLen  = st->Ist.IMark.len;

         (*instrs)++;
         toNextInstr = False;
      }
      if (inPreamble) continue;
      if (Ist_Exit == st->tag) {
         jumpDst = IRConst2Addr(st->Ist.Exit.dst);
         toNextInstr = (jumpDst == instrAddr + instrLen);

         (*cjmps)++;
      }
   }

   /* If the last instruction of the BB conditionally jumps to the next
    * instruction (= the first instruction of the next BB in memory),
    * this jump has been inverted by VEX.
    */
   *cjmp_inverted = toNextInstr;
}

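/* Emit "store val to addr" into bbOut.  The value is always stored as a
   32-bit constant, matching the only users below, which update the
   (assumed 32-bit) jmps_passed counter in CLG_(current_state). */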
static
void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
{
   addStmtToIRSB( bbOut,
                  IRStmt_Store(CLGEndness,
                               IRExpr_Const(hWordTy == Ity_I32 ?
                                            IRConst_U32( addr ) :
                                            IRConst_U64( addr )),
                               IRExpr_Const(IRConst_U32(val)) ));
}


/* Add helper call to setup_bbcc, with a pointer to the BB struct as argument.
 *
 * precondition for setup_bbcc:
 * - jmps_passed has the number of cond. jumps passed in the last executed BB
 * - current_bbcc has a pointer to the BBCC of the last executed BB
 *   Thus, if bbcc_jmpkind is != -1 (JmpNone),
 *       current_bbcc->bb->jmp_addr
 *   gives the address of the jump source.
 *
 * The setup does 2 things:
 * - trace call:
 *   * Unwind our own call stack, i.e. sync our ESP with the real ESP
 *     This is for ESP manipulation (longjmps, C++ exception handling) and RET
 *   * For CALLs or JMPs crossing objects, record the call args and
 *     push a frame on our own call stack
 *
 * - prepare for cache log functions:
 *   set current_bbcc to the BBCC that gets the costs for this BB execution
 *   attached
 */
static
void addBBSetupCall(ClgState* clgs)
{
   IRDirty* di;
   IRExpr  *arg1, **argv;

   arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
   argv = mkIRExprVec_1(arg1);
   di = unsafeIRDirty_0_N( 1, "setup_bbcc",
                           VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
                           argv);
   addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
}


static
IRSB* CLG_(instrument)( VgCallbackClosure* closure,
                        IRSB* sbIn,
                        VexGuestLayout* layout,
                        VexGuestExtents* vge,
                        IRType gWordTy, IRType hWordTy )
{
   Int        i;
   IRStmt*    st;
   Addr       origAddr;
   InstrInfo* curr_inode = NULL;
   ClgState   clgs;
   UInt       cJumps = 0;


   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   // No instrumentation if it is switched off
   if (! CLG_(instrument_state)) {
      CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
                (Addr)closure->readdr);
      return sbIn;
   }

   CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);

   /* Set up SB for instrumented IR */
   clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
      i++;
   }

   // Get the first statement, and origAddr from it
   CLG_ASSERT(sbIn->stmts_used > 0);
   CLG_ASSERT(i < sbIn->stmts_used);
   st = sbIn->stmts[i];
   CLG_ASSERT(Ist_IMark == st->tag);

   origAddr = (Addr)st->Ist.IMark.addr + (Addr)st->Ist.IMark.delta;
   CLG_ASSERT(origAddr == st->Ist.IMark.addr
                          + st->Ist.IMark.delta);  // XXX: check no overflow

   /* Get BB struct (creating if necessary).
    * JS: The hash table is keyed with orig_addr_noredir -- important!
    * JW: Why? If it is because of different chasing of the redirection,
    *     this is not needed, as chasing is switched off in callgrind
    */
   clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));

   addBBSetupCall(&clgs);

   // Set up running state
   clgs.events_used = 0;
   clgs.ii_index = 0;
   clgs.instr_offset = 0;

   for (/*use current i*/; i < sbIn->stmts_used; i++) {

      st = sbIn->stmts[i];
      CLG_ASSERT(isFlatIRStmt(st));

      switch (st->tag) {
      case Ist_NoOp:
      case Ist_AbiHint:
      case Ist_Put:
      case Ist_PutI:
      case Ist_MBE:
         break;

      case Ist_IMark: {
         Addr64 cia   = st->Ist.IMark.addr + st->Ist.IMark.delta;
         Int    isize = st->Ist.IMark.len;
         CLG_ASSERT(clgs.instr_offset == (Addr)cia - origAddr);
         // If Vex fails to decode an instruction, the size will be zero.
         // Pretend otherwise.
         if (isize == 0) isize = VG_MIN_INSTR_SZB;

         // Sanity-check size.
         tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
                    || VG_CLREQ_SZB == isize );

         // Init the inode, record it as the current one.
         // Subsequent Dr/Dw/Dm events from the same instruction will
         // also use it.
         curr_inode = next_InstrInfo (&clgs, isize);

         addEvent_Ir( &clgs, curr_inode );
         break;
      }

      case Ist_WrTmp: {
         IRExpr* data = st->Ist.WrTmp.data;
         if (data->tag == Iex_Load) {
            IRExpr* aexpr = data->Iex.Load.addr;
            // Note also, endianness info is ignored.  I guess
            // that's not interesting.
            addEvent_Dr( &clgs, curr_inode,
                         sizeofIRType(data->Iex.Load.ty), aexpr );
         }
         break;
      }

      case Ist_Store: {
         IRExpr* data  = st->Ist.Store.data;
         IRExpr* aexpr = st->Ist.Store.addr;
         addEvent_Dw( &clgs, curr_inode,
                      sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
         break;
      }

      case Ist_Dirty: {
         Int      dataSize;
         IRDirty* d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None) {
            /* This dirty helper accesses memory.  Collect the details. */
            tl_assert(d->mAddr != NULL);
            tl_assert(d->mSize != 0);
            dataSize = d->mSize;
            // Large (eg. 28B, 108B, 512B on x86) data-sized
            // instructions will be done inaccurately, but they're
            // very rare and this avoids errors from hitting more
            // than two cache lines in the simulation.
            if (CLG_(clo).simulate_cache && dataSize > CLG_(min_line_size))
               dataSize = CLG_(min_line_size);
            if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
               addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
            if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
               addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
         } else {
            tl_assert(d->mAddr == NULL);
            tl_assert(d->mSize == 0);
         }
         break;
      }

      case Ist_CAS: {
         /* We treat it as a read and a write of the location.  I
            think that is the same behaviour as it was before IRCAS
            was introduced, since prior to that point, the Vex
            front ends would translate a lock-prefixed instruction
            into a (normal) read followed by a (normal) write. */
         Int    dataSize;
         IRCAS* cas = st->Ist.CAS.details;
         CLG_ASSERT(cas->addr && isIRAtom(cas->addr));
         CLG_ASSERT(cas->dataLo);
         dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo));
         if (cas->dataHi != NULL)
            dataSize *= 2; /* since this is a doubleword-cas */
         addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
         addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
         addEvent_G ( &clgs, curr_inode );
         break;
      }

      case Ist_LLSC: {
         IRType dataTy;
         if (st->Ist.LLSC.storedata == NULL) {
            /* LL */
            dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result);
            addEvent_Dr( &clgs, curr_inode,
                         sizeofIRType(dataTy), st->Ist.LLSC.addr );
         } else {
            /* SC */
            dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
            addEvent_Dw( &clgs, curr_inode,
                         sizeofIRType(dataTy), st->Ist.LLSC.addr );
            /* I don't know whether the global-bus-lock cost should
               be attributed to the LL or the SC, but it doesn't
               really matter since they always have to be used in
               pairs anyway.  Hence put it (quite arbitrarily) on
               the SC. */
            addEvent_G( &clgs, curr_inode );
         }
         break;
      }

      case Ist_Exit: {
         Bool guest_exit, inverted;

         /* VEX code generation sometimes inverts conditional branches.
          * As Callgrind counts (conditional) jumps, it has to correct
          * inversions. The heuristic is the following:
          * (1) Callgrind switches off SB chasing and unrolling, and
          *     therefore assumes that the only candidate for inversion is
          *     the last conditional branch in an SB.
          * (2) Inversion is assumed if the branch jumps to the address of
          *     the next guest instruction in memory.
          * This heuristic is precalculated in CLG_(collectBlockInfo)().
          *
          * Branching behavior is also used for branch prediction. Note that
          * the above heuristic differs from what Cachegrind does.
          * Cachegrind uses (2) for all branches.
          */
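
         /* Hypothetical example: for guest code "jz <next insn>", VEX may
            emit the exit with the guard's sense inverted and the targets
            swapped.  cjmp_inverted records this, and the XOR below undoes
            the inversion before feeding the guard to the predictor. */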
         if (cJumps+1 == clgs.bb->cjmp_count)
            inverted = clgs.bb->cjmp_inverted;
         else
            inverted = False;

         // call branch predictor only if this is a branch in guest code
         guest_exit = (st->Ist.Exit.jk == Ijk_Boring) ||
                      (st->Ist.Exit.jk == Ijk_Call) ||
                      (st->Ist.Exit.jk == Ijk_Ret);

         if (guest_exit) {
            /* Stuff to widen the guard expression to a host word, so
               we can pass it to the branch predictor simulation
               functions easily. */
            IRType  tyW    = hWordTy;
            IROp    widen  = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
            IROp    opXOR  = tyW==Ity_I32 ? Iop_Xor32  : Iop_Xor64;
            IRTemp  guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1);
            IRTemp  guardW = newIRTemp(clgs.sbOut->tyenv, tyW);
            IRTemp  guard  = newIRTemp(clgs.sbOut->tyenv, tyW);
            IRExpr* one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
                                          : IRExpr_Const(IRConst_U64(1));

            /* Widen the guard expression. */
            addStmtToIRSB( clgs.sbOut,
                           IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
            addStmtToIRSB( clgs.sbOut,
                           IRStmt_WrTmp( guardW,
                                         IRExpr_Unop(widen,
                                                     IRExpr_RdTmp(guard1))) );
            /* If the exit is inverted, invert the sense of the guard. */
            addStmtToIRSB(
               clgs.sbOut,
               IRStmt_WrTmp(
                  guard,
                  inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
                           : IRExpr_RdTmp(guardW)
               ));
            /* And post the event. */
            addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) );
         }

         /* We may never reach the next statement, so need to flush
            all outstanding transactions now. */
         flushEvents( &clgs );

         CLG_ASSERT(clgs.ii_index>0);
         if (!clgs.seen_before) {
            ClgJumpKind jk;

            if      (st->Ist.Exit.jk == Ijk_Call) jk = jk_Call;
            else if (st->Ist.Exit.jk == Ijk_Ret)  jk = jk_Return;
            else {
               if (IRConst2Addr(st->Ist.Exit.dst) ==
                   origAddr + curr_inode->instr_offset + curr_inode->instr_size)
                  jk = jk_None;
               else
                  jk = jk_Jump;
            }

            clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
            clgs.bb->jmp[cJumps].jmpkind = jk;
         }

         /* Update global variable jmps_passed before the jump.
          * A correction is needed if VEX inverted the last jump condition.
          */
         addConstMemStoreStmt( clgs.sbOut,
                               (UWord) &CLG_(current_state).jmps_passed,
                               inverted ? cJumps+1 : cJumps, hWordTy);
         cJumps++;

         break;
      }

      default:
         tl_assert(0);
         break;
      }

      /* Copy the original statement */
      addStmtToIRSB( clgs.sbOut, st );

      CLG_DEBUGIF(5) {
         VG_(printf)("   pass  ");
         ppIRStmt(st);
         VG_(printf)("\n");
      }
   }

   /* Deal with branches to unknown destinations.  Except ignore ones
      which are function returns as we assume the return stack
      predictor never mispredicts. */
   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
      switch (sbIn->next->tag) {
      case Iex_Const:
         break; /* boring - branch to known address */
      case Iex_RdTmp:
         /* looks like an indirect branch (branch to unknown) */
         addEvent_Bi( &clgs, curr_inode, sbIn->next );
         break;
      default:
         /* shouldn't happen - if the incoming IR is properly
            flattened, should only have tmp and const cases to
            consider. */
         tl_assert(0);
      }
   }

   /* At the end of the bb.  Flush outstanding events. */
   flushEvents( &clgs );

   /* Always update global variable jmps_passed at the end of the bb.
    * A correction is needed if VEX inverted the last jump condition.
    */
   {
      UInt jmps_passed = cJumps;
      if (clgs.bb->cjmp_inverted) jmps_passed--;
      addConstMemStoreStmt( clgs.sbOut,
                            (UWord) &CLG_(current_state).jmps_passed,
                            jmps_passed, hWordTy);
   }
   CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
   CLG_ASSERT(clgs.bb->instr_count == clgs.ii_index);

   /* Info for final exit from BB */
   {
      ClgJumpKind jk;

      if      (sbIn->jumpkind == Ijk_Call) jk = jk_Call;
      else if (sbIn->jumpkind == Ijk_Ret)  jk = jk_Return;
      else {
         jk = jk_Jump;
         if ((sbIn->next->tag == Iex_Const) &&
             (IRConst2Addr(sbIn->next->Iex.Const.con) ==
              origAddr + clgs.instr_offset))
            jk = jk_None;
      }
      clgs.bb->jmp[cJumps].jmpkind = jk;
      /* Instruction index of the call/ret at BB end
       * (it is wrong for fall-through, but does not matter) */
      clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
   }

   /* swap information of last exit with final exit if inverted */
   if (clgs.bb->cjmp_inverted) {
      ClgJumpKind jk;
      UInt instr;

      jk = clgs.bb->jmp[cJumps].jmpkind;
      clgs.bb->jmp[cJumps].jmpkind = clgs.bb->jmp[cJumps-1].jmpkind;
      clgs.bb->jmp[cJumps-1].jmpkind = jk;
      instr = clgs.bb->jmp[cJumps].instr;
      clgs.bb->jmp[cJumps].instr = clgs.bb->jmp[cJumps-1].instr;
      clgs.bb->jmp[cJumps-1].instr = instr;
   }

   if (clgs.seen_before) {
      CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
      CLG_ASSERT(clgs.bb->instr_len == clgs.instr_offset);
   }
   else {
      clgs.bb->cost_count = update_cost_offsets(&clgs);
      clgs.bb->instr_len  = clgs.instr_offset;
   }

   CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
             origAddr, clgs.bb->instr_len,
             clgs.bb->cjmp_count, clgs.bb->cost_count);
   if (cJumps>0) {
      CLG_DEBUG(3, "                     [ ");
      for (i=0; i<cJumps; i++)
         CLG_DEBUG(3, "%d ", clgs.bb->jmp[i].instr);
      CLG_DEBUG(3, "], last inverted: %s\n",
                clgs.bb->cjmp_inverted ? "yes":"no");
   }

   return clgs.sbOut;
}

/*--------------------------------------------------------------------*/
/*--- Discarding BB info                                           ---*/
/*--------------------------------------------------------------------*/

// Called when a translation is removed from the translation cache for
// any reason at all: to free up space, because the guest code was
// unmapped or modified, or for any arbitrary reason.
static
void clg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
{
   Addr orig_addr = (Addr)orig_addr64;

   tl_assert(vge.n_used > 0);

   if (0)
      VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
                   (void*)(Addr)orig_addr,
                   (void*)(Addr)vge.base[0], (ULong)vge.len[0]);

   // Get BB info, remove from table, free BB info.  Simple!  Note that we
   // use orig_addr, not the first instruction address in vge.
   CLG_(delete_bb)(orig_addr);
}


/*------------------------------------------------------------*/
/*--- CLG_(fini)() and related function                     ---*/
/*------------------------------------------------------------*/



static void zero_thread_cost(thread_info* t)
{
   Int i;

   for(i = 0; i < CLG_(current_call_stack).sp; i++) {
      if (!CLG_(current_call_stack).entry[i].jcc) continue;

      /* reset call counters to current for active calls */
      CLG_(copy_cost)( CLG_(sets).full,
                       CLG_(current_call_stack).entry[i].enter_cost,
                       CLG_(current_state).cost );
      CLG_(current_call_stack).entry[i].jcc->call_counter = 0;
   }

   CLG_(forall_bbccs)(CLG_(zero_bbcc));

   /* set counter for last dump */
   CLG_(copy_cost)( CLG_(sets).full,
                    t->lastdump_cost, CLG_(current_state).cost );
}

void CLG_(zero_all_cost)(Bool only_current_thread)
{
   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "  Zeroing costs...\n");

   if (only_current_thread)
      zero_thread_cost(CLG_(get_current_thread)());
   else
      CLG_(forall_threads)(zero_thread_cost);

   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "  ...done\n");
}

static
void unwind_thread(thread_info* t)
{
   /* unwind signal handlers */
   while(CLG_(current_state).sig != 0)
      CLG_(post_signal)(CLG_(current_tid), CLG_(current_state).sig);

   /* unwind regular call stack */
   while(CLG_(current_call_stack).sp > 0)
      CLG_(pop_call_stack)();

   /* reset context and function stack for context generation */
   CLG_(init_exec_state)( &CLG_(current_state) );
   CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
}

static
void zero_state_cost(thread_info* t)
{
   CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost );
}

/* Oops, this can go very wrong... */
extern void VG_(discard_translations) ( Addr64 start, ULong range, HChar* who );

void CLG_(set_instrument_state)(Char* reason, Bool state)
{
   if (CLG_(instrument_state) == state) {
      CLG_DEBUG(2, "%s: instrumentation already %s\n",
                reason, state ? "ON" : "OFF");
      return;
   }
   CLG_(instrument_state) = state;
   CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
             reason, state ? "ON" : "OFF");

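   /* Discard all translations (the range below covers essentially the
      whole address space) so that every SB is re-instrumented with the
      new state when next executed. */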
   VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl, "callgrind");

   /* reset internal state: call stacks, simulator */
   CLG_(forall_threads)(unwind_thread);
   CLG_(forall_threads)(zero_state_cost);
   (*CLG_(cachesim).clear)();

   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
                   reason, state ? "ON" : "OFF");
}

/* helper for dump_state_togdb */
static void dump_state_of_thread_togdb(thread_info* ti)
{
   static Char buf[512];
   static FullCost sum = 0, tmp = 0;
   Int t, p, i;
   BBCC *from, *to;
   call_entry* ce;

   t = CLG_(current_tid);
   CLG_(init_cost_lz)( CLG_(sets).full, &sum );
   CLG_(copy_cost_lz)( CLG_(sets).full, &tmp, ti->lastdump_cost );
   CLG_(add_diff_cost)( CLG_(sets).full, sum, ti->lastdump_cost,
                        ti->states.entry[0]->cost);
   CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost, tmp );
   CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), sum);
   VG_(gdb_printf)("events-%d: %s\n", t, buf);
   VG_(gdb_printf)("frames-%d: %d\n", t, CLG_(current_call_stack).sp);

   ce = 0;
   for(i = 0; i < CLG_(current_call_stack).sp; i++) {
      ce = CLG_(get_call_entry)(i);
      /* if this frame is skipped, we don't have counters */
      if (!ce->jcc) continue;

      from = ce->jcc->from;
      VG_(gdb_printf)("function-%d-%d: %s\n", t, i, from->cxt->fn[0]->name);
      VG_(gdb_printf)("calls-%d-%d: %llu\n", t, i, ce->jcc->call_counter);

      /* FIXME: EventSets! */
      CLG_(copy_cost)( CLG_(sets).full, sum, ce->jcc->cost );
      CLG_(copy_cost)( CLG_(sets).full, tmp, ce->enter_cost );
      CLG_(add_diff_cost)( CLG_(sets).full, sum,
                           ce->enter_cost, CLG_(current_state).cost );
      CLG_(copy_cost)( CLG_(sets).full, ce->enter_cost, tmp );

      p = VG_(sprintf)(buf, "events-%d-%d: ", t, i);
      CLG_(sprint_mappingcost)(buf + p, CLG_(dumpmap), sum );
      VG_(gdb_printf)("%s\n", buf);
   }
   if (ce && ce->jcc) {
      to = ce->jcc->to;
      VG_(gdb_printf)("function-%d-%d: %s\n", t, i, to->cxt->fn[0]->name );
   }
}

/* Dump current state */
static void dump_state_togdb(void)
{
   static Char buf[512];
   thread_info** th;
   int t, p;
   Int orig_tid = CLG_(current_tid);

   VG_(gdb_printf)("instrumentation: %s\n",
                   CLG_(instrument_state) ? "on":"off");
   if (!CLG_(instrument_state)) return;

   VG_(gdb_printf)("executed-bbs: %llu\n", CLG_(stat).bb_executions);
   VG_(gdb_printf)("executed-calls: %llu\n", CLG_(stat).call_counter);
   VG_(gdb_printf)("distinct-bbs: %d\n", CLG_(stat).distinct_bbs);
   VG_(gdb_printf)("distinct-calls: %d\n", CLG_(stat).distinct_jccs);
   VG_(gdb_printf)("distinct-functions: %d\n", CLG_(stat).distinct_fns);
   VG_(gdb_printf)("distinct-contexts: %d\n", CLG_(stat).distinct_contexts);

   /* "events:" line. Given here because it will be dynamic in the future */
   p = VG_(sprintf)(buf, "events: ");
   CLG_(sprint_eventmapping)(buf+p, CLG_(dumpmap));
   VG_(gdb_printf)("%s\n", buf);
   /* "part:" line (number of the last part; 0 at start) */
   VG_(gdb_printf)("part: %d\n", CLG_(get_dump_counter)());

   /* threads */
   th = CLG_(get_threads)();
   p = VG_(sprintf)(buf, "threads:");
   for(t=1; t<VG_N_THREADS; t++) {
      if (!th[t]) continue;
      p += VG_(sprintf)(buf+p, " %d", t);
   }
   VG_(gdb_printf)("%s\n", buf);
   VG_(gdb_printf)("current-tid: %d\n", orig_tid);
   CLG_(forall_threads)(dump_state_of_thread_togdb);
}


static void print_monitor_help ( void )
{
   VG_(gdb_printf) ("\n");
   VG_(gdb_printf) ("callgrind monitor commands:\n");
   VG_(gdb_printf) ("  dump [<dump_hint>]\n");
   VG_(gdb_printf) ("        dump counters\n");
   VG_(gdb_printf) ("  zero\n");
   VG_(gdb_printf) ("        zero counters\n");
   VG_(gdb_printf) ("  status\n");
   VG_(gdb_printf) ("        print status\n");
   VG_(gdb_printf) ("  instrumentation [on|off]\n");
   VG_(gdb_printf) ("        get/set (if on/off given) instrumentation state\n");
   VG_(gdb_printf) ("\n");
}

/* return True if request recognised, False otherwise */
static Bool handle_gdb_monitor_command (ThreadId tid, Char *req)
{
   Char* wcmd;
   Char s[VG_(strlen)(req) + 1]; /* writable copy for strtok_r, +1 for NUL */
   Char *ssaveptr;

   VG_(strcpy) (s, req);

   wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
   switch (VG_(keyword_id) ("help dump zero status instrumentation",
                            wcmd, kwd_report_duplicated_matches)) {
   case -2: /* multiple matches */
      return True;
   case -1: /* not found */
      return False;
   case 0: /* help */
      print_monitor_help();
      return True;
   case 1: { /* dump */
      CLG_(dump_profile)(req, False);
      return True;
   }
   case 2: { /* zero */
      CLG_(zero_all_cost)(False);
      return True;
   }

   case 3: { /* status */
      Char* arg = VG_(strtok_r) (0, " ", &ssaveptr);
      if (arg && (VG_(strcmp)(arg, "internal") == 0)) {
         /* internal interface to callgrind_control */
         dump_state_togdb();
         return True;
      }

      if (!CLG_(instrument_state)) {
         VG_(gdb_printf)("No status available as instrumentation is switched off\n");
      } else {
         // Status information to be improved ...
         thread_info** th = CLG_(get_threads)();
         Int t, tcount = 0;
         for(t=1; t<VG_N_THREADS; t++)
            if (th[t]) tcount++;
         VG_(gdb_printf)("%d thread(s) running.\n", tcount);
      }
      return True;
   }

   case 4: { /* instrumentation */
      Char* arg = VG_(strtok_r) (0, " ", &ssaveptr);
      if (!arg) {
         VG_(gdb_printf)("instrumentation: %s\n",
                         CLG_(instrument_state) ? "on":"off");
      }
      else
         CLG_(set_instrument_state)("Command", VG_(strcmp)(arg,"off")!=0);
      return True;
   }

   default:
      tl_assert(0);
      return False;
   }
}

static
Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
{
   if (!VG_IS_TOOL_USERREQ('C','T',args[0])
       && VG_USERREQ__GDB_MONITOR_COMMAND != args[0])
      return False;

   switch(args[0]) {
   case VG_USERREQ__DUMP_STATS:
      CLG_(dump_profile)("Client Request", True);
      *ret = 0; /* meaningless */
      break;

   case VG_USERREQ__DUMP_STATS_AT:
   {
      Char buf[512];
      VG_(sprintf)(buf, "Client Request: %s", (Char*)args[1]);
      CLG_(dump_profile)(buf, True);
      *ret = 0; /* meaningless */
   }
   break;

   case VG_USERREQ__ZERO_STATS:
      CLG_(zero_all_cost)(True);
      *ret = 0; /* meaningless */
      break;

   case VG_USERREQ__TOGGLE_COLLECT:
      CLG_(current_state).collect = !CLG_(current_state).collect;
      CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
                CLG_(current_state).collect ? "ON" : "OFF");
      *ret = 0; /* meaningless */
      break;

   case VG_USERREQ__START_INSTRUMENTATION:
      CLG_(set_instrument_state)("Client Request", True);
      *ret = 0; /* meaningless */
      break;

   case VG_USERREQ__STOP_INSTRUMENTATION:
      CLG_(set_instrument_state)("Client Request", False);
      *ret = 0; /* meaningless */
      break;

   case VG_USERREQ__GDB_MONITOR_COMMAND: {
      Bool handled = handle_gdb_monitor_command (tid, (Char*)args[1]);
      if (handled)
         *ret = 1;
      else
         *ret = 0;
      return handled;
   }
   default:
      return False;
   }

   return True;
}


/* Syscall Timing */

/* struct timeval syscalltime[VG_N_THREADS]; */
#if CLG_MICROSYSTIME
#include <sys/time.h>
#include <sys/syscall.h>
extern Int VG_(do_syscall) ( UInt, ... );

ULong syscalltime[VG_N_THREADS];
#else
UInt syscalltime[VG_N_THREADS];
#endif
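
/* syscalltime[] above holds, per thread, the timestamp taken just
   before a syscall: microseconds of wall-clock time if CLG_MICROSYSTIME
   is set, otherwise milliseconds from Valgrind's timer. */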

static
void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
                           UWord* args, UInt nArgs)
{
   if (CLG_(clo).collect_systime) {
#if CLG_MICROSYSTIME
      struct vki_timeval tv_now;
      VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
      syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
#else
      syscalltime[tid] = VG_(read_millisecond_timer)();
#endif
   }
}

static
void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
                            UWord* args, UInt nArgs, SysRes res)
{
   if (CLG_(clo).collect_systime &&
       CLG_(current_state).bbcc) {
      Int o;
#if CLG_MICROSYSTIME
      struct vki_timeval tv_now;
      ULong diff;

      VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
      diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
#else
      UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
#endif

      /* offset o is for "SysCount", o+1 for "SysTime" */
      o = fullOffset(EG_SYS);
      CLG_ASSERT(o >= 0);
      CLG_DEBUG(0, "   Time (Off %d) for Syscall %d: %llu\n",
                o, syscallno, (ULong)diff);

      CLG_(current_state).cost[o]   ++;
      CLG_(current_state).cost[o+1] += diff;
      if (!CLG_(current_state).bbcc->skipped)
         CLG_(init_cost_lz)(CLG_(sets).full,
                            &(CLG_(current_state).bbcc->skipped));
      CLG_(current_state).bbcc->skipped[o]   ++;
      CLG_(current_state).bbcc->skipped[o+1] += diff;
   }
}

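/* Width in characters of n when printed with thousands separators,
   e.g. ULong_width(1234567) == 9: seven digits plus two commas. */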
static UInt ULong_width(ULong n)
{
   UInt w = 0;
   while (n > 0) {
      n = n / 10;
      w++;
   }
   if (w == 0) w = 1;
   return w + (w-1)/3;   // add space for commas
}

static
void branchsim_printstat(int l1, int l2, int l3)
{
   static Char buf1[128], buf2[128], buf3[128], fmt[128];
   FullCost total;
   ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp;
   ULong B_total_b, B_total_mp;

   total = CLG_(total_cost);
   Bc_total_b  = total[ fullOffset(EG_BC)   ];
   Bc_total_mp = total[ fullOffset(EG_BC)+1 ];
   Bi_total_b  = total[ fullOffset(EG_BI)   ];
   Bi_total_mp = total[ fullOffset(EG_BI)+1 ];

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
                l1, l2, l3);

   if (0 == Bc_total_b) Bc_total_b = 1;
   if (0 == Bi_total_b) Bi_total_b = 1;
   B_total_b  = Bc_total_b  + Bi_total_b;
   B_total_mp = Bc_total_mp + Bi_total_mp;

   VG_(umsg)("\n");
   VG_(umsg)(fmt, "Branches:     ",
             B_total_b, Bc_total_b, Bi_total_b);

   VG_(umsg)(fmt, "Mispredicts:  ",
             B_total_mp, Bc_total_mp, Bi_total_mp);

   VG_(percentify)(B_total_mp,  B_total_b,  1, l1+1, buf1);
   VG_(percentify)(Bc_total_mp, Bc_total_b, 1, l2+1, buf2);
   VG_(percentify)(Bi_total_mp, Bi_total_b, 1, l3+1, buf3);

   VG_(umsg)("Mispred rate:  %s (%s     + %s   )\n", buf1, buf2, buf3);
}


static
void finish(void)
{
   Char buf[32+COSTS_LEN], fmt[128];
   Int l1, l2, l3;
   FullCost total;

   CLG_DEBUG(0, "finish()\n");

   (*CLG_(cachesim).finish)();

   /* pop all remaining items from CallStack for correct sum
    */
   CLG_(forall_threads)(unwind_thread);

   CLG_(dump_profile)(0, False);

   if (VG_(clo_verbosity) == 0) return;

   /* Hash table stats */
   if (VG_(clo_stats)) {
      int BB_lookups =
         CLG_(stat).full_debug_BBs +
         CLG_(stat).fn_name_debug_BBs +
         CLG_(stat).file_line_debug_BBs +
         CLG_(stat).no_debug_BBs;

      VG_(message)(Vg_DebugMsg, "\n");
      VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
                   CLG_(stat).distinct_objs);
      VG_(message)(Vg_DebugMsg, "Distinct files:   %d\n",
                   CLG_(stat).distinct_files);
      VG_(message)(Vg_DebugMsg, "Distinct fns:     %d\n",
                   CLG_(stat).distinct_fns);
      VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
                   CLG_(stat).distinct_contexts);
      VG_(message)(Vg_DebugMsg, "Distinct BBs:     %d\n",
                   CLG_(stat).distinct_bbs);
      VG_(message)(Vg_DebugMsg, "Cost entries:     %d (Chunks %d)\n",
                   CLG_(costarray_entries), CLG_(costarray_chunks));
      VG_(message)(Vg_DebugMsg, "Distinct BBCCs:   %d\n",
                   CLG_(stat).distinct_bbccs);
      VG_(message)(Vg_DebugMsg, "Distinct JCCs:    %d\n",
                   CLG_(stat).distinct_jccs);
      VG_(message)(Vg_DebugMsg, "Distinct skips:   %d\n",
                   CLG_(stat).distinct_skips);
      VG_(message)(Vg_DebugMsg, "BB lookups:       %d\n",
                   BB_lookups);
      if (BB_lookups>0) {
         VG_(message)(Vg_DebugMsg, "With full      debug info:%3d%% (%d)\n",
                      CLG_(stat).full_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).full_debug_BBs);
         VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n",
                      CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).file_line_debug_BBs);
         VG_(message)(Vg_DebugMsg, "With fn name   debug info:%3d%% (%d)\n",
                      CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).fn_name_debug_BBs);
         VG_(message)(Vg_DebugMsg, "With no        debug info:%3d%% (%d)\n",
                      CLG_(stat).no_debug_BBs * 100 / BB_lookups,
                      CLG_(stat).no_debug_BBs);
      }
      VG_(message)(Vg_DebugMsg, "BBCC Clones:       %d\n",
                   CLG_(stat).bbcc_clones);
      VG_(message)(Vg_DebugMsg, "BBs Retranslated:  %d\n",
                   CLG_(stat).bb_retranslations);
      VG_(message)(Vg_DebugMsg, "Distinct instrs:   %d\n",
                   CLG_(stat).distinct_instrs);
      VG_(message)(Vg_DebugMsg, "");

      VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d\n",
                   CLG_(stat).cxt_lru_misses);
      VG_(message)(Vg_DebugMsg, "LRU BBCC Misses:   %d\n",
                   CLG_(stat).bbcc_lru_misses);
      VG_(message)(Vg_DebugMsg, "LRU JCC Misses:    %d\n",
                   CLG_(stat).jcc_lru_misses);
      VG_(message)(Vg_DebugMsg, "BBs Executed:      %llu\n",
                   CLG_(stat).bb_executions);
      VG_(message)(Vg_DebugMsg, "Calls:             %llu\n",
                   CLG_(stat).call_counter);
      VG_(message)(Vg_DebugMsg, "CondJMP followed:  %llu\n",
                   CLG_(stat).jcnd_counter);
      VG_(message)(Vg_DebugMsg, "Boring JMPs:       %llu\n",
                   CLG_(stat).jump_counter);
      VG_(message)(Vg_DebugMsg, "Recursive calls:   %llu\n",
                   CLG_(stat).rec_call_counter);
      VG_(message)(Vg_DebugMsg, "Returns:           %llu\n",
                   CLG_(stat).ret_counter);

      VG_(message)(Vg_DebugMsg, "");
   }

   CLG_(sprint_eventmapping)(buf, CLG_(dumpmap));
   VG_(message)(Vg_UserMsg, "Events    : %s\n", buf);
   CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost));
   VG_(message)(Vg_UserMsg, "Collected : %s\n", buf);
   VG_(message)(Vg_UserMsg, "\n");

   /* determine value widths for statistics */
   total = CLG_(total_cost);
   l1 = ULong_width( total[fullOffset(EG_IR)] );
   l2 = l3 = 0;
   if (CLG_(clo).simulate_cache) {
      l2 = ULong_width( total[fullOffset(EG_DR)] );
      l3 = ULong_width( total[fullOffset(EG_DW)] );
   }
   if (CLG_(clo).simulate_branch) {
      int l2b = ULong_width( total[fullOffset(EG_BC)] );
      int l3b = ULong_width( total[fullOffset(EG_BI)] );
      if (l2b > l2) l2 = l2b;
      if (l3b > l3) l3 = l3b;
   }

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

   /* Always print this */
   VG_(umsg)(fmt, "I   refs:     ", total[fullOffset(EG_IR)] );

   if (CLG_(clo).simulate_cache)
      (*CLG_(cachesim).printstat)(l1, l2, l3);

   if (CLG_(clo).simulate_branch)
      branchsim_printstat(l1, l2, l3);

}


void CLG_(fini)(Int exitcode)
{
   finish();
}


/*--------------------------------------------------------------------*/
/*--- Setup                                                        ---*/
/*--------------------------------------------------------------------*/

static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done )
{
   static ULong last_blocks_done = 0;

   if (0)
      VG_(printf)("%d R %llu\n", (Int)tid, blocks_done);

   /* throttle calls to CLG_(run_thread) by number of BBs executed */
   if (blocks_done - last_blocks_done < 5000) return;
   last_blocks_done = blocks_done;

   CLG_(run_thread)( tid );
}

static
void CLG_(post_clo_init)(void)
{
   VG_(clo_vex_control).iropt_unroll_thresh = 0;
   VG_(clo_vex_control).guest_chase_thresh = 0;

   CLG_DEBUG(1, "  dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
   CLG_DEBUG(1, "  call sep. : %d\n", CLG_(clo).separate_callers);
   CLG_DEBUG(1, "  rec. sep. : %d\n", CLG_(clo).separate_recursions);

   if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
      VG_(message)(Vg_UserMsg, "Using source line as position.\n");
      CLG_(clo).dump_line = True;
   }

   CLG_(init_dumps)();

   (*CLG_(cachesim).post_clo_init)();

   CLG_(init_eventsets)();
   CLG_(init_statistics)(& CLG_(stat));
   CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );

   /* initialize hash tables */
   CLG_(init_obj_table)();
   CLG_(init_cxt_table)();
   CLG_(init_bb_hash)();

   CLG_(init_threads)();
   CLG_(run_thread)(1);

   CLG_(instrument_state) = CLG_(clo).instrument_atstart;

   if (VG_(clo_verbosity) > 0) {
      VG_(message)(Vg_UserMsg,
                   "For interactive control, run 'callgrind_control -h'.\n");
   }
}

static
void CLG_(pre_clo_init)(void)
{
   VG_(details_name)            ("Callgrind");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a call-graph generating cache profiler");
   VG_(details_copyright_author)("Copyright (C) 2002-2012, and GNU GPL'd, "
                                 "by Josef Weidendorfer et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 500 );

   VG_(basic_tool_funcs)        (CLG_(post_clo_init),
                                 CLG_(instrument),
                                 CLG_(fini));

   VG_(needs_superblock_discards)(clg_discard_superblock_info);


   VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
                                   CLG_(print_usage),
                                   CLG_(print_debug_usage));

   VG_(needs_client_requests)(CLG_(handle_client_request));
   VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
                              CLG_(post_syscalltime));

   VG_(track_start_client_code)  ( & clg_start_client_code_callback );
   VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
   VG_(track_post_deliver_signal)( & CLG_(post_signal) );

   CLG_(set_clo_defaults)();
}

VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))

/*--------------------------------------------------------------------*/
/*--- end                                                   main.c ---*/
/*--------------------------------------------------------------------*/