/*--------------------------------------------------------------------*/
/*--- Callgrind                                                    ---*/
/*--- main.c                                                       ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call graph
   profiling programs.

   Copyright (C) 2002-2017, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)

   This tool is derived from and contains code from Cachegrind
   Copyright (C) 2002-2017 Nicholas Nethercote (njn@valgrind.org)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "config.h"
#include "callgrind.h"
#include "global.h"

#include "pub_tool_threadstate.h"
#include "pub_tool_gdbserver.h"
#include "pub_tool_transtab.h"     // VG_(discard_translations_safely)

#include "cg_branchpred.c"

/*------------------------------------------------------------*/
/*--- Global variables                                     ---*/
/*------------------------------------------------------------*/

/* for all threads */
CommandLineOptions CLG_(clo);
Statistics CLG_(stat);
Bool CLG_(instrument_state) = True; /* Instrumentation on? */

/* thread and signal handler specific */
exec_state CLG_(current_state);

/* min of L1 and LL cache line sizes.  This only gets set to a
   non-zero value if we are doing cache simulation. */
Int CLG_(min_line_size) = 0;


/*------------------------------------------------------------*/
/*--- Statistics                                           ---*/
/*------------------------------------------------------------*/

static void CLG_(init_statistics)(Statistics* s)
{
   s->call_counter     = 0;
   s->jcnd_counter     = 0;
   s->jump_counter     = 0;
   s->rec_call_counter = 0;
   s->ret_counter      = 0;
   s->bb_executions    = 0;

   s->context_counter   = 0;
   s->bb_retranslations = 0;

   s->distinct_objs     = 0;
   s->distinct_files    = 0;
   s->distinct_fns      = 0;
   s->distinct_contexts = 0;
   s->distinct_bbs      = 0;
   s->distinct_bbccs    = 0;
   s->distinct_instrs   = 0;
   s->distinct_skips    = 0;

   s->bb_hash_resizes    = 0;
   s->bbcc_hash_resizes  = 0;
   s->jcc_hash_resizes   = 0;
   s->cxt_hash_resizes   = 0;
   s->fn_array_resizes   = 0;
   s->call_stack_resizes = 0;
   s->fn_stack_resizes   = 0;

   s->full_debug_BBs      = 0;
   s->file_line_debug_BBs = 0;
   s->fn_name_debug_BBs   = 0;
   s->no_debug_BBs        = 0;
   s->bbcc_lru_misses     = 0;
   s->jcc_lru_misses      = 0;
   s->cxt_lru_misses      = 0;
   s->bbcc_clones         = 0;
}


/*------------------------------------------------------------*/
/*--- Simple callbacks (not cache simulator)               ---*/
/*------------------------------------------------------------*/

VG_REGPARM(1)
static void log_global_event(InstrInfo* ii)
{
   ULong* cost_Bus;

   CLG_DEBUG(6, "log_global_event:  Ir %#lx/%u\n",
             CLG_(bb_base) + ii->instr_offset, ii->instr_size);

   if (!CLG_(current_state).collect) return;

   CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 );

   CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;

   if (CLG_(current_state).nonskipped)
      cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
   else
      cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
   cost_Bus[0]++;
}


/* For branches, we consult two different predictors, one which
   predicts taken/untaken for conditional branches, and the other
   which predicts the branch target address for indirect branches
   (jump-to-register style ones). */
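
/* Illustrative note (added, not from the original source): for a
   guest "jnz label", flushEvents() below emits a call to
   log_cond_branch() with the branch guard value; for an indirect
   "jmp *%rax", it emits a call to log_ind_branch() with the computed
   target.  These helpers feed do_cond_branch_predict() resp.
   do_ind_branch_predict() from cg_branchpred.c, whose low result bit
   signals a mispredict and bumps the second counter of the Bc/Bi
   event group. */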

static VG_REGPARM(2)
void log_cond_branch(InstrInfo* ii, Word taken)
{
   Bool miss;
   Int fullOffset_Bc;
   ULong* cost_Bc;

   CLG_DEBUG(6, "log_cond_branch:  Ir %#lx, taken %ld\n",
             CLG_(bb_base) + ii->instr_offset, taken);

   miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken);

   if (!CLG_(current_state).collect) return;

   CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 );

   if (CLG_(current_state).nonskipped)
      cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC);
   else
      cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC];

   fullOffset_Bc = fullOffset(EG_BC);
   CLG_(current_state).cost[ fullOffset_Bc ]++;
   cost_Bc[0]++;
   if (miss) {
      CLG_(current_state).cost[ fullOffset_Bc+1 ]++;
      cost_Bc[1]++;
   }
}

static VG_REGPARM(2)
void log_ind_branch(InstrInfo* ii, UWord actual_dst)
{
   Bool miss;
   Int fullOffset_Bi;
   ULong* cost_Bi;

   CLG_DEBUG(6, "log_ind_branch:  Ir %#lx, dst %#lx\n",
             CLG_(bb_base) + ii->instr_offset, actual_dst);

   miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst);

   if (!CLG_(current_state).collect) return;

   CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 );

   if (CLG_(current_state).nonskipped)
      cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI);
   else
      cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI];

   fullOffset_Bi = fullOffset(EG_BI);
   CLG_(current_state).cost[ fullOffset_Bi ]++;
   cost_Bi[0]++;
   if (miss) {
      CLG_(current_state).cost[ fullOffset_Bi+1 ]++;
      cost_Bi[1]++;
   }
}

/*------------------------------------------------------------*/
/*--- Instrumentation structures and event queue handling  ---*/
/*------------------------------------------------------------*/

/* Maintain an ordered list of memory events which are outstanding, in
   the sense that no IR has yet been generated to do the relevant
   helper calls.  The BB is scanned top to bottom and memory events
   are added to the end of the list, merging with the most recent
   notified event where possible (Dw immediately following Dr and
   having the same size and EA can be merged).

   This merging is done so that for architectures which have
   load-op-store instructions (x86, amd64), the insn is treated as if
   it makes just one memory reference (a modify), rather than two (a
   read followed by a write at the same address).

   At various points the list will need to be flushed, that is, IR
   generated from it.  That must happen before any possible exit from
   the block (the end, or an IRStmt_Exit).  Flushing also takes place
   when there is no space to add a new event.

   If we require the simulation statistics to be up to date with
   respect to possible memory exceptions, then the list would have to
   be flushed before each memory reference.  That would however lose
   performance by inhibiting event-merging during flushing.

   Flushing the list consists of walking it start to end and emitting
   instrumentation IR for each event, in the order in which they
   appear.  It may be possible to emit a single call for two adjacent
   events in order to reduce the number of helper function calls made.
   For example, it could well be profitable to handle two adjacent Ir
   events with a single helper call. */
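
/* A minimal example of the merging (added note, not from the original
   source): an x86 "incl (%eax)" queues Ir, then Dr(4,EA), then
   Dw(4,EA).  addEvent_Dw() collapses the Dr/Dw pair with equal size
   and EA into one Dm, and flushEvents() then merges the Ir with the
   following Dm into a single log_1I1Dw helper call, instead of
   emitting one helper call per event. */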

typedef
   IRExpr
   IRAtom;

typedef
   enum {
      Ev_Ir,  // Instruction read
      Ev_Dr,  // Data read
      Ev_Dw,  // Data write
      Ev_Dm,  // Data modify (read then write)
      Ev_Bc,  // branch conditional
      Ev_Bi,  // branch indirect (to unknown destination)
      Ev_G    // Global bus event
   }
   EventTag;

typedef
   struct {
      EventTag   tag;
      InstrInfo* inode;
      union {
         struct {
         } Ir;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dr;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dw;
         struct {
            IRAtom* ea;
            Int     szB;
         } Dm;
         struct {
            IRAtom* taken; /* :: Ity_I1 */
         } Bc;
         struct {
            IRAtom* dst;
         } Bi;
         struct {
         } G;
      } Ev;
   }
   Event;

static void init_Event ( Event* ev ) {
   VG_(memset)(ev, 0, sizeof(Event));
}

static IRAtom* get_Event_dea ( Event* ev ) {
   switch (ev->tag) {
      case Ev_Dr: return ev->Ev.Dr.ea;
      case Ev_Dw: return ev->Ev.Dw.ea;
      case Ev_Dm: return ev->Ev.Dm.ea;
      default:    tl_assert(0);
   }
}

static Int get_Event_dszB ( Event* ev ) {
   switch (ev->tag) {
      case Ev_Dr: return ev->Ev.Dr.szB;
      case Ev_Dw: return ev->Ev.Dw.szB;
      case Ev_Dm: return ev->Ev.Dm.szB;
      default:    tl_assert(0);
   }
}


/* Up to this many unnotified events are allowed.  The number is
   arbitrary.  Larger numbers allow more event merging to occur, but
   potentially induce more spilling due to extending live ranges of
   address temporaries. */
#define N_EVENTS 16


/* A struct which holds all the running state during instrumentation.
   Mostly to avoid passing loads of parameters everywhere. */
typedef struct {
   /* The current outstanding-memory-event list. */
   Event events[N_EVENTS];
   Int   events_used;

   /* The array of InstrInfo's is part of the BB struct. */
   BB* bb;

   /* BB seen before (i.e. re-instrumentation) */
   Bool seen_before;

   /* Number of InstrInfo bins 'used' so far. */
   UInt ii_index;

   // current offset of guest instructions from BB start
   UInt instr_offset;

   /* The output SB being constructed. */
   IRSB* sbOut;
} ClgState;


static void showEvent ( Event* ev )
{
   switch (ev->tag) {
      case Ev_Ir:
         VG_(printf)("Ir (InstrInfo %p) at +%u\n",
                     ev->inode, ev->inode->instr_offset);
         break;
      case Ev_Dr:
         VG_(printf)("Dr (InstrInfo %p) at +%u %d EA=",
                     ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB);
         ppIRExpr(ev->Ev.Dr.ea);
         VG_(printf)("\n");
         break;
      case Ev_Dw:
         VG_(printf)("Dw (InstrInfo %p) at +%u %d EA=",
                     ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB);
         ppIRExpr(ev->Ev.Dw.ea);
         VG_(printf)("\n");
         break;
      case Ev_Dm:
         VG_(printf)("Dm (InstrInfo %p) at +%u %d EA=",
                     ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB);
         ppIRExpr(ev->Ev.Dm.ea);
         VG_(printf)("\n");
         break;
      case Ev_Bc:
         VG_(printf)("Bc %p GA=", ev->inode);
         ppIRExpr(ev->Ev.Bc.taken);
         VG_(printf)("\n");
         break;
      case Ev_Bi:
         VG_(printf)("Bi %p DST=", ev->inode);
         ppIRExpr(ev->Ev.Bi.dst);
         VG_(printf)("\n");
         break;
      case Ev_G:
         VG_(printf)("G %p\n", ev->inode);
         break;
      default:
         tl_assert(0);
         break;
   }
}

/* Generate code for all outstanding memory events, and mark the queue
   empty.  Code is generated into cgs->sbOut, and this activity
   'consumes' slots in cgs->bb. */

static void flushEvents ( ClgState* clgs )
{
   Int          i, regparms, inew;
   const HChar* helperName;
   void*        helperAddr;
   IRExpr**     argv;
   IRExpr*      i_node_expr;
   IRDirty*     di;
   Event*       ev;
   Event*       ev2;
   Event*       ev3;

   if (!clgs->seen_before) {
      // extend event sets as needed
      // available sets: D0 Dr
      for(i=0; i<clgs->events_used; i++) {
         ev = &clgs->events[i];
         switch(ev->tag) {
            case Ev_Ir:
               // an Ir event is always first for a guest instruction
               CLG_ASSERT(ev->inode->eventset == 0);
               ev->inode->eventset = CLG_(sets).base;
               break;
            case Ev_Dr:
               // extend event set by Dr counters
               ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                           EG_DR);
               break;
            case Ev_Dw:
            case Ev_Dm:
               // extend event set by Dw counters
               ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                           EG_DW);
               break;
            case Ev_Bc:
               // extend event set by Bc counters
               ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                           EG_BC);
               break;
            case Ev_Bi:
               // extend event set by Bi counters
               ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                           EG_BI);
               break;
            case Ev_G:
               // extend event set by Bus counter
               ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                           EG_BUS);
               break;
            default:
               tl_assert(0);
         }
      }
   }

   for(i = 0; i < clgs->events_used; i = inew) {

      helperName = NULL;
      helperAddr = NULL;
      argv       = NULL;
      regparms   = 0;

      /* generate IR to notify event i and possibly the ones
         immediately following it. */
      tl_assert(i >= 0 && i < clgs->events_used);

      ev  = &clgs->events[i];
      ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
      ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );

      CLG_DEBUGIF(5) {
         VG_(printf)("   flush ");
         showEvent( ev );
      }

      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );

      /* Decide on the helper fn to call and the args to pass it, and
         advance i appropriately.
         Dm events have the same effect as Dw events. */
      switch (ev->tag) {
         case Ev_Ir:
            /* Merge an Ir with a following Dr. */
            if (ev2 && ev2->tag == Ev_Dr) {
               /* Why is this true?  It's because we're merging an Ir
                  with a following Dr.  The Ir derives from the
                  instruction's IMark and the Dr from data
                  references which follow it.  In short it holds
                  because each insn starts with an IMark, hence an
                  Ev_Ir, and so these Dr must pertain to the
                  immediately preceding Ir.  The same applies to the
                  analogous assertions in the subsequent cases. */
               tl_assert(ev2->inode == ev->inode);
               helperName = CLG_(cachesim).log_1I1Dr_name;
               helperAddr = CLG_(cachesim).log_1I1Dr;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               inew = i+2;
            }
            /* Merge an Ir with a following Dw/Dm. */
            else
            if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
               tl_assert(ev2->inode == ev->inode);
               helperName = CLG_(cachesim).log_1I1Dw_name;
               helperAddr = CLG_(cachesim).log_1I1Dw;
               argv = mkIRExprVec_3( i_node_expr,
                                     get_Event_dea(ev2),
                                     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
               regparms = 3;
               inew = i+2;
            }
            /* Merge an Ir with two following Irs. */
            else
            if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
               helperName = CLG_(cachesim).log_3I0D_name;
               helperAddr = CLG_(cachesim).log_3I0D;
               argv = mkIRExprVec_3( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ),
                                     mkIRExpr_HWord( (HWord)ev3->inode ) );
               regparms = 3;
               inew = i+3;
            }
            /* Merge an Ir with one following Ir. */
            else
            if (ev2 && ev2->tag == Ev_Ir) {
               helperName = CLG_(cachesim).log_2I0D_name;
               helperAddr = CLG_(cachesim).log_2I0D;
               argv = mkIRExprVec_2( i_node_expr,
                                     mkIRExpr_HWord( (HWord)ev2->inode ) );
               regparms = 2;
               inew = i+2;
            }
            /* No merging possible; emit as-is. */
            else {
               helperName = CLG_(cachesim).log_1I0D_name;
               helperAddr = CLG_(cachesim).log_1I0D;
               argv = mkIRExprVec_1( i_node_expr );
               regparms = 1;
               inew = i+1;
            }
            break;
         case Ev_Dr:
            /* Data read or modify */
            helperName = CLG_(cachesim).log_0I1Dr_name;
            helperAddr = CLG_(cachesim).log_0I1Dr;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            inew = i+1;
            break;
         case Ev_Dw:
         case Ev_Dm:
            /* Data write */
            helperName = CLG_(cachesim).log_0I1Dw_name;
            helperAddr = CLG_(cachesim).log_0I1Dw;
            argv = mkIRExprVec_3( i_node_expr,
                                  get_Event_dea(ev),
                                  mkIRExpr_HWord( get_Event_dszB(ev) ) );
            regparms = 3;
            inew = i+1;
            break;
         case Ev_Bc:
            /* Conditional branch */
            helperName = "log_cond_branch";
            helperAddr = &log_cond_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
            regparms = 2;
            inew = i+1;
            break;
         case Ev_Bi:
            /* Branch to an unknown destination */
            helperName = "log_ind_branch";
            helperAddr = &log_ind_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
            regparms = 2;
            inew = i+1;
            break;
         case Ev_G:
            /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
            helperName = "log_global_event";
            helperAddr = &log_global_event;
            argv = mkIRExprVec_1( i_node_expr );
            regparms = 1;
            inew = i+1;
            break;
         default:
            tl_assert(0);
      }

      CLG_DEBUGIF(5) {
         if (inew > i+1) {
            VG_(printf)("   merge ");
            showEvent( ev2 );
         }
         if (inew > i+2) {
            VG_(printf)("   merge ");
            showEvent( ev3 );
         }
         if (helperAddr)
            VG_(printf)("   call  %s (%p)\n",
                        helperName, helperAddr);
      }

      /* the helper could be unset depending on the simulator used */
      if (helperAddr == 0) continue;

      /* Add the helper. */
      tl_assert(helperName);
      tl_assert(helperAddr);
      tl_assert(argv);
      di = unsafeIRDirty_0_N( regparms,
                              helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                              argv );
      addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
   }

   clgs->events_used = 0;
}

static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
{
   Event* evt;
   tl_assert(clgs->seen_before || (inode->eventset == 0));
   if (!CLG_(clo).simulate_cache) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag   = Ev_Ir;
   evt->inode = inode;
   clgs->events_used++;
}

static
void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
   Event* evt;
   tl_assert(isIRAtom(ea));
   tl_assert(datasize >= 1);
   if (!CLG_(clo).simulate_cache) return;
   tl_assert(datasize <= CLG_(min_line_size));

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Dr;
   evt->inode     = inode;
   evt->Ev.Dr.szB = datasize;
   evt->Ev.Dr.ea  = ea;
   clgs->events_used++;
}

static
void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
{
   Event* evt;
   tl_assert(isIRAtom(ea));
   tl_assert(datasize >= 1);
   if (!CLG_(clo).simulate_cache) return;
   tl_assert(datasize <= CLG_(min_line_size));

   /* Is it possible to merge this write with the preceding read? */
   if (clgs->events_used > 0) {
      Event* lastEvt = &clgs->events[clgs->events_used-1];
      if (   lastEvt->tag == Ev_Dr
          && lastEvt->Ev.Dr.szB == datasize
          && lastEvt->inode == inode
          && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
      {
         lastEvt->tag = Ev_Dm;
         return;
      }
   }

   /* No.  Add as normal. */
   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Dw;
   evt->inode     = inode;
   evt->Ev.Dw.szB = datasize;
   evt->Ev.Dw.ea  = ea;
   clgs->events_used++;
}

static
void addEvent_D_guarded ( ClgState* clgs, InstrInfo* inode,
                          Int datasize, IRAtom* ea, IRAtom* guard,
                          Bool isWrite )
{
   tl_assert(isIRAtom(ea));
   tl_assert(guard);
   tl_assert(isIRAtom(guard));
   tl_assert(datasize >= 1);
   if (!CLG_(clo).simulate_cache) return;
   tl_assert(datasize <= CLG_(min_line_size));

   /* Adding guarded memory actions and merging them with the existing
      queue is too complex.  Simply flush the queue and add this
      action immediately.  Since guarded loads and stores are pretty
      rare, this is not thought likely to cause any noticeable
      performance loss as a result of the loss of event-merging
      opportunities. */
   tl_assert(clgs->events_used >= 0);
   flushEvents(clgs);
   tl_assert(clgs->events_used == 0);
   /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
   IRExpr*      i_node_expr;
   const HChar* helperName;
   void*        helperAddr;
   IRExpr**     argv;
   Int          regparms;
   IRDirty*     di;
   i_node_expr = mkIRExpr_HWord( (HWord)inode );
   helperName  = isWrite ? CLG_(cachesim).log_0I1Dw_name
                         : CLG_(cachesim).log_0I1Dr_name;
   helperAddr  = isWrite ? CLG_(cachesim).log_0I1Dw
                         : CLG_(cachesim).log_0I1Dr;
   argv        = mkIRExprVec_3( i_node_expr,
                                ea, mkIRExpr_HWord( datasize ) );
   regparms    = 3;
   di          = unsafeIRDirty_0_N(
                    regparms,
                    helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                    argv );
   di->guard = guard;
   addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
}

static
void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard )
{
   Event* evt;
   tl_assert(isIRAtom(guard));
   tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard)
             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
   if (!CLG_(clo).simulate_branch) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag         = Ev_Bc;
   evt->inode       = inode;
   evt->Ev.Bc.taken = guard;
   clgs->events_used++;
}

static
void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo )
{
   Event* evt;
   tl_assert(isIRAtom(whereTo));
   tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo)
             == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
   if (!CLG_(clo).simulate_branch) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag       = Ev_Bi;
   evt->inode     = inode;
   evt->Ev.Bi.dst = whereTo;
   clgs->events_used++;
}

static
void addEvent_G ( ClgState* clgs, InstrInfo* inode )
{
   Event* evt;
   if (!CLG_(clo).collect_bus) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
   evt = &clgs->events[clgs->events_used];
   init_Event(evt);
   evt->tag   = Ev_G;
   evt->inode = inode;
   clgs->events_used++;
}

/* Initialise or check (if already seen before) an InstrInfo for the next
   insn.  We can only set instr_offset/instr_size here.  The required event
   set and resulting cost offset depend on the events (Ir/Dr/Dw/Dm) of the
   guest instruction.  The event set is extended as required on flush of the
   event queue (when Dm events have been determined); cost offsets are
   determined at the end of BB instrumentation. */
static
InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
{
   InstrInfo* ii;
   tl_assert(clgs->ii_index >= 0);
   tl_assert(clgs->ii_index < clgs->bb->instr_count);
   ii = &clgs->bb->instr[ clgs->ii_index ];

   if (clgs->seen_before) {
      CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
      CLG_ASSERT(ii->instr_size == instr_size);
   }
   else {
      ii->instr_offset = clgs->instr_offset;
      ii->instr_size   = instr_size;
      ii->cost_offset  = 0;
      ii->eventset     = 0;
   }

   clgs->ii_index++;
   clgs->instr_offset += instr_size;
   CLG_(stat).distinct_instrs++;

   return ii;
}

// return the total number of cost values needed for this BB
static
UInt update_cost_offsets( ClgState* clgs )
{
   Int i;
   InstrInfo* ii;
   UInt cost_offset = 0;

   CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
   for(i=0; i<clgs->ii_index; i++) {
      ii = &clgs->bb->instr[i];
      if (clgs->seen_before) {
         CLG_ASSERT(ii->cost_offset == cost_offset);
      } else
         ii->cost_offset = cost_offset;
      cost_offset += ii->eventset ? ii->eventset->size : 0;
   }

   return cost_offset;
}
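
/* Worked example (added note, not from the original source): for a BB
   with three InstrInfos whose event sets have sizes 2, 0 and 3, the
   loop above assigns cost_offset 0, 2 and 2 respectively, and returns
   5: the total number of cost values to allocate for this BB. */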

/*------------------------------------------------------------*/
/*--- Instrumentation                                      ---*/
/*------------------------------------------------------------*/

#if defined(VG_BIGENDIAN)
# define CLGEndness Iend_BE
#elif defined(VG_LITTLEENDIAN)
# define CLGEndness Iend_LE
#else
# error "Unknown endianness"
#endif

static
Addr IRConst2Addr(IRConst* con)
{
   Addr addr;

   if (sizeof(Addr) == 4) {
      CLG_ASSERT( con->tag == Ico_U32 );
      addr = con->Ico.U32;
   }
   else if (sizeof(Addr) == 8) {
      CLG_ASSERT( con->tag == Ico_U64 );
      addr = con->Ico.U64;
   }
   else
      VG_(tool_panic)("Callgrind: invalid Addr type");

   return addr;
}

/* First pass over a BB to instrument, counting instructions and jumps.
 * This is needed for the size of the BB struct to allocate.
 *
 * Called from CLG_(get_bb)
 */
void CLG_(collectBlockInfo)(IRSB* sbIn,
                            /*INOUT*/ UInt* instrs,
                            /*INOUT*/ UInt* cjmps,
                            /*INOUT*/ Bool* cjmp_inverted)
{
   Int i;
   IRStmt* st;
   Addr instrAddr = 0, jumpDst;
   UInt instrLen = 0;
   Bool toNextInstr = False;

   // Ist_Exit has to be ignored in preamble code, before the first IMark:
   // preamble code is added by VEX for self-modifying code, and has
   // nothing to do with client code
   Bool inPreamble = True;

   if (!sbIn) return;

   for (i = 0; i < sbIn->stmts_used; i++) {
      st = sbIn->stmts[i];
      if (Ist_IMark == st->tag) {
         inPreamble = False;

         instrAddr = st->Ist.IMark.addr;
         instrLen  = st->Ist.IMark.len;

         (*instrs)++;
         toNextInstr = False;
      }
      if (inPreamble) continue;
      if (Ist_Exit == st->tag) {
         jumpDst = IRConst2Addr(st->Ist.Exit.dst);
         toNextInstr = (jumpDst == instrAddr + instrLen);

         (*cjmps)++;
      }
   }

   /* If the last instruction of the BB conditionally jumps to the next
    * instruction (= first instruction of the next BB in memory), the
    * branch was inverted by VEX.
    */
   *cjmp_inverted = toNextInstr;
}
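
/* Illustrative example (added note, not from the original source):
   VEX may turn "je next" at guest address 0x8048000 (length 2) into an
   IRStmt_Exit whose destination is 0x8048002, i.e. the fall-through
   address.  The heuristic above then sets *cjmp_inverted, and
   CLG_(instrument)() compensates by XORing the widened guard of the
   last conditional exit and by adjusting the jmps_passed value it
   stores before that exit. */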

static
void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
{
   addStmtToIRSB( bbOut,
                  IRStmt_Store(CLGEndness,
                               IRExpr_Const(hWordTy == Ity_I32 ?
                                            IRConst_U32( addr ) :
                                            IRConst_U64( addr )),
                               IRExpr_Const(IRConst_U32(val)) ));
}


/* Add a helper call to setup_bbcc, with a pointer to the BB struct as
 * argument.
 *
 * Preconditions for setup_bbcc:
 * - jmps_passed has the number of cond. jumps passed in the last executed BB
 * - current_bbcc has a pointer to the BBCC of the last executed BB
 *   Thus, if bbcc_jmpkind is != -1 (JmpNone),
 *     current_bbcc->bb->jmp_addr
 *   gives the address of the jump source.
 *
 * The setup does 2 things:
 * - trace call:
 *   * Unwind own call stack, i.e. sync our ESP with the real ESP.
 *     This is for ESP manipulation (longjmps, C++ exception handling) and RET
 *   * For CALLs or JMPs crossing objects, record the call arg and
 *     push a frame onto our own call stack
 *
 * - prepare for cache log functions:
 *   set current_bbcc to the BBCC that the costs of this BB execution
 *   get attached to
 */
static
void addBBSetupCall(ClgState* clgs)
{
   IRDirty* di;
   IRExpr  *arg1, **argv;

   arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
   argv = mkIRExprVec_1(arg1);
   di = unsafeIRDirty_0_N( 1, "setup_bbcc",
                           VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
                           argv);
   addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
}


static
IRSB* CLG_(instrument)( VgCallbackClosure* closure,
                        IRSB* sbIn,
                        const VexGuestLayout* layout,
                        const VexGuestExtents* vge,
                        const VexArchInfo* archinfo_host,
                        IRType gWordTy, IRType hWordTy )
{
   Int        i;
   IRStmt*    st;
   Addr       origAddr;
   InstrInfo* curr_inode = NULL;
   ClgState   clgs;
   UInt       cJumps = 0;
   IRTypeEnv* tyenv = sbIn->tyenv;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   // No instrumentation if it is switched off
   if (! CLG_(instrument_state)) {
      CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
                (Addr)closure->readdr);
      return sbIn;
   }

   CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);

   /* Set up SB for instrumented IR */
   clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
      i++;
   }

   // Get the first statement, and origAddr from it
   CLG_ASSERT(sbIn->stmts_used > 0);
   CLG_ASSERT(i < sbIn->stmts_used);
   st = sbIn->stmts[i];
   CLG_ASSERT(Ist_IMark == st->tag);

   origAddr = st->Ist.IMark.addr + st->Ist.IMark.delta;
   CLG_ASSERT(origAddr == st->Ist.IMark.addr
                          + st->Ist.IMark.delta);  // XXX: check no overflow

   /* Get BB struct (creating if necessary).
    * JS: The hash table is keyed with orig_addr_noredir -- important!
    * JW: Why? If it is because of different chasing of the redirection,
    *     this is not needed, as chasing is switched off in callgrind
    */
   clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));

   addBBSetupCall(&clgs);

   // Set up running state
   clgs.events_used = 0;
   clgs.ii_index = 0;
   clgs.instr_offset = 0;

   for (/*use current i*/; i < sbIn->stmts_used; i++) {

      st = sbIn->stmts[i];
      CLG_ASSERT(isFlatIRStmt(st));

      switch (st->tag) {
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            break;

         case Ist_IMark: {
            Addr cia   = st->Ist.IMark.addr + st->Ist.IMark.delta;
            UInt isize = st->Ist.IMark.len;
            CLG_ASSERT(clgs.instr_offset == cia - origAddr);
            // If VEX fails to decode an instruction, the size will be zero.
            // Pretend otherwise.
            if (isize == 0) isize = VG_MIN_INSTR_SZB;

            // Sanity-check size.
            tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
                       || VG_CLREQ_SZB == isize );

            // Init the inode, record it as the current one.
            // Subsequent Dr/Dw/Dm events from the same instruction will
            // also use it.
            curr_inode = next_InstrInfo (&clgs, isize);

            addEvent_Ir( &clgs, curr_inode );
            break;
         }

         case Ist_WrTmp: {
            IRExpr* data = st->Ist.WrTmp.data;
            if (data->tag == Iex_Load) {
               IRExpr* aexpr = data->Iex.Load.addr;
               // Note also, endianness info is ignored.  I guess
               // that's not interesting.
               addEvent_Dr( &clgs, curr_inode,
                            sizeofIRType(data->Iex.Load.ty), aexpr );
            }
            break;
         }

         case Ist_Store: {
            IRExpr* data  = st->Ist.Store.data;
            IRExpr* aexpr = st->Ist.Store.addr;
            addEvent_Dw( &clgs, curr_inode,
                         sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
            break;
         }

         case Ist_StoreG: {
            IRStoreG* sg   = st->Ist.StoreG.details;
            IRExpr*   data = sg->data;
            IRExpr*   addr = sg->addr;
            IRType    type = typeOfIRExpr(tyenv, data);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &clgs, curr_inode,
                                sizeofIRType(type), addr, sg->guard,
                                True/*isWrite*/ );
            break;
         }

         case Ist_LoadG: {
            IRLoadG* lg       = st->Ist.LoadG.details;
            IRType   type     = Ity_INVALID; /* loaded type */
            IRType   typeWide = Ity_INVALID; /* after implicit widening */
            IRExpr*  addr     = lg->addr;
            typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &clgs, curr_inode,
                                sizeofIRType(type), addr, lg->guard,
                                False/*!isWrite*/ );
            break;
         }

         case Ist_Dirty: {
            Int      dataSize;
            IRDirty* d = st->Ist.Dirty.details;
            if (d->mFx != Ifx_None) {
               /* This dirty helper accesses memory.  Collect the details. */
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               // Large (eg. 28B, 108B, 512B on x86) data-sized
               // instructions will be done inaccurately, but they're
               // very rare and this avoids errors from hitting more
               // than two cache lines in the simulation.
               if (CLG_(clo).simulate_cache && dataSize > CLG_(min_line_size))
                  dataSize = CLG_(min_line_size);
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                  addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                  addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
            } else {
               tl_assert(d->mAddr == NULL);
               tl_assert(d->mSize == 0);
            }
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the VEX
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            CLG_ASSERT(cas->addr && isIRAtom(cas->addr));
            CLG_ASSERT(cas->dataLo);
            dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since this is a doubleword-cas */
            addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
            addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
            addEvent_G( &clgs, curr_inode );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result);
               addEvent_Dr( &clgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
               /* flush events before LL, should help SC to succeed */
               flushEvents( &clgs );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
               addEvent_Dw( &clgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
               /* I don't know whether the global-bus-lock cost should
                  be attributed to the LL or the SC, but it doesn't
                  really matter since they always have to be used in
                  pairs anyway.  Hence put it (quite arbitrarily) on
                  the SC. */
               addEvent_G( &clgs, curr_inode );
            }
            break;
         }

         case Ist_Exit: {
            Bool guest_exit, inverted;

            /* VEX code generation sometimes inverts conditional branches.
             * As Callgrind counts (conditional) jumps, it has to correct
             * inversions.  The heuristic is the following:
             * (1) Callgrind switches off SB chasing and unrolling, and
             *     therefore it assumes that the only candidate for inversion
             *     is the last conditional branch in an SB.
             * (2) Inversion is assumed if the branch jumps to the address of
             *     the next guest instruction in memory.
             * This heuristic is precalculated in CLG_(collectBlockInfo)().
             *
             * Branching behavior is also used for branch prediction.  Note
             * that the above heuristic is different from what Cachegrind
             * does.  Cachegrind uses (2) for all branches.
             */
            if (cJumps+1 == clgs.bb->cjmp_count)
               inverted = clgs.bb->cjmp_inverted;
            else
               inverted = False;

            // call the branch predictor only if this is a branch in guest code
            guest_exit = (st->Ist.Exit.jk == Ijk_Boring) ||
                         (st->Ist.Exit.jk == Ijk_Call) ||
                         (st->Ist.Exit.jk == Ijk_Ret);

            if (guest_exit) {
               /* Stuff to widen the guard expression to a host word, so
                  we can pass it to the branch predictor simulation
                  functions easily. */
               IRType  tyW    = hWordTy;
               IROp    widen  = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
               IROp    opXOR  = tyW==Ity_I32 ? Iop_Xor32  : Iop_Xor64;
               IRTemp  guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1);
               IRTemp  guardW = newIRTemp(clgs.sbOut->tyenv, tyW);
               IRTemp  guard  = newIRTemp(clgs.sbOut->tyenv, tyW);
               IRExpr* one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
                                             : IRExpr_Const(IRConst_U64(1));

               /* Widen the guard expression. */
               addStmtToIRSB( clgs.sbOut,
                              IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
               addStmtToIRSB( clgs.sbOut,
                              IRStmt_WrTmp( guardW,
                                            IRExpr_Unop(widen,
                                                        IRExpr_RdTmp(guard1))) );
               /* If the exit is inverted, invert the sense of the guard. */
               addStmtToIRSB(
                  clgs.sbOut,
                  IRStmt_WrTmp(
                     guard,
                     inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
                              : IRExpr_RdTmp(guardW)
                  ));
               /* And post the event. */
               addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) );
            }

            /* We may never reach the next statement, so need to flush
               all outstanding transactions now. */
            flushEvents( &clgs );

            CLG_ASSERT(clgs.ii_index>0);
            if (!clgs.seen_before) {
               ClgJumpKind jk;

               if      (st->Ist.Exit.jk == Ijk_Call) jk = jk_Call;
               else if (st->Ist.Exit.jk == Ijk_Ret)  jk = jk_Return;
               else {
                  if (IRConst2Addr(st->Ist.Exit.dst) ==
                      origAddr + curr_inode->instr_offset
                               + curr_inode->instr_size)
                     jk = jk_None;
                  else
                     jk = jk_Jump;
               }

               clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
               clgs.bb->jmp[cJumps].jmpkind = jk;
            }

            /* Update the global variable jmps_passed before the jump.
             * A correction is needed if VEX inverted the last jump condition.
             */
            UInt val = inverted ? cJumps+1 : cJumps;
            addConstMemStoreStmt( clgs.sbOut,
                                  (UWord) &CLG_(current_state).jmps_passed,
                                  val, hWordTy);
            cJumps++;

            break;
         }

         default:
            tl_assert(0);
            break;
      }

      /* Copy the original statement */
      addStmtToIRSB( clgs.sbOut, st );

      CLG_DEBUGIF(5) {
         VG_(printf)("   pass  ");
         ppIRStmt(st);
         VG_(printf)("\n");
      }
   }

   /* Deal with branches to unknown destinations.  Except ignore ones
      which are function returns as we assume the return stack
      predictor never mispredicts. */
   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
      switch (sbIn->next->tag) {
         case Iex_Const:
            break; /* boring - branch to known address */
         case Iex_RdTmp:
            /* looks like an indirect branch (branch to unknown) */
            addEvent_Bi( &clgs, curr_inode, sbIn->next );
            break;
         default:
            /* shouldn't happen - if the incoming IR is properly
               flattened, should only have tmp and const cases to
               consider. */
            tl_assert(0);
      }
   }

   /* At the end of the bb.  Flush outstandings. */
   flushEvents( &clgs );

   /* Update the global variable jmps_passed at the end of the SB.
    * As CLG_(current_state).jmps_passed is reset to 0 in setup_bbcc,
    * this can be omitted if there is no conditional jump in this SB.
    * A correction is needed if VEX inverted the last jump condition.
    */
   if (cJumps>0) {
      UInt jmps_passed = cJumps;
      if (clgs.bb->cjmp_inverted) jmps_passed--;
      addConstMemStoreStmt( clgs.sbOut,
                            (UWord) &CLG_(current_state).jmps_passed,
                            jmps_passed, hWordTy);
   }
   CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
   CLG_ASSERT(clgs.bb->instr_count == clgs.ii_index);

   /* Info for final exit from BB */
   {
      ClgJumpKind jk;

      if      (sbIn->jumpkind == Ijk_Call) jk = jk_Call;
      else if (sbIn->jumpkind == Ijk_Ret)  jk = jk_Return;
      else {
         jk = jk_Jump;
         if ((sbIn->next->tag == Iex_Const) &&
             (IRConst2Addr(sbIn->next->Iex.Const.con) ==
              origAddr + clgs.instr_offset))
            jk = jk_None;
      }
      clgs.bb->jmp[cJumps].jmpkind = jk;
      /* Instruction index of the call/ret at BB end
       * (it is wrong for fall-through, but does not matter) */
      clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
   }

   /* swap information of last exit with final exit if inverted */
   if (clgs.bb->cjmp_inverted) {
      ClgJumpKind jk;
      UInt instr;

      jk = clgs.bb->jmp[cJumps].jmpkind;
      clgs.bb->jmp[cJumps].jmpkind = clgs.bb->jmp[cJumps-1].jmpkind;
      clgs.bb->jmp[cJumps-1].jmpkind = jk;
      instr = clgs.bb->jmp[cJumps].instr;
      clgs.bb->jmp[cJumps].instr = clgs.bb->jmp[cJumps-1].instr;
      clgs.bb->jmp[cJumps-1].instr = instr;
   }

   if (clgs.seen_before) {
      CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
      CLG_ASSERT(clgs.bb->instr_len == clgs.instr_offset);
   }
   else {
      clgs.bb->cost_count = update_cost_offsets(&clgs);
      clgs.bb->instr_len = clgs.instr_offset;
   }

   CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
             origAddr, clgs.bb->instr_len,
             clgs.bb->cjmp_count, clgs.bb->cost_count);
   if (cJumps>0) {
      CLG_DEBUG(3, "   [ ");
      for (i=0;i<cJumps;i++)
         CLG_DEBUG(3, "%u ", clgs.bb->jmp[i].instr);
      CLG_DEBUG(3, "], last inverted: %s \n",
                clgs.bb->cjmp_inverted ? "yes":"no");
   }

   return clgs.sbOut;
}

/*--------------------------------------------------------------------*/
/*--- Discarding BB info                                           ---*/
/*--------------------------------------------------------------------*/

// Called when a translation is removed from the translation cache for
// any reason at all: to free up space, because the guest code was
// unmapped or modified, or for any arbitrary reason.
static
void clg_discard_superblock_info ( Addr orig_addr, VexGuestExtents vge )
{
   tl_assert(vge.n_used > 0);

   if (0)
      VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
                   (void*)orig_addr,
                   (void*)vge.base[0], (ULong)vge.len[0]);

   // Get BB info, remove from table, free BB info.  Simple!
   // When created, the BB is keyed by the first instruction address
   // (not orig_addr, but the eventually redirected address).  Thus, we
   // use the first instruction address in vge.
   CLG_(delete_bb)(vge.base[0]);
}


/*------------------------------------------------------------*/
/*--- CLG_(fini)() and related functions                   ---*/
/*------------------------------------------------------------*/



static void zero_thread_cost(thread_info* t)
{
   Int i;

   for(i = 0; i < CLG_(current_call_stack).sp; i++) {
      if (!CLG_(current_call_stack).entry[i].jcc) continue;

      /* reset call counters to current for active calls */
      CLG_(copy_cost)( CLG_(sets).full,
                       CLG_(current_call_stack).entry[i].enter_cost,
                       CLG_(current_state).cost );
      CLG_(current_call_stack).entry[i].jcc->call_counter = 0;
   }

   CLG_(forall_bbccs)(CLG_(zero_bbcc));

   /* set counter for last dump */
   CLG_(copy_cost)( CLG_(sets).full,
                    t->lastdump_cost, CLG_(current_state).cost );
}

void CLG_(zero_all_cost)(Bool only_current_thread)
{
   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "  Zeroing costs...\n");

   if (only_current_thread)
      zero_thread_cost(CLG_(get_current_thread)());
   else
      CLG_(forall_threads)(zero_thread_cost);

   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "  ...done\n");
}

static
void unwind_thread(thread_info* t)
{
   /* unwind signal handlers */
   while(CLG_(current_state).sig != 0)
      CLG_(post_signal)(CLG_(current_tid), CLG_(current_state).sig);

   /* unwind regular call stack */
   while(CLG_(current_call_stack).sp > 0)
      CLG_(pop_call_stack)();

   /* reset context and function stack for context generation */
   CLG_(init_exec_state)( &CLG_(current_state) );
   CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
}

static
void zero_state_cost(thread_info* t)
{
   CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost );
}

void CLG_(set_instrument_state)(const HChar* reason, Bool state)
{
   if (CLG_(instrument_state) == state) {
      CLG_DEBUG(2, "%s: instrumentation already %s\n",
                reason, state ? "ON" : "OFF");
      return;
   }
   CLG_(instrument_state) = state;
   CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
             reason, state ? "ON" : "OFF");

   VG_(discard_translations_safely)( (Addr)0x1000, ~(SizeT)0xfff, "callgrind");

   /* reset internal state: call stacks, simulator */
   CLG_(forall_threads)(unwind_thread);
   CLG_(forall_threads)(zero_state_cost);
   (*CLG_(cachesim).clear)();

   if (VG_(clo_verbosity) > 1)
      VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
                   reason, state ? "ON" : "OFF");
}

/* helper for dump_state_togdb */
static void dump_state_of_thread_togdb(thread_info* ti)
{
   static FullCost sum = 0, tmp = 0;
   Int t, i;
   BBCC *from, *to;
   call_entry* ce;
   HChar *mcost;

   t = CLG_(current_tid);
   CLG_(init_cost_lz)( CLG_(sets).full, &sum );
   CLG_(copy_cost_lz)( CLG_(sets).full, &tmp, ti->lastdump_cost );
   CLG_(add_diff_cost)( CLG_(sets).full, sum, ti->lastdump_cost,
                        ti->states.entry[0]->cost);
   CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost, tmp );
   mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), sum);
   VG_(gdb_printf)("events-%d: %s\n", t, mcost);
   VG_(free)(mcost);
   VG_(gdb_printf)("frames-%d: %d\n", t, CLG_(current_call_stack).sp);

   ce = 0;
   for(i = 0; i < CLG_(current_call_stack).sp; i++) {
      ce = CLG_(get_call_entry)(i);
      /* if this frame is skipped, we don't have counters */
      if (!ce->jcc) continue;

      from = ce->jcc->from;
      VG_(gdb_printf)("function-%d-%d: %s\n", t, i, from->cxt->fn[0]->name);
      VG_(gdb_printf)("calls-%d-%d: %llu\n", t, i, ce->jcc->call_counter);

      /* FIXME: EventSets! */
      CLG_(copy_cost)( CLG_(sets).full, sum, ce->jcc->cost );
      CLG_(copy_cost)( CLG_(sets).full, tmp, ce->enter_cost );
      CLG_(add_diff_cost)( CLG_(sets).full, sum,
                           ce->enter_cost, CLG_(current_state).cost );
      CLG_(copy_cost)( CLG_(sets).full, ce->enter_cost, tmp );

      mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), sum);
      VG_(gdb_printf)("events-%d-%d: %s\n", t, i, mcost);
      VG_(free)(mcost);
   }
   if (ce && ce->jcc) {
      to = ce->jcc->to;
      VG_(gdb_printf)("function-%d-%d: %s\n", t, i, to->cxt->fn[0]->name );
   }
}

/* Dump current state */
static void dump_state_togdb(void)
{
   thread_info** th;
   int t;
   Int orig_tid = CLG_(current_tid);

   VG_(gdb_printf)("instrumentation: %s\n",
                   CLG_(instrument_state) ? "on":"off");
   if (!CLG_(instrument_state)) return;

   VG_(gdb_printf)("executed-bbs: %llu\n", CLG_(stat).bb_executions);
   VG_(gdb_printf)("executed-calls: %llu\n", CLG_(stat).call_counter);
   VG_(gdb_printf)("distinct-bbs: %d\n", CLG_(stat).distinct_bbs);
   VG_(gdb_printf)("distinct-calls: %d\n", CLG_(stat).distinct_jccs);
   VG_(gdb_printf)("distinct-functions: %d\n", CLG_(stat).distinct_fns);
   VG_(gdb_printf)("distinct-contexts: %d\n", CLG_(stat).distinct_contexts);

   /* "events:" line.  Given here because it will be dynamic in the future */
   HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap));
   VG_(gdb_printf)("events: %s\n", evmap);
   VG_(free)(evmap);
   /* "part:" line (number of last part; is 0 at start) */
   VG_(gdb_printf)("part: %d\n", CLG_(get_dump_counter)());

   /* threads */
   th = CLG_(get_threads)();
   VG_(gdb_printf)("threads:");
   for(t=1;t<VG_N_THREADS;t++) {
      if (!th[t]) continue;
      VG_(gdb_printf)(" %d", t);
   }
   VG_(gdb_printf)("\n");
   VG_(gdb_printf)("current-tid: %d\n", orig_tid);
   CLG_(forall_threads)(dump_state_of_thread_togdb);
}


static void print_monitor_help ( void )
{
   VG_(gdb_printf) ("\n");
   VG_(gdb_printf) ("callgrind monitor commands:\n");
   VG_(gdb_printf) ("  dump [<dump_hint>]\n");
   VG_(gdb_printf) ("        dump counters\n");
   VG_(gdb_printf) ("  zero\n");
   VG_(gdb_printf) ("        zero counters\n");
   VG_(gdb_printf) ("  status\n");
   VG_(gdb_printf) ("        print status\n");
   VG_(gdb_printf) ("  instrumentation [on|off]\n");
   VG_(gdb_printf) ("        get/set (if on/off given) instrumentation state\n");
   VG_(gdb_printf) ("\n");
}

/* return True if request recognised, False otherwise */
static Bool handle_gdb_monitor_command (ThreadId tid, const HChar *req)
{
   HChar* wcmd;
   HChar s[VG_(strlen)(req) + 1]; /* copy for strtok_r */
   HChar *ssaveptr;

   VG_(strcpy) (s, req);

   wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
   switch (VG_(keyword_id) ("help dump zero status instrumentation",
                            wcmd, kwd_report_duplicated_matches)) {
      case -2: /* multiple matches */
         return True;
      case -1: /* not found */
         return False;
      case 0: /* help */
         print_monitor_help();
         return True;
      case 1: { /* dump */
         CLG_(dump_profile)(req, False);
         return True;
      }
      case 2: { /* zero */
         CLG_(zero_all_cost)(False);
         return True;
      }

      case 3: { /* status */
         HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr);
         if (arg && (VG_(strcmp)(arg, "internal") == 0)) {
            /* internal interface to callgrind_control */
            dump_state_togdb();
            return True;
         }

         if (!CLG_(instrument_state)) {
            VG_(gdb_printf)("No status available as instrumentation is switched off\n");
         } else {
            // Status information to be improved ...
            thread_info** th = CLG_(get_threads)();
            Int t, tcount = 0;
            for(t=1;t<VG_N_THREADS;t++)
               if (th[t]) tcount++;
            VG_(gdb_printf)("%d thread(s) running.\n", tcount);
         }
         return True;
      }

      case 4: { /* instrumentation */
         HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr);
         if (!arg) {
            VG_(gdb_printf)("instrumentation: %s\n",
                            CLG_(instrument_state) ? "on":"off");
         }
         else
            CLG_(set_instrument_state)("Command", VG_(strcmp)(arg,"off")!=0);
         return True;
      }

      default:
         tl_assert(0);
         return False;
   }
}

static
Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
{
   if (!VG_IS_TOOL_USERREQ('C','T',args[0])
       && VG_USERREQ__GDB_MONITOR_COMMAND != args[0])
      return False;

   switch(args[0]) {
      case VG_USERREQ__DUMP_STATS:
         CLG_(dump_profile)("Client Request", True);
         *ret = 0; /* meaningless */
         break;

      case VG_USERREQ__DUMP_STATS_AT:
      {
         const HChar *arg = (HChar*)args[1];
         HChar buf[30 + VG_(strlen)(arg)]; // large enough
         VG_(sprintf)(buf,"Client Request: %s", arg);
         CLG_(dump_profile)(buf, True);
         *ret = 0; /* meaningless */
      }
      break;

      case VG_USERREQ__ZERO_STATS:
         CLG_(zero_all_cost)(True);
         *ret = 0; /* meaningless */
         break;

      case VG_USERREQ__TOGGLE_COLLECT:
         CLG_(current_state).collect = !CLG_(current_state).collect;
         CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
                   CLG_(current_state).collect ? "ON" : "OFF");
         *ret = 0; /* meaningless */
         break;

      case VG_USERREQ__START_INSTRUMENTATION:
         CLG_(set_instrument_state)("Client Request", True);
         *ret = 0; /* meaningless */
         break;

      case VG_USERREQ__STOP_INSTRUMENTATION:
         CLG_(set_instrument_state)("Client Request", False);
         *ret = 0; /* meaningless */
         break;

      case VG_USERREQ__GDB_MONITOR_COMMAND: {
         Bool handled = handle_gdb_monitor_command (tid, (HChar*)args[1]);
         if (handled)
            *ret = 1;
         else
            *ret = 0;
         return handled;
      }
      default:
         return False;
   }

   return True;
}


/* Syscall Timing */

/* struct timeval syscalltime[VG_N_THREADS]; */
#if CLG_MICROSYSTIME
ULong *syscalltime;
#else
UInt *syscalltime;
#endif

static
void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
                           UWord* args, UInt nArgs)
{
   if (CLG_(clo).collect_systime) {
#if CLG_MICROSYSTIME
      struct vki_timeval tv_now;
      VG_(gettimeofday)(&tv_now, NULL);
      syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
#else
      syscalltime[tid] = VG_(read_millisecond_timer)();
#endif
   }
}

static
void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
                            UWord* args, UInt nArgs, SysRes res)
{
   if (CLG_(clo).collect_systime &&
       CLG_(current_state).bbcc) {
      Int o;
#if CLG_MICROSYSTIME
      struct vki_timeval tv_now;
      ULong diff;

      VG_(gettimeofday)(&tv_now, NULL);
      diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
#else
      UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
#endif

      /* offset o is for "SysCount", o+1 for "SysTime" */
      o = fullOffset(EG_SYS);
      CLG_ASSERT(o>=0);
      CLG_DEBUG(0,"   Time (Off %d) for Syscall %u: %llu\n", o, syscallno,
                (ULong)diff);

      CLG_(current_state).cost[o] ++;
      CLG_(current_state).cost[o+1] += diff;
      if (!CLG_(current_state).bbcc->skipped)
         CLG_(init_cost_lz)(CLG_(sets).full,
                            &(CLG_(current_state).bbcc->skipped));
      CLG_(current_state).bbcc->skipped[o] ++;
      CLG_(current_state).bbcc->skipped[o+1] += diff;
   }
}

static UInt ULong_width(ULong n)
{
   UInt w = 0;
   while (n > 0) {
      n = n / 10;
      w++;
   }
   if (w == 0) w = 1;
   return w + (w-1)/3;   // add space for commas
}
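
/* Illustrative check (added note, not from the original source):
   ULong_width(1234567) counts 7 digits and adds (7-1)/3 = 2 for the
   thousands separators, returning 9 -- the width of "1,234,567" as
   printed with the comma-grouped format built below. */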
1769
1770 static
branchsim_printstat(int l1,int l2,int l3)1771 void branchsim_printstat(int l1, int l2, int l3)
1772 {
1773 static HChar fmt[128]; // large enough
1774 FullCost total;
1775 ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp;
1776 ULong B_total_b, B_total_mp;
1777
1778 total = CLG_(total_cost);
1779 Bc_total_b = total[ fullOffset(EG_BC) ];
1780 Bc_total_mp = total[ fullOffset(EG_BC)+1 ];
1781 Bi_total_b = total[ fullOffset(EG_BI) ];
1782 Bi_total_mp = total[ fullOffset(EG_BI)+1 ];
1783
1784 /* Make format string, getting width right for numbers */
1785 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1786 l1, l2, l3);
1787
1788 if (0 == Bc_total_b) Bc_total_b = 1;
1789 if (0 == Bi_total_b) Bi_total_b = 1;
1790 B_total_b = Bc_total_b + Bi_total_b;
1791 B_total_mp = Bc_total_mp + Bi_total_mp;
1792
1793 VG_(umsg)("\n");
1794 VG_(umsg)(fmt, "Branches: ",
1795 B_total_b, Bc_total_b, Bi_total_b);
1796
1797 VG_(umsg)(fmt, "Mispredicts: ",
1798 B_total_mp, Bc_total_mp, Bi_total_mp);
1799
1800 VG_(umsg)("Mispred rate: %*.1f%% (%*.1f%% + %*.1f%% )\n",
1801 l1, B_total_mp * 100.0 / B_total_b,
1802 l2, Bc_total_mp * 100.0 / Bc_total_b,
1803 l3, Bi_total_mp * 100.0 / Bi_total_b);
1804 }
1805
1806 static
clg_print_stats(void)1807 void clg_print_stats(void)
1808 {
1809 int BB_lookups =
1810 CLG_(stat).full_debug_BBs +
1811 CLG_(stat).fn_name_debug_BBs +
1812 CLG_(stat).file_line_debug_BBs +
1813 CLG_(stat).no_debug_BBs;

   /* Hash table stats */
   VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
                CLG_(stat).distinct_objs);
   VG_(message)(Vg_DebugMsg, "Distinct files:   %d\n",
                CLG_(stat).distinct_files);
   VG_(message)(Vg_DebugMsg, "Distinct fns:     %d\n",
                CLG_(stat).distinct_fns);
   VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
                CLG_(stat).distinct_contexts);
   VG_(message)(Vg_DebugMsg, "Distinct BBs:     %d\n",
                CLG_(stat).distinct_bbs);
   VG_(message)(Vg_DebugMsg, "Cost entries:     %u (Chunks %u)\n",
                CLG_(costarray_entries), CLG_(costarray_chunks));
   VG_(message)(Vg_DebugMsg, "Distinct BBCCs:   %d\n",
                CLG_(stat).distinct_bbccs);
   VG_(message)(Vg_DebugMsg, "Distinct JCCs:    %d\n",
                CLG_(stat).distinct_jccs);
   VG_(message)(Vg_DebugMsg, "Distinct skips:   %d\n",
                CLG_(stat).distinct_skips);
   VG_(message)(Vg_DebugMsg, "BB lookups:       %d\n",
                BB_lookups);
   if (BB_lookups > 0) {
      VG_(message)(Vg_DebugMsg, "With full      debug info:%3d%% (%d)\n",
                   CLG_(stat).full_debug_BBs * 100 / BB_lookups,
                   CLG_(stat).full_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n",
                   CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
                   CLG_(stat).file_line_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With fn name   debug info:%3d%% (%d)\n",
                   CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
                   CLG_(stat).fn_name_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With no        debug info:%3d%% (%d)\n",
                   CLG_(stat).no_debug_BBs * 100 / BB_lookups,
                   CLG_(stat).no_debug_BBs);
   }
   VG_(message)(Vg_DebugMsg, "BBCC Clones:       %d\n",
                CLG_(stat).bbcc_clones);
   VG_(message)(Vg_DebugMsg, "BBs Retranslated:  %d\n",
                CLG_(stat).bb_retranslations);
   VG_(message)(Vg_DebugMsg, "Distinct instrs:   %d\n",
                CLG_(stat).distinct_instrs);

   VG_(message)(Vg_DebugMsg, "LRU Context Misses: %d\n",
                CLG_(stat).cxt_lru_misses);
   VG_(message)(Vg_DebugMsg, "LRU BBCC Misses:    %d\n",
                CLG_(stat).bbcc_lru_misses);
   VG_(message)(Vg_DebugMsg, "LRU JCC Misses:     %d\n",
                CLG_(stat).jcc_lru_misses);
   VG_(message)(Vg_DebugMsg, "BBs Executed:       %llu\n",
                CLG_(stat).bb_executions);
   VG_(message)(Vg_DebugMsg, "Calls:              %llu\n",
                CLG_(stat).call_counter);
   VG_(message)(Vg_DebugMsg, "CondJMP followed:   %llu\n",
                CLG_(stat).jcnd_counter);
   VG_(message)(Vg_DebugMsg, "Boring JMPs:        %llu\n",
                CLG_(stat).jump_counter);
   VG_(message)(Vg_DebugMsg, "Recursive calls:    %llu\n",
                CLG_(stat).rec_call_counter);
   VG_(message)(Vg_DebugMsg, "Returns:            %llu\n",
                CLG_(stat).ret_counter);
}


static
void finish(void)
{
   HChar fmt[128]; // large enough
   Int l1, l2, l3;
   FullCost total;

   CLG_DEBUG(0, "finish()\n");

   (*CLG_(cachesim).finish)();

   /* pop all remaining items from CallStack for correct sums */
   CLG_(forall_threads)(unwind_thread);

   CLG_(dump_profile)(0, False);

   if (VG_(clo_verbosity) == 0) return;

   if (VG_(clo_stats)) {
      VG_(message)(Vg_DebugMsg, "\n");
      clg_print_stats();
      VG_(message)(Vg_DebugMsg, "\n");
   }

   HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap));
   VG_(message)(Vg_UserMsg, "Events    : %s\n", evmap);
   VG_(free)(evmap);
   HChar *mcost = CLG_(mappingcost_as_string)(CLG_(dumpmap), CLG_(total_cost));
   VG_(message)(Vg_UserMsg, "Collected : %s\n", mcost);
   VG_(free)(mcost);
   VG_(message)(Vg_UserMsg, "\n");

   /* determine value widths for statistics */
   total = CLG_(total_cost);
   l1 = ULong_width( total[fullOffset(EG_IR)] );
   l2 = l3 = 0;
   if (CLG_(clo).simulate_cache) {
      l2 = ULong_width( total[fullOffset(EG_DR)] );
      l3 = ULong_width( total[fullOffset(EG_DW)] );
   }
   if (CLG_(clo).simulate_branch) {
      int l2b = ULong_width( total[fullOffset(EG_BC)] );
      int l3b = ULong_width( total[fullOffset(EG_BI)] );
      if (l2b > l2) l2 = l2b;
      if (l3b > l3) l3 = l3b;
   }

   /* Make format string, getting width right for numbers */
   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);

   /* Always print this */
   VG_(umsg)(fmt, "I   refs:     ", total[fullOffset(EG_IR)] );

   if (CLG_(clo).simulate_cache)
      (*CLG_(cachesim).printstat)(l1, l2, l3);

   if (CLG_(clo).simulate_branch)
      branchsim_printstat(l1, l2, l3);
}


void CLG_(fini)(Int exitcode)
{
   finish();
}


/*--------------------------------------------------------------------*/
/*--- Setup                                                        ---*/
/*--------------------------------------------------------------------*/

static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done )
{
   static ULong last_blocks_done = 0;

   if (0)
      VG_(printf)("%d R %llu\n", (Int)tid, blocks_done);

   /* throttle calls to CLG_(run_thread) by number of BBs executed */
   if (blocks_done - last_blocks_done < 5000) return;
   last_blocks_done = blocks_done;

   CLG_(run_thread)( tid );
}
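
/* The 5000-BB threshold above is a coarse throttle: a thread executing
   one million BBs triggers roughly 200 calls to CLG_(run_thread),
   presumably to keep thread-switch bookkeeping cheap.  Note that
   last_blocks_done is a single static variable shared by all threads,
   so the throttling is approximate rather than per-thread. */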

static
void CLG_(post_clo_init)(void)
{
   if (VG_(clo_vex_control).iropt_register_updates_default
       != VexRegUpdSpAtMemAccess) {
      CLG_DEBUG(1, " Using user specified value for "
                "--vex-iropt-register-updates\n");
   } else {
      CLG_DEBUG(1,
                " Using default --vex-iropt-register-updates="
                "sp-at-mem-access\n");
   }

   if (CLG_(clo).collect_systime) {
      VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
                                 CLG_(post_syscalltime));
      syscalltime = CLG_MALLOC("cl.main.pci.1",
                               VG_N_THREADS * sizeof syscalltime[0]);
      for (UInt i = 0; i < VG_N_THREADS; ++i) {
         syscalltime[i] = 0;
      }
   }

   if (VG_(clo_px_file_backed) != VexRegUpdSpAtMemAccess) {
      CLG_DEBUG(1, " Using user specified value for "
                "--px-file-backed\n");
   } else {
      CLG_DEBUG(1,
                " Using default --px-file-backed="
                "sp-at-mem-access\n");
   }

   if (VG_(clo_vex_control).iropt_unroll_thresh != 0) {
      VG_(message)(Vg_UserMsg,
                   "callgrind only works with --vex-iropt-unroll-thresh=0\n"
                   "=> resetting it back to 0\n");
      VG_(clo_vex_control).iropt_unroll_thresh = 0; // cannot be overridden.
   }
   if (VG_(clo_vex_control).guest_chase_thresh != 0) {
      VG_(message)(Vg_UserMsg,
                   "callgrind only works with --vex-guest-chase-thresh=0\n"
                   "=> resetting it back to 0\n");
      VG_(clo_vex_control).guest_chase_thresh = 0; // cannot be overridden.
   }
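
   /* Rationale (an assumption, not stated in the code): unrolling or
      jump chasing in the VEX front end would merge several guest basic
      blocks into one translation, undermining Callgrind's per-BB cost
      attribution and jump tracking, so both thresholds are pinned to 0
      here as well as in CLG_(pre_clo_init) below. */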

   CLG_DEBUG(1, " dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
   CLG_DEBUG(1, " call sep. : %d\n", CLG_(clo).separate_callers);
   CLG_DEBUG(1, " rec. sep. : %d\n", CLG_(clo).separate_recursions);

   if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
      VG_(message)(Vg_UserMsg, "Using source line as position.\n");
      CLG_(clo).dump_line = True;
   }

   CLG_(init_dumps)();

   (*CLG_(cachesim).post_clo_init)();

   CLG_(init_eventsets)();
   CLG_(init_statistics)(& CLG_(stat));
   CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );

   /* initialize hash tables */
   CLG_(init_obj_table)();
   CLG_(init_cxt_table)();
   CLG_(init_bb_hash)();

   CLG_(init_threads)();
   CLG_(run_thread)(1);

   CLG_(instrument_state) = CLG_(clo).instrument_atstart;
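
   /* Instrumentation can be toggled later at run time, e.g. with
      "callgrind_control -i on|off" or with the
      CALLGRIND_START_INSTRUMENTATION / CALLGRIND_STOP_INSTRUMENTATION
      client requests from callgrind.h. */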

   if (VG_(clo_verbosity) > 0) {
      VG_(message)(Vg_UserMsg,
                   "For interactive control, run 'callgrind_control%s%s -h'.\n",
                   (VG_(arg_vgdb_prefix) ? " " : ""),
                   (VG_(arg_vgdb_prefix) ? VG_(arg_vgdb_prefix) : ""));
   }
}

static
void CLG_(pre_clo_init)(void)
{
   VG_(details_name)            ("Callgrind");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("a call-graph generating cache profiler");
   VG_(details_copyright_author)("Copyright (C) 2002-2017, and GNU GPL'd, "
                                 "by Josef Weidendorfer et al.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 500 );

   VG_(clo_vex_control).iropt_register_updates_default
      = VG_(clo_px_file_backed)
      = VexRegUpdSpAtMemAccess; // overridable by the user.

   VG_(clo_vex_control).iropt_unroll_thresh = 0;  // cannot be overridden.
   VG_(clo_vex_control).guest_chase_thresh  = 0;  // cannot be overridden.

   VG_(basic_tool_funcs)        (CLG_(post_clo_init),
                                 CLG_(instrument),
                                 CLG_(fini));

   VG_(needs_superblock_discards)(clg_discard_superblock_info);

   VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
                                   CLG_(print_usage),
                                   CLG_(print_debug_usage));

   VG_(needs_client_requests)(CLG_(handle_client_request));
   VG_(needs_print_stats)    (clg_print_stats);

   VG_(track_start_client_code)  ( & clg_start_client_code_callback );
   VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
   VG_(track_post_deliver_signal)( & CLG_(post_signal) );

   CLG_(set_clo_defaults)();
}

VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))

/*--------------------------------------------------------------------*/
/*--- end                                                  main.c ---*/
/*--------------------------------------------------------------------*/
