• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Callgrind                                                    ---*/
4 /*---                                                       main.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of Callgrind, a Valgrind tool for call graph
9    profiling programs.
10 
11    Copyright (C) 2002-2013, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
12 
13    This tool is derived from and contains code from Cachegrind
14    Copyright (C) 2002-2013 Nicholas Nethercote (njn@valgrind.org)
15 
16    This program is free software; you can redistribute it and/or
17    modify it under the terms of the GNU General Public License as
18    published by the Free Software Foundation; either version 2 of the
19    License, or (at your option) any later version.
20 
21    This program is distributed in the hope that it will be useful, but
22    WITHOUT ANY WARRANTY; without even the implied warranty of
23    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24    General Public License for more details.
25 
26    You should have received a copy of the GNU General Public License
27    along with this program; if not, write to the Free Software
28    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29    02111-1307, USA.
30 
31    The GNU General Public License is contained in the file COPYING.
32 */
33 
34 #include "config.h"
35 #include "callgrind.h"
36 #include "global.h"
37 
38 #include "pub_tool_threadstate.h"
39 #include "pub_tool_gdbserver.h"
40 
41 #include "cg_branchpred.c"
42 
43 /*------------------------------------------------------------*/
44 /*--- Global variables                                     ---*/
45 /*------------------------------------------------------------*/
46 
/* Global state shared by all threads */
CommandLineOptions CLG_(clo);       /* command line options, parsed at startup */
Statistics CLG_(stat);              /* profiling statistics counters */
Bool CLG_(instrument_state) = True; /* Instrumentation on ? */

/* thread and signal handler specific */
exec_state CLG_(current_state);

/* min of L1 and LL cache line sizes.  This only gets set to a
   non-zero value if we are doing cache simulation. */
Int CLG_(min_line_size) = 0;

59 
60 /*------------------------------------------------------------*/
61 /*--- Statistics                                           ---*/
62 /*------------------------------------------------------------*/
63 
CLG_(init_statistics)64 static void CLG_(init_statistics)(Statistics* s)
65 {
66   s->call_counter        = 0;
67   s->jcnd_counter        = 0;
68   s->jump_counter        = 0;
69   s->rec_call_counter    = 0;
70   s->ret_counter         = 0;
71   s->bb_executions       = 0;
72 
73   s->context_counter     = 0;
74   s->bb_retranslations   = 0;
75 
76   s->distinct_objs       = 0;
77   s->distinct_files      = 0;
78   s->distinct_fns        = 0;
79   s->distinct_contexts   = 0;
80   s->distinct_bbs        = 0;
81   s->distinct_bbccs      = 0;
82   s->distinct_instrs     = 0;
83   s->distinct_skips      = 0;
84 
85   s->bb_hash_resizes     = 0;
86   s->bbcc_hash_resizes   = 0;
87   s->jcc_hash_resizes    = 0;
88   s->cxt_hash_resizes    = 0;
89   s->fn_array_resizes    = 0;
90   s->call_stack_resizes  = 0;
91   s->fn_stack_resizes    = 0;
92 
93   s->full_debug_BBs      = 0;
94   s->file_line_debug_BBs = 0;
95   s->fn_name_debug_BBs   = 0;
96   s->no_debug_BBs        = 0;
97   s->bbcc_lru_misses     = 0;
98   s->jcc_lru_misses      = 0;
99   s->cxt_lru_misses      = 0;
100   s->bbcc_clones         = 0;
101 }
102 
103 
104 /*------------------------------------------------------------*/
/*--- Simple callbacks (not cache simulator)               ---*/
106 /*------------------------------------------------------------*/
107 
108 VG_REGPARM(1)
log_global_event(InstrInfo * ii)109 static void log_global_event(InstrInfo* ii)
110 {
111     ULong* cost_Bus;
112 
113     CLG_DEBUG(6, "log_global_event:  Ir  %#lx/%u\n",
114               CLG_(bb_base) + ii->instr_offset, ii->instr_size);
115 
116     if (!CLG_(current_state).collect) return;
117 
118     CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BUS))>0 );
119 
120     CLG_(current_state).cost[ fullOffset(EG_BUS) ]++;
121 
122     if (CLG_(current_state).nonskipped)
123         cost_Bus = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BUS);
124     else
125         cost_Bus = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BUS];
126     cost_Bus[0]++;
127 }
128 
129 
130 /* For branches, we consult two different predictors, one which
131    predicts taken/untaken for conditional branches, and the other
132    which predicts the branch target address for indirect branches
133    (jump-to-register style ones). */
134 
135 static VG_REGPARM(2)
log_cond_branch(InstrInfo * ii,Word taken)136 void log_cond_branch(InstrInfo* ii, Word taken)
137 {
138     Bool miss;
139     Int fullOffset_Bc;
140     ULong* cost_Bc;
141 
142     CLG_DEBUG(6, "log_cond_branch:  Ir %#lx, taken %lu\n",
143               CLG_(bb_base) + ii->instr_offset, taken);
144 
145     miss = 1 & do_cond_branch_predict(CLG_(bb_base) + ii->instr_offset, taken);
146 
147     if (!CLG_(current_state).collect) return;
148 
149     CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BC))>0 );
150 
151     if (CLG_(current_state).nonskipped)
152         cost_Bc = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BC);
153     else
154         cost_Bc = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BC];
155 
156     fullOffset_Bc = fullOffset(EG_BC);
157     CLG_(current_state).cost[ fullOffset_Bc ]++;
158     cost_Bc[0]++;
159     if (miss) {
160         CLG_(current_state).cost[ fullOffset_Bc+1 ]++;
161         cost_Bc[1]++;
162     }
163 }
164 
165 static VG_REGPARM(2)
log_ind_branch(InstrInfo * ii,UWord actual_dst)166 void log_ind_branch(InstrInfo* ii, UWord actual_dst)
167 {
168     Bool miss;
169     Int fullOffset_Bi;
170     ULong* cost_Bi;
171 
172     CLG_DEBUG(6, "log_ind_branch:  Ir  %#lx, dst %#lx\n",
173               CLG_(bb_base) + ii->instr_offset, actual_dst);
174 
175     miss = 1 & do_ind_branch_predict(CLG_(bb_base) + ii->instr_offset, actual_dst);
176 
177     if (!CLG_(current_state).collect) return;
178 
179     CLG_ASSERT( (ii->eventset->mask & (1u<<EG_BI))>0 );
180 
181     if (CLG_(current_state).nonskipped)
182         cost_Bi = CLG_(current_state).nonskipped->skipped + fullOffset(EG_BI);
183     else
184         cost_Bi = CLG_(cost_base) + ii->cost_offset + ii->eventset->offset[EG_BI];
185 
186     fullOffset_Bi = fullOffset(EG_BI);
187     CLG_(current_state).cost[ fullOffset_Bi ]++;
188     cost_Bi[0]++;
189     if (miss) {
190         CLG_(current_state).cost[ fullOffset_Bi+1 ]++;
191         cost_Bi[1]++;
192     }
193 }
194 
195 /*------------------------------------------------------------*/
196 /*--- Instrumentation structures and event queue handling  ---*/
197 /*------------------------------------------------------------*/
198 
199 /* Maintain an ordered list of memory events which are outstanding, in
200    the sense that no IR has yet been generated to do the relevant
201    helper calls.  The BB is scanned top to bottom and memory events
202    are added to the end of the list, merging with the most recent
203    notified event where possible (Dw immediately following Dr and
204    having the same size and EA can be merged).
205 
206    This merging is done so that for architectures which have
207    load-op-store instructions (x86, amd64), the insn is treated as if
208    it makes just one memory reference (a modify), rather than two (a
209    read followed by a write at the same address).
210 
211    At various points the list will need to be flushed, that is, IR
212    generated from it.  That must happen before any possible exit from
213    the block (the end, or an IRStmt_Exit).  Flushing also takes place
214    when there is no space to add a new event.
215 
216    If we require the simulation statistics to be up to date with
217    respect to possible memory exceptions, then the list would have to
218    be flushed before each memory reference.  That would however lose
219    performance by inhibiting event-merging during flushing.
220 
221    Flushing the list consists of walking it start to end and emitting
222    instrumentation IR for each event, in the order in which they
223    appear.  It may be possible to emit a single call for two adjacent
224    events in order to reduce the number of helper function calls made.
225    For example, it could well be profitable to handle two adjacent Ir
226    events with a single helper call.  */
227 
/* An atomic IR expression, suitable as a helper-call argument
   (NOTE(review): presumably restricted to RdTmp/Const as in VEX
   convention -- confirm against libvex_ir.h). */
typedef
   IRExpr
   IRAtom;

/* Kind of a queued event (see Event below). */
typedef
   enum {
      Ev_Ir,  // Instruction read
      Ev_Dr,  // Data read
      Ev_Dw,  // Data write
      Ev_Dm,  // Data modify (read then write)
      Ev_Bc,  // branch conditional
      Ev_Bi,  // branch indirect (to unknown destination)
      Ev_G    // Global bus event
   }
   EventTag;

/* One outstanding event, queued until instrumentation IR is emitted
   for it by flushEvents().  'inode' is the InstrInfo of the guest
   instruction the event belongs to; 'tag' selects the valid union
   member in 'Ev'. */
typedef
   struct {
      EventTag   tag;
      InstrInfo* inode;
      union {
	 struct {
	 } Ir;                 /* no payload: address comes from inode */
	 struct {
	    IRAtom* ea;        /* effective address of the access */
	    Int     szB;       /* access size in bytes */
	 } Dr;
	 struct {
	    IRAtom* ea;
	    Int     szB;
	 } Dw;
	 struct {
	    IRAtom* ea;
	    Int     szB;
	 } Dm;
         struct {
            IRAtom* taken; /* :: Ity_I1 */
         } Bc;
         struct {
            IRAtom* dst;       /* branch target expression */
         } Bi;
	 struct {
	 } G;                  /* no payload */
      } Ev;
   }
   Event;
274 
/* Zero-initialise an Event slot before filling it in. */
static void init_Event ( Event* ev ) {
   VG_(memset)(ev, 0, sizeof *ev);
}
278 
get_Event_dea(Event * ev)279 static IRAtom* get_Event_dea ( Event* ev ) {
280    switch (ev->tag) {
281       case Ev_Dr: return ev->Ev.Dr.ea;
282       case Ev_Dw: return ev->Ev.Dw.ea;
283       case Ev_Dm: return ev->Ev.Dm.ea;
284       default:    tl_assert(0);
285    }
286 }
287 
get_Event_dszB(Event * ev)288 static Int get_Event_dszB ( Event* ev ) {
289    switch (ev->tag) {
290       case Ev_Dr: return ev->Ev.Dr.szB;
291       case Ev_Dw: return ev->Ev.Dw.szB;
292       case Ev_Dm: return ev->Ev.Dm.szB;
293       default:    tl_assert(0);
294    }
295 }
296 
297 
/* Up to this many unnotified events are allowed.  Number is
   arbitrary.  Larger numbers allow more event merging to occur, but
   potentially induce more spilling due to extending live ranges of
   address temporaries. */
#define N_EVENTS 16


/* A struct which holds all the running state during instrumentation.
   Mostly to avoid passing loads of parameters everywhere. */
typedef struct {
    /* The current outstanding-memory-event list. */
    Event events[N_EVENTS];
    Int   events_used;      /* number of valid entries in events[] */

    /* The array of InstrInfo's is part of BB struct. */
    BB* bb;

    /* BB seen before (ie. re-instrumentation) */
    Bool seen_before;

    /* Number InstrInfo bins 'used' so far. */
    UInt ii_index;

    // current offset of guest instructions from BB start
    UInt instr_offset;

    /* The output SB being constructed. */
    IRSB* sbOut;
} ClgState;
327 
328 
/* Debug helper: print a one-line description of a queued event. */
static void showEvent ( Event* ev )
{
   EventTag t = ev->tag;

   if (t == Ev_Ir) {
      VG_(printf)("Ir (InstrInfo %p) at +%d\n",
                  ev->inode, ev->inode->instr_offset);
   }
   else if (t == Ev_Dr) {
      VG_(printf)("Dr (InstrInfo %p) at +%d %d EA=",
                  ev->inode, ev->inode->instr_offset, ev->Ev.Dr.szB);
      ppIRExpr(ev->Ev.Dr.ea);
      VG_(printf)("\n");
   }
   else if (t == Ev_Dw) {
      VG_(printf)("Dw (InstrInfo %p) at +%d %d EA=",
                  ev->inode, ev->inode->instr_offset, ev->Ev.Dw.szB);
      ppIRExpr(ev->Ev.Dw.ea);
      VG_(printf)("\n");
   }
   else if (t == Ev_Dm) {
      VG_(printf)("Dm (InstrInfo %p) at +%d %d EA=",
                  ev->inode, ev->inode->instr_offset, ev->Ev.Dm.szB);
      ppIRExpr(ev->Ev.Dm.ea);
      VG_(printf)("\n");
   }
   else if (t == Ev_Bc) {
      VG_(printf)("Bc %p   GA=", ev->inode);
      ppIRExpr(ev->Ev.Bc.taken);
      VG_(printf)("\n");
   }
   else if (t == Ev_Bi) {
      VG_(printf)("Bi %p  DST=", ev->inode);
      ppIRExpr(ev->Ev.Bi.dst);
      VG_(printf)("\n");
   }
   else if (t == Ev_G) {
      VG_(printf)("G  %p\n", ev->inode);
   }
   else {
      tl_assert(0);
   }
}
372 
/* Generate code for all outstanding memory events, and mark the queue
   empty.  Code is generated into cgs->sbOut, and this activity
   'consumes' slots in cgs->bb. */

static void flushEvents ( ClgState* clgs )
{
   Int        i, regparms, inew;
   const HChar* helperName;
   void*      helperAddr;
   IRExpr**   argv;
   IRExpr*    i_node_expr;
   IRDirty*   di;
   Event*     ev;
   Event*     ev2;
   Event*     ev3;

   /* Pass 1 (first instrumentation of this BB only): attach/extend the
      event set of each instruction's InstrInfo according to the events
      queued for it. */
   if (!clgs->seen_before) {
       // extend event sets as needed
       // available sets: D0 Dr
       for(i=0; i<clgs->events_used; i++) {
	   ev  = &clgs->events[i];
	   switch(ev->tag) {
	   case Ev_Ir:
	       // Ir event always is first for a guest instruction
	       CLG_ASSERT(ev->inode->eventset == 0);
	       ev->inode->eventset = CLG_(sets).base;
	       break;
	   case Ev_Dr:
               // extend event set by Dr counters
	       ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
							   EG_DR);
	       break;
	   case Ev_Dw:
	   case Ev_Dm:
               // extend event set by Dw counters
	       ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
							   EG_DW);
	       break;
           case Ev_Bc:
               // extend event set by Bc counters
               ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                           EG_BC);
               break;
           case Ev_Bi:
               // extend event set by Bi counters
               ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
                                                           EG_BI);
               break;
	   case Ev_G:
               // extend event set by Bus counter
	       ev->inode->eventset = CLG_(add_event_group)(ev->inode->eventset,
							   EG_BUS);
	       break;
	   default:
	       tl_assert(0);
	   }
       }
   }

   /* Pass 2: walk the queue start to end, emitting one helper call per
      event or per merged group of events; 'inew' is the index of the
      first event not consumed by this iteration. */
   for(i = 0; i < clgs->events_used; i = inew) {

      helperName = NULL;
      helperAddr = NULL;
      argv       = NULL;
      regparms   = 0;

      /* generate IR to notify event i and possibly the ones
	 immediately following it. */
      tl_assert(i >= 0 && i < clgs->events_used);

      /* Look ahead up to two events for possible merging. */
      ev  = &clgs->events[i];
      ev2 = ( i < clgs->events_used-1 ? &clgs->events[i+1] : NULL );
      ev3 = ( i < clgs->events_used-2 ? &clgs->events[i+2] : NULL );

      CLG_DEBUGIF(5) {
	 VG_(printf)("   flush ");
	 showEvent( ev );
      }

      i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );

      /* Decide on helper fn to call and args to pass it, and advance
	 i appropriately.
	 Dm events have same effect as Dw events */
      switch (ev->tag) {
	 case Ev_Ir:
	    /* Merge an Ir with a following Dr. */
	    if (ev2 && ev2->tag == Ev_Dr) {
	       /* Why is this true?  It's because we're merging an Ir
		  with a following Dr.  The Ir derives from the
		  instruction's IMark and the Dr from data
		  references which follow it.  In short it holds
		  because each insn starts with an IMark, hence an
		  Ev_Ir, and so these Dr must pertain to the
		  immediately preceding Ir.  Same applies to analogous
		  assertions in the subsequent cases. */
	       tl_assert(ev2->inode == ev->inode);
	       helperName = CLG_(cachesim).log_1I1Dr_name;
	       helperAddr = CLG_(cachesim).log_1I1Dr;
	       argv = mkIRExprVec_3( i_node_expr,
				     get_Event_dea(ev2),
				     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
	       regparms = 3;
	       inew = i+2;
	    }
	    /* Merge an Ir with a following Dw/Dm. */
	    else
	    if (ev2 && (ev2->tag == Ev_Dw || ev2->tag == Ev_Dm)) {
	       tl_assert(ev2->inode == ev->inode);
	       helperName = CLG_(cachesim).log_1I1Dw_name;
	       helperAddr = CLG_(cachesim).log_1I1Dw;
	       argv = mkIRExprVec_3( i_node_expr,
				     get_Event_dea(ev2),
				     mkIRExpr_HWord( get_Event_dszB(ev2) ) );
	       regparms = 3;
	       inew = i+2;
	    }
	    /* Merge an Ir with two following Irs. */
	    else
	    if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir) {
	       helperName = CLG_(cachesim).log_3I0D_name;
	       helperAddr = CLG_(cachesim).log_3I0D;
	       argv = mkIRExprVec_3( i_node_expr,
				     mkIRExpr_HWord( (HWord)ev2->inode ),
				     mkIRExpr_HWord( (HWord)ev3->inode ) );
	       regparms = 3;
	       inew = i+3;
	    }
	    /* Merge an Ir with one following Ir. */
	    else
	    if (ev2 && ev2->tag == Ev_Ir) {
	       helperName = CLG_(cachesim).log_2I0D_name;
	       helperAddr = CLG_(cachesim).log_2I0D;
	       argv = mkIRExprVec_2( i_node_expr,
				     mkIRExpr_HWord( (HWord)ev2->inode ) );
	       regparms = 2;
	       inew = i+2;
	    }
	    /* No merging possible; emit as-is. */
	    else {
	       helperName = CLG_(cachesim).log_1I0D_name;
	       helperAddr = CLG_(cachesim).log_1I0D;
	       argv = mkIRExprVec_1( i_node_expr );
	       regparms = 1;
	       inew = i+1;
	    }
	    break;
	 case Ev_Dr:
	    /* Data read or modify */
	    helperName = CLG_(cachesim).log_0I1Dr_name;
	    helperAddr = CLG_(cachesim).log_0I1Dr;
	    argv = mkIRExprVec_3( i_node_expr,
				  get_Event_dea(ev),
				  mkIRExpr_HWord( get_Event_dszB(ev) ) );
	    regparms = 3;
	    inew = i+1;
	    break;
	 case Ev_Dw:
	 case Ev_Dm:
	    /* Data write */
	    helperName = CLG_(cachesim).log_0I1Dw_name;
	    helperAddr = CLG_(cachesim).log_0I1Dw;
	    argv = mkIRExprVec_3( i_node_expr,
				  get_Event_dea(ev),
				  mkIRExpr_HWord( get_Event_dszB(ev) ) );
	    regparms = 3;
	    inew = i+1;
	    break;
         case Ev_Bc:
            /* Conditional branch */
            helperName = "log_cond_branch";
            helperAddr = &log_cond_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
            regparms = 2;
            inew = i+1;
            break;
         case Ev_Bi:
            /* Branch to an unknown destination */
            helperName = "log_ind_branch";
            helperAddr = &log_ind_branch;
            argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
            regparms = 2;
            inew = i+1;
            break;
         case Ev_G:
            /* Global bus event (CAS, LOCK-prefix, LL-SC, etc) */
            helperName = "log_global_event";
            helperAddr = &log_global_event;
            argv = mkIRExprVec_1( i_node_expr );
            regparms = 1;
            inew = i+1;
            break;
	 default:
	    tl_assert(0);
      }

      CLG_DEBUGIF(5) {
	  if (inew > i+1) {
	      VG_(printf)("   merge ");
	      showEvent( ev2 );
	  }
	  if (inew > i+2) {
	      VG_(printf)("   merge ");
	      showEvent( ev3 );
	  }
	  if (helperAddr)
	      VG_(printf)("   call  %s (%p)\n",
			  helperName, helperAddr);
      }

      /* helper could be unset depending on the simulator used */
      if (helperAddr == 0) continue;

      /* Add the helper. */
      tl_assert(helperName);
      tl_assert(helperAddr);
      tl_assert(argv);
      di = unsafeIRDirty_0_N( regparms,
			      helperName, VG_(fnptr_to_fnentry)( helperAddr ),
			      argv );
      addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
   }

   clgs->events_used = 0;
}
598 
/* Queue an instruction-read event for 'inode'.  No-op unless cache
   simulation is enabled.  Flushes the queue first if it is full. */
static void addEvent_Ir ( ClgState* clgs, InstrInfo* inode )
{
   Event* slot;

   tl_assert(clgs->seen_before || (inode->eventset == 0));
   if (!CLG_(clo).simulate_cache) return;

   if (clgs->events_used == N_EVENTS)
      flushEvents(clgs);
   tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);

   slot = &clgs->events[clgs->events_used++];
   init_Event(slot);
   slot->tag   = Ev_Ir;
   slot->inode = inode;
}
614 
615 static
addEvent_Dr(ClgState * clgs,InstrInfo * inode,Int datasize,IRAtom * ea)616 void addEvent_Dr ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
617 {
618    Event* evt;
619    tl_assert(isIRAtom(ea));
620    tl_assert(datasize >= 1);
621    if (!CLG_(clo).simulate_cache) return;
622    tl_assert(datasize <= CLG_(min_line_size));
623 
624    if (clgs->events_used == N_EVENTS)
625       flushEvents(clgs);
626    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
627    evt = &clgs->events[clgs->events_used];
628    init_Event(evt);
629    evt->tag       = Ev_Dr;
630    evt->inode     = inode;
631    evt->Ev.Dr.szB = datasize;
632    evt->Ev.Dr.ea  = ea;
633    clgs->events_used++;
634 }
635 
636 static
addEvent_Dw(ClgState * clgs,InstrInfo * inode,Int datasize,IRAtom * ea)637 void addEvent_Dw ( ClgState* clgs, InstrInfo* inode, Int datasize, IRAtom* ea )
638 {
639    Event* lastEvt;
640    Event* evt;
641    tl_assert(isIRAtom(ea));
642    tl_assert(datasize >= 1);
643    if (!CLG_(clo).simulate_cache) return;
644    tl_assert(datasize <= CLG_(min_line_size));
645 
646    /* Is it possible to merge this write with the preceding read? */
647    lastEvt = &clgs->events[clgs->events_used-1];
648    if (clgs->events_used > 0
649        && lastEvt->tag       == Ev_Dr
650        && lastEvt->Ev.Dr.szB == datasize
651        && lastEvt->inode     == inode
652        && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
653    {
654       lastEvt->tag   = Ev_Dm;
655       return;
656    }
657 
658    /* No.  Add as normal. */
659    if (clgs->events_used == N_EVENTS)
660       flushEvents(clgs);
661    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
662    evt = &clgs->events[clgs->events_used];
663    init_Event(evt);
664    evt->tag       = Ev_Dw;
665    evt->inode     = inode;
666    evt->Ev.Dw.szB = datasize;
667    evt->Ev.Dw.ea  = ea;
668    clgs->events_used++;
669 }
670 
671 static
addEvent_D_guarded(ClgState * clgs,InstrInfo * inode,Int datasize,IRAtom * ea,IRAtom * guard,Bool isWrite)672 void addEvent_D_guarded ( ClgState* clgs, InstrInfo* inode,
673                           Int datasize, IRAtom* ea, IRAtom* guard,
674                           Bool isWrite )
675 {
676    tl_assert(isIRAtom(ea));
677    tl_assert(guard);
678    tl_assert(isIRAtom(guard));
679    tl_assert(datasize >= 1);
680    if (!CLG_(clo).simulate_cache) return;
681    tl_assert(datasize <= CLG_(min_line_size));
682 
683    /* Adding guarded memory actions and merging them with the existing
684       queue is too complex.  Simply flush the queue and add this
685       action immediately.  Since guarded loads and stores are pretty
686       rare, this is not thought likely to cause any noticeable
687       performance loss as a result of the loss of event-merging
688       opportunities. */
689    tl_assert(clgs->events_used >= 0);
690    flushEvents(clgs);
691    tl_assert(clgs->events_used == 0);
692    /* Same as case Ev_Dw / case Ev_Dr in flushEvents, except with guard */
693    IRExpr*      i_node_expr;
694    const HChar* helperName;
695    void*        helperAddr;
696    IRExpr**     argv;
697    Int          regparms;
698    IRDirty*     di;
699    i_node_expr = mkIRExpr_HWord( (HWord)inode );
700    helperName  = isWrite ? CLG_(cachesim).log_0I1Dw_name
701                          : CLG_(cachesim).log_0I1Dr_name;
702    helperAddr  = isWrite ? CLG_(cachesim).log_0I1Dw
703                          : CLG_(cachesim).log_0I1Dr;
704    argv        = mkIRExprVec_3( i_node_expr,
705                                 ea, mkIRExpr_HWord( datasize ) );
706    regparms    = 3;
707    di          = unsafeIRDirty_0_N(
708                     regparms,
709                     helperName, VG_(fnptr_to_fnentry)( helperAddr ),
710                     argv );
711    di->guard = guard;
712    addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
713 }
714 
715 static
addEvent_Bc(ClgState * clgs,InstrInfo * inode,IRAtom * guard)716 void addEvent_Bc ( ClgState* clgs, InstrInfo* inode, IRAtom* guard )
717 {
718    Event* evt;
719    tl_assert(isIRAtom(guard));
720    tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, guard)
721              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
722    if (!CLG_(clo).simulate_branch) return;
723 
724    if (clgs->events_used == N_EVENTS)
725       flushEvents(clgs);
726    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
727    evt = &clgs->events[clgs->events_used];
728    init_Event(evt);
729    evt->tag         = Ev_Bc;
730    evt->inode       = inode;
731    evt->Ev.Bc.taken = guard;
732    clgs->events_used++;
733 }
734 
735 static
addEvent_Bi(ClgState * clgs,InstrInfo * inode,IRAtom * whereTo)736 void addEvent_Bi ( ClgState* clgs, InstrInfo* inode, IRAtom* whereTo )
737 {
738    Event* evt;
739    tl_assert(isIRAtom(whereTo));
740    tl_assert(typeOfIRExpr(clgs->sbOut->tyenv, whereTo)
741              == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
742    if (!CLG_(clo).simulate_branch) return;
743 
744    if (clgs->events_used == N_EVENTS)
745       flushEvents(clgs);
746    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
747    evt = &clgs->events[clgs->events_used];
748    init_Event(evt);
749    evt->tag       = Ev_Bi;
750    evt->inode     = inode;
751    evt->Ev.Bi.dst = whereTo;
752    clgs->events_used++;
753 }
754 
755 static
addEvent_G(ClgState * clgs,InstrInfo * inode)756 void addEvent_G ( ClgState* clgs, InstrInfo* inode )
757 {
758    Event* evt;
759    if (!CLG_(clo).collect_bus) return;
760 
761    if (clgs->events_used == N_EVENTS)
762       flushEvents(clgs);
763    tl_assert(clgs->events_used >= 0 && clgs->events_used < N_EVENTS);
764    evt = &clgs->events[clgs->events_used];
765    init_Event(evt);
766    evt->tag       = Ev_G;
767    evt->inode     = inode;
768    clgs->events_used++;
769 }
770 
771 /* Initialise or check (if already seen before) an InstrInfo for next insn.
772    We only can set instr_offset/instr_size here. The required event set and
773    resulting cost offset depend on events (Ir/Dr/Dw/Dm) in guest
774    instructions. The event set is extended as required on flush of the event
775    queue (when Dm events were determined), cost offsets are determined at
776    end of BB instrumentation. */
777 static
next_InstrInfo(ClgState * clgs,UInt instr_size)778 InstrInfo* next_InstrInfo ( ClgState* clgs, UInt instr_size )
779 {
780    InstrInfo* ii;
781    tl_assert(clgs->ii_index >= 0);
782    tl_assert(clgs->ii_index < clgs->bb->instr_count);
783    ii = &clgs->bb->instr[ clgs->ii_index ];
784 
785    if (clgs->seen_before) {
786        CLG_ASSERT(ii->instr_offset == clgs->instr_offset);
787        CLG_ASSERT(ii->instr_size == instr_size);
788    }
789    else {
790        ii->instr_offset = clgs->instr_offset;
791        ii->instr_size = instr_size;
792        ii->cost_offset = 0;
793        ii->eventset = 0;
794    }
795 
796    clgs->ii_index++;
797    clgs->instr_offset += instr_size;
798    CLG_(stat).distinct_instrs++;
799 
800    return ii;
801 }
802 
803 // return total number of cost values needed for this BB
804 static
update_cost_offsets(ClgState * clgs)805 UInt update_cost_offsets( ClgState* clgs )
806 {
807     Int i;
808     InstrInfo* ii;
809     UInt cost_offset = 0;
810 
811     CLG_ASSERT(clgs->bb->instr_count == clgs->ii_index);
812     for(i=0; i<clgs->ii_index; i++) {
813 	ii = &clgs->bb->instr[i];
814 	if (clgs->seen_before) {
815 	    CLG_ASSERT(ii->cost_offset == cost_offset);
816 	} else
817 	    ii->cost_offset = cost_offset;
818 	cost_offset += ii->eventset ? ii->eventset->size : 0;
819     }
820 
821     return cost_offset;
822 }
823 
824 /*------------------------------------------------------------*/
825 /*--- Instrumentation                                      ---*/
826 /*------------------------------------------------------------*/
827 
/* Host endianness, used for the IRStmt_Store emitted by
   addConstMemStoreStmt below. */
#if defined(VG_BIGENDIAN)
# define CLGEndness Iend_BE
#elif defined(VG_LITTLEENDIAN)
# define CLGEndness Iend_LE
#else
# error "Unknown endianness"
#endif
835 
836 static
IRConst2Addr(IRConst * con)837 Addr IRConst2Addr(IRConst* con)
838 {
839     Addr addr;
840 
841     if (sizeof(Addr) == 4) {
842 	CLG_ASSERT( con->tag == Ico_U32 );
843 	addr = con->Ico.U32;
844     }
845     else if (sizeof(Addr) == 8) {
846 	CLG_ASSERT( con->tag == Ico_U64 );
847 	addr = con->Ico.U64;
848     }
849     else
850 	VG_(tool_panic)("Callgrind: invalid Addr type");
851 
852     return addr;
853 }
854 
/* First pass over a BB to instrument, counting instructions and jumps
 * This is needed for the size of the BB struct to allocate
 *
 * Called from CLG_(get_bb)
 */
void CLG_(collectBlockInfo)(IRSB* sbIn,
			    /*INOUT*/ UInt* instrs,
			    /*INOUT*/ UInt* cjmps,
			    /*INOUT*/ Bool* cjmp_inverted)
{
    Int i;
    IRStmt* st;
    Addr instrAddr =0, jumpDst;
    UInt instrLen = 0;
    /* does the most recent Ist_Exit jump to the next instruction? */
    Bool toNextInstr = False;

    // Ist_Exit has to be ignored in preamble code, before first IMark:
    // preamble code is added by VEX for self modifying code, and has
    // nothing to do with client code
    Bool inPreamble = True;

    if (!sbIn) return;

    for (i = 0; i < sbIn->stmts_used; i++) {
	  st = sbIn->stmts[i];
	  if (Ist_IMark == st->tag) {
	      inPreamble = False;

	      /* remember address/length of current guest instruction */
	      instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
	      instrLen  = st->Ist.IMark.len;

	      (*instrs)++;
	      toNextInstr = False;
	  }
	  if (inPreamble) continue;
	  if (Ist_Exit == st->tag) {
	      jumpDst = IRConst2Addr(st->Ist.Exit.dst);
	      toNextInstr =  (jumpDst == instrAddr + instrLen);

	      (*cjmps)++;
	  }
    }

    /* If the last instruction of the BB conditionally jumps to the next
     * instruction (= first instruction of the next BB in memory), the
     * condition has been inverted by VEX.
     */
    *cjmp_inverted = toNextInstr;
}
903 
904 static
addConstMemStoreStmt(IRSB * bbOut,UWord addr,UInt val,IRType hWordTy)905 void addConstMemStoreStmt( IRSB* bbOut, UWord addr, UInt val, IRType hWordTy)
906 {
907     addStmtToIRSB( bbOut,
908 		   IRStmt_Store(CLGEndness,
909 				IRExpr_Const(hWordTy == Ity_I32 ?
910 					     IRConst_U32( addr ) :
911 					     IRConst_U64( addr )),
912 				IRExpr_Const(IRConst_U32(val)) ));
913 }
914 
915 
916 /* add helper call to setup_bbcc, with pointer to BB struct as argument
917  *
918  * precondition for setup_bbcc:
919  * - jmps_passed has number of cond.jumps passed in last executed BB
920  * - current_bbcc has a pointer to the BBCC of the last executed BB
921  *   Thus, if bbcc_jmpkind is != -1 (JmpNone),
922  *     current_bbcc->bb->jmp_addr
923  *   gives the address of the jump source.
924  *
925  * the setup does 2 things:
 * - trace call:
 *   * Unwind own call stack, i.e. sync our ESP with the real ESP
 *     This is for ESP manipulation (longjmps, C++ exec handling) and RET
 *   * For CALLs or JMPs crossing objects, record the call arg and
 *     push it on our own call stack
931  *
932  * - prepare for cache log functions:
933  *   set current_bbcc to BBCC that gets the costs for this BB execution
934  *   attached
935  */
936 static
addBBSetupCall(ClgState * clgs)937 void addBBSetupCall(ClgState* clgs)
938 {
939    IRDirty* di;
940    IRExpr  *arg1, **argv;
941 
942    arg1 = mkIRExpr_HWord( (HWord)clgs->bb );
943    argv = mkIRExprVec_1(arg1);
944    di = unsafeIRDirty_0_N( 1, "setup_bbcc",
945 			      VG_(fnptr_to_fnentry)( & CLG_(setup_bbcc) ),
946 			      argv);
947    addStmtToIRSB( clgs->sbOut, IRStmt_Dirty(di) );
948 }
949 
950 
/* Instrumentation callback: build the instrumented version of superblock
 * <sbIn>. The pass
 *  - copies the VEX preamble verbatim,
 *  - looks up / creates the BB object keyed by the original address,
 *  - inserts a call to setup_bbcc at SB entry,
 *  - walks all statements, emitting Ir/Dr/Dw/Bc/Bi/G events (buffered in
 *    clgs.events and flushed in batches by flushEvents), and
 *  - records jump-kind info for each conditional exit and for the final
 *    exit, correcting for branches that VEX inverted.
 * Returns the new SB (or sbIn unchanged if instrumentation is off). */
static
IRSB* CLG_(instrument)( VgCallbackClosure* closure,
			IRSB* sbIn,
			VexGuestLayout* layout,
			VexGuestExtents* vge,
                        VexArchInfo* archinfo_host,
			IRType gWordTy, IRType hWordTy )
{
   Int        i;
   IRStmt*    st;
   Addr       origAddr;
   InstrInfo* curr_inode = NULL;
   ClgState   clgs;
   UInt       cJumps = 0;
   IRTypeEnv* tyenv = sbIn->tyenv;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   // No instrumentation if it is switched off
   if (! CLG_(instrument_state)) {
       CLG_DEBUG(5, "instrument(BB %#lx) [Instrumentation OFF]\n",
		 (Addr)closure->readdr);
       return sbIn;
   }

   CLG_DEBUG(3, "+ instrument(BB %#lx)\n", (Addr)closure->readdr);

   /* Set up SB for instrumented IR: same header, no statements yet */
   clgs.sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( clgs.sbOut, sbIn->stmts[i] );
      i++;
   }

   // Get the first statement, and origAddr from it
   CLG_ASSERT(sbIn->stmts_used >0);
   CLG_ASSERT(i < sbIn->stmts_used);
   st = sbIn->stmts[i];
   CLG_ASSERT(Ist_IMark == st->tag);

   origAddr = (Addr)st->Ist.IMark.addr + (Addr)st->Ist.IMark.delta;
   CLG_ASSERT(origAddr == st->Ist.IMark.addr
                          + st->Ist.IMark.delta);  // XXX: check no overflow

   /* Get BB struct (creating if necessary).
    * JS: The hash table is keyed with orig_addr_noredir -- important!
    * JW: Why? If it is because of different chasing of the redirection,
    *     this is not needed, as chasing is switched off in callgrind
    */
   clgs.bb = CLG_(get_bb)(origAddr, sbIn, &(clgs.seen_before));

   addBBSetupCall(&clgs);

   // Set up running state
   clgs.events_used = 0;
   clgs.ii_index = 0;
   clgs.instr_offset = 0;

   for (/*use current i*/; i < sbIn->stmts_used; i++) {

      st = sbIn->stmts[i];
      CLG_ASSERT(isFlatIRStmt(st));

      switch (st->tag) {
	 case Ist_NoOp:
	 case Ist_AbiHint:
	 case Ist_Put:
	 case Ist_PutI:
	 case Ist_MBE:
	    /* no memory access, no event needed; copied verbatim below */
	    break;

	 case Ist_IMark: {
            Addr64 cia   = st->Ist.IMark.addr + st->Ist.IMark.delta;
            Int    isize = st->Ist.IMark.len;
            CLG_ASSERT(clgs.instr_offset == (Addr)cia - origAddr);
	    // If Vex fails to decode an instruction, the size will be zero.
	    // Pretend otherwise.
	    if (isize == 0) isize = VG_MIN_INSTR_SZB;

	    // Sanity-check size.
	    tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
		     || VG_CLREQ_SZB == isize );

	    // Init the inode, record it as the current one.
	    // Subsequent Dr/Dw/Dm events from the same instruction will
	    // also use it.
	    curr_inode = next_InstrInfo (&clgs, isize);

	    addEvent_Ir( &clgs, curr_inode );
	    break;
	 }

	 case Ist_WrTmp: {
	    IRExpr* data = st->Ist.WrTmp.data;
	    if (data->tag == Iex_Load) {
	       IRExpr* aexpr = data->Iex.Load.addr;
	       // Note also, endianness info is ignored.  I guess
	       // that's not interesting.
	       addEvent_Dr( &clgs, curr_inode,
			    sizeofIRType(data->Iex.Load.ty), aexpr );
	    }
	    break;
	 }

	 case Ist_Store: {
	    IRExpr* data  = st->Ist.Store.data;
	    IRExpr* aexpr = st->Ist.Store.addr;
	    addEvent_Dw( &clgs, curr_inode,
			 sizeofIRType(typeOfIRExpr(sbIn->tyenv, data)), aexpr );
	    break;
	 }

         case Ist_StoreG: {
            /* guarded store: emit a write event gated on the guard */
            IRStoreG* sg   = st->Ist.StoreG.details;
            IRExpr*   data = sg->data;
            IRExpr*   addr = sg->addr;
            IRType    type = typeOfIRExpr(tyenv, data);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &clgs, curr_inode,
                                sizeofIRType(type), addr, sg->guard,
                                True/*isWrite*/ );
            break;
         }

         case Ist_LoadG: {
            /* guarded load: the event size is the loaded type, before
               any implicit widening */
            IRLoadG* lg       = st->Ist.LoadG.details;
            IRType   type     = Ity_INVALID; /* loaded type */
            IRType   typeWide = Ity_INVALID; /* after implicit widening */
            IRExpr*  addr     = lg->addr;
            typeOfIRLoadGOp(lg->cvt, &typeWide, &type);
            tl_assert(type != Ity_INVALID);
            addEvent_D_guarded( &clgs, curr_inode,
                                sizeofIRType(type), addr, lg->guard,
                                False/*!isWrite*/ );
            break;
         }

	 case Ist_Dirty: {
	    Int      dataSize;
	    IRDirty* d = st->Ist.Dirty.details;
	    if (d->mFx != Ifx_None) {
	       /* This dirty helper accesses memory.  Collect the details. */
	       tl_assert(d->mAddr != NULL);
	       tl_assert(d->mSize != 0);
	       dataSize = d->mSize;
	       // Large (eg. 28B, 108B, 512B on x86) data-sized
	       // instructions will be done inaccurately, but they're
	       // very rare and this avoids errors from hitting more
	       // than two cache lines in the simulation.
	       if (CLG_(clo).simulate_cache && dataSize > CLG_(min_line_size))
		  dataSize = CLG_(min_line_size);
	       if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
		  addEvent_Dr( &clgs, curr_inode, dataSize, d->mAddr );
	       if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
		  addEvent_Dw( &clgs, curr_inode, dataSize, d->mAddr );
	    } else {
	       tl_assert(d->mAddr == NULL);
	       tl_assert(d->mSize == 0);
	    }
	    break;
	 }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            CLG_ASSERT(cas->addr && isIRAtom(cas->addr));
            CLG_ASSERT(cas->dataLo);
            dataSize = sizeofIRType(typeOfIRExpr(sbIn->tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since this is a doubleword-cas */
            addEvent_Dr( &clgs, curr_inode, dataSize, cas->addr );
            addEvent_Dw( &clgs, curr_inode, dataSize, cas->addr );
            addEvent_G(  &clgs, curr_inode );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(sbIn->tyenv, st->Ist.LLSC.result);
               addEvent_Dr( &clgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
               /* flush events before LL, should help SC to succeed */
               flushEvents( &clgs );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(sbIn->tyenv, st->Ist.LLSC.storedata);
               addEvent_Dw( &clgs, curr_inode,
                            sizeofIRType(dataTy), st->Ist.LLSC.addr );
               /* I don't know whether the global-bus-lock cost should
                  be attributed to the LL or the SC, but it doesn't
                  really matter since they always have to be used in
                  pairs anyway.  Hence put it (quite arbitrarily) on
                  the SC. */
               addEvent_G(  &clgs, curr_inode );
            }
            break;
         }

 	 case Ist_Exit: {
            Bool guest_exit, inverted;

            /* VEX code generation sometimes inverts conditional branches.
             * As Callgrind counts (conditional) jumps, it has to correct
             * inversions. The heuristic is the following:
             * (1) Callgrind switches off SB chasing and unrolling, and
             *     therefore it assumes that a candidate for inversion only is
             *     the last conditional branch in an SB.
             * (2) inversion is assumed if the branch jumps to the address of
             *     the next guest instruction in memory.
             * This heuristic is precalculated in CLG_(collectBlockInfo)().
             *
             * Branching behavior is also used for branch prediction. Note that
             * above heuristic is different from what Cachegrind does.
             * Cachegrind uses (2) for all branches.
             */
            if (cJumps+1 == clgs.bb->cjmp_count)
                inverted = clgs.bb->cjmp_inverted;
            else
                inverted = False;

            // call branch predictor only if this is a branch in guest code
            guest_exit = (st->Ist.Exit.jk == Ijk_Boring) ||
                         (st->Ist.Exit.jk == Ijk_Call) ||
                         (st->Ist.Exit.jk == Ijk_Ret);

            if (guest_exit) {
                /* Stuff to widen the guard expression to a host word, so
                   we can pass it to the branch predictor simulation
                   functions easily. */
                IRType   tyW    = hWordTy;
                IROp     widen  = tyW==Ity_I32  ? Iop_1Uto32  : Iop_1Uto64;
                IROp     opXOR  = tyW==Ity_I32  ? Iop_Xor32   : Iop_Xor64;
                IRTemp   guard1 = newIRTemp(clgs.sbOut->tyenv, Ity_I1);
                IRTemp   guardW = newIRTemp(clgs.sbOut->tyenv, tyW);
                IRTemp   guard  = newIRTemp(clgs.sbOut->tyenv, tyW);
                IRExpr*  one    = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
                                               : IRExpr_Const(IRConst_U64(1));

                /* Widen the guard expression. */
                addStmtToIRSB( clgs.sbOut,
                               IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
                addStmtToIRSB( clgs.sbOut,
                               IRStmt_WrTmp( guardW,
                                             IRExpr_Unop(widen,
                                                         IRExpr_RdTmp(guard1))) );
                /* If the exit is inverted, invert the sense of the guard. */
                addStmtToIRSB(
                        clgs.sbOut,
                        IRStmt_WrTmp(
                                guard,
                                inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
                                    : IRExpr_RdTmp(guardW)
                                    ));
                /* And post the event. */
                addEvent_Bc( &clgs, curr_inode, IRExpr_RdTmp(guard) );
            }

	    /* We may never reach the next statement, so need to flush
	       all outstanding transactions now. */
	    flushEvents( &clgs );

	    CLG_ASSERT(clgs.ii_index>0);
	    if (!clgs.seen_before) {
	      /* first time we see this BB: classify and record the jump */
	      ClgJumpKind jk;

	      if      (st->Ist.Exit.jk == Ijk_Call) jk = jk_Call;
	      else if (st->Ist.Exit.jk == Ijk_Ret)  jk = jk_Return;
	      else {
		/* a branch targeting the next instruction is a
		   fall-through (jk_None), not a real jump */
		if (IRConst2Addr(st->Ist.Exit.dst) ==
		    origAddr + curr_inode->instr_offset + curr_inode->instr_size)
		  jk = jk_None;
		else
		  jk = jk_Jump;
	      }

	      clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
	      clgs.bb->jmp[cJumps].jmpkind = jk;
	    }

	    /* Update global variable jmps_passed before the jump
	     * A correction is needed if VEX inverted the last jump condition
	    */
	    UInt val = inverted ? cJumps+1 : cJumps;
	    addConstMemStoreStmt( clgs.sbOut,
				  (UWord) &CLG_(current_state).jmps_passed,
				  val, hWordTy);
	    cJumps++;

	    break;
	 }

	 default:
	    tl_assert(0);
	    break;
      }

      /* Copy the original statement */
      addStmtToIRSB( clgs.sbOut, st );

      CLG_DEBUGIF(5) {
	 VG_(printf)("   pass  ");
	 ppIRStmt(st);
	 VG_(printf)("\n");
      }
   }

   /* Deal with branches to unknown destinations.  Except ignore ones
      which are function returns as we assume the return stack
      predictor never mispredicts. */
   if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
      if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
      switch (sbIn->next->tag) {
         case Iex_Const:
            break; /* boring - branch to known address */
         case Iex_RdTmp:
            /* looks like an indirect branch (branch to unknown) */
            addEvent_Bi( &clgs, curr_inode, sbIn->next );
            break;
         default:
            /* shouldn't happen - if the incoming IR is properly
               flattened, should only have tmp and const cases to
               consider. */
            tl_assert(0);
      }
   }

   /* At the end of the bb.  Flush outstandings. */
   flushEvents( &clgs );

   /* Update global variable jmps_passed at end of SB.
    * As CLG_(current_state).jmps_passed is reset to 0 in setup_bbcc,
    * this can be omitted if there is no conditional jump in this SB.
    * A correction is needed if VEX inverted the last jump condition
    */
   if (cJumps>0) {
      UInt jmps_passed = cJumps;
      if (clgs.bb->cjmp_inverted) jmps_passed--;
      addConstMemStoreStmt( clgs.sbOut,
			    (UWord) &CLG_(current_state).jmps_passed,
			    jmps_passed, hWordTy);
   }
   CLG_ASSERT(clgs.bb->cjmp_count == cJumps);
   CLG_ASSERT(clgs.bb->instr_count == clgs.ii_index);

   /* Info for final exit from BB */
   {
     ClgJumpKind jk;

     if      (sbIn->jumpkind == Ijk_Call) jk = jk_Call;
     else if (sbIn->jumpkind == Ijk_Ret)  jk = jk_Return;
     else {
       jk = jk_Jump;
       if ((sbIn->next->tag == Iex_Const) &&
	   (IRConst2Addr(sbIn->next->Iex.Const.con) ==
	    origAddr + clgs.instr_offset))
	 jk = jk_None;
     }
     clgs.bb->jmp[cJumps].jmpkind = jk;
     /* Instruction index of the call/ret at BB end
      * (it is wrong for fall-through, but does not matter) */
     clgs.bb->jmp[cJumps].instr = clgs.ii_index-1;
   }

   /* swap information of last exit with final exit if inverted */
   if (clgs.bb->cjmp_inverted) {
     ClgJumpKind jk;
     UInt instr;

     jk = clgs.bb->jmp[cJumps].jmpkind;
     clgs.bb->jmp[cJumps].jmpkind = clgs.bb->jmp[cJumps-1].jmpkind;
     clgs.bb->jmp[cJumps-1].jmpkind = jk;
     instr = clgs.bb->jmp[cJumps].instr;
     clgs.bb->jmp[cJumps].instr = clgs.bb->jmp[cJumps-1].instr;
     clgs.bb->jmp[cJumps-1].instr = instr;
   }

   if (clgs.seen_before) {
       /* on retranslation, derived values must match the stored ones */
       CLG_ASSERT(clgs.bb->cost_count == update_cost_offsets(&clgs));
       CLG_ASSERT(clgs.bb->instr_len == clgs.instr_offset);
   }
   else {
       clgs.bb->cost_count = update_cost_offsets(&clgs);
       clgs.bb->instr_len = clgs.instr_offset;
   }

   CLG_DEBUG(3, "- instrument(BB %#lx): byteLen %u, CJumps %u, CostLen %u\n",
	     origAddr, clgs.bb->instr_len,
	     clgs.bb->cjmp_count, clgs.bb->cost_count);
   if (cJumps>0) {
       CLG_DEBUG(3, "                     [ ");
       for (i=0;i<cJumps;i++)
	   CLG_DEBUG(3, "%d ", clgs.bb->jmp[i].instr);
       CLG_DEBUG(3, "], last inverted: %s \n",
		 clgs.bb->cjmp_inverted ? "yes":"no");
   }

  return clgs.sbOut;
}
1362 
1363 /*--------------------------------------------------------------------*/
1364 /*--- Discarding BB info                                           ---*/
1365 /*--------------------------------------------------------------------*/
1366 
1367 // Called when a translation is removed from the translation cache for
1368 // any reason at all: to free up space, because the guest code was
1369 // unmapped or modified, or for any arbitrary reason.
1370 static
clg_discard_superblock_info(Addr64 orig_addr64,VexGuestExtents vge)1371 void clg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
1372 {
1373     Addr orig_addr = (Addr)orig_addr64;
1374 
1375     tl_assert(vge.n_used > 0);
1376 
1377    if (0)
1378       VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
1379                    (void*)(Addr)orig_addr,
1380                    (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
1381 
1382    // Get BB info, remove from table, free BB info.  Simple!  Note that we
1383    // use orig_addr, not the first instruction address in vge.
1384    CLG_(delete_bb)(orig_addr);
1385 }
1386 
1387 
1388 /*------------------------------------------------------------*/
1389 /*--- CLG_(fini)() and related function                     ---*/
1390 /*------------------------------------------------------------*/
1391 
1392 
1393 
/* Reset all cost counters of thread <t> to zero.
 * Active (not yet returned) calls get their entry-cost snapshot set to
 * the current cost and their call counters cleared, so they show up
 * with zero accumulated cost afterwards. */
static void zero_thread_cost(thread_info* t)
{
  Int sp;

  for (sp = 0; sp < CLG_(current_call_stack).sp; sp++) {
    call_entry* e = &(CLG_(current_call_stack).entry[sp]);
    /* skipped frames have no cost counters attached */
    if (!e->jcc) continue;

    /* reset call counters to current for active calls */
    CLG_(copy_cost)( CLG_(sets).full,
                     e->enter_cost, CLG_(current_state).cost );
    e->jcc->call_counter = 0;
  }

  /* zero the cost counters of every BBCC */
  CLG_(forall_bbccs)(CLG_(zero_bbcc));

  /* set counter for last dump */
  CLG_(copy_cost)( CLG_(sets).full,
                   t->lastdump_cost, CLG_(current_state).cost );
}
1414 
CLG_(zero_all_cost)1415 void CLG_(zero_all_cost)(Bool only_current_thread)
1416 {
1417   if (VG_(clo_verbosity) > 1)
1418     VG_(message)(Vg_DebugMsg, "  Zeroing costs...\n");
1419 
1420   if (only_current_thread)
1421     zero_thread_cost(CLG_(get_current_thread)());
1422   else
1423     CLG_(forall_threads)(zero_thread_cost);
1424 
1425   if (VG_(clo_verbosity) > 1)
1426     VG_(message)(Vg_DebugMsg, "  ...done\n");
1427 }
1428 
1429 static
unwind_thread(thread_info * t)1430 void unwind_thread(thread_info* t)
1431 {
1432   /* unwind signal handlers */
1433   while(CLG_(current_state).sig !=0)
1434     CLG_(post_signal)(CLG_(current_tid),CLG_(current_state).sig);
1435 
1436   /* unwind regular call stack */
1437   while(CLG_(current_call_stack).sp>0)
1438     CLG_(pop_call_stack)();
1439 
1440   /* reset context and function stack for context generation */
1441   CLG_(init_exec_state)( &CLG_(current_state) );
1442   CLG_(current_fn_stack).top = CLG_(current_fn_stack).bottom;
1443 }
1444 
1445 static
zero_state_cost(thread_info * t)1446 void zero_state_cost(thread_info* t)
1447 {
1448     CLG_(zero_cost)( CLG_(sets).full, CLG_(current_state).cost );
1449 }
1450 
/* Oops, this can go very wrong...
   FIXME: We should export this function from the core, or provide other means to get a handle */
1453 extern void VG_(discard_translations) ( Addr64 start, ULong range, const HChar* who );
1454 
CLG_(set_instrument_state)1455 void CLG_(set_instrument_state)(const HChar* reason, Bool state)
1456 {
1457   if (CLG_(instrument_state) == state) {
1458     CLG_DEBUG(2, "%s: instrumentation already %s\n",
1459 	     reason, state ? "ON" : "OFF");
1460     return;
1461   }
1462   CLG_(instrument_state) = state;
1463   CLG_DEBUG(2, "%s: Switching instrumentation %s ...\n",
1464 	   reason, state ? "ON" : "OFF");
1465 
1466   VG_(discard_translations)( (Addr64)0x1000, (ULong) ~0xfffl, "callgrind");
1467 
1468   /* reset internal state: call stacks, simulator */
1469   CLG_(forall_threads)(unwind_thread);
1470   CLG_(forall_threads)(zero_state_cost);
1471   (*CLG_(cachesim).clear)();
1472 
1473   if (VG_(clo_verbosity) > 1)
1474     VG_(message)(Vg_DebugMsg, "%s: instrumentation switched %s\n",
1475 		 reason, state ? "ON" : "OFF");
1476 }
1477 
1478 /* helper for dump_state_togdb */
/* Print the state of one thread in the "key: value" format consumed by
 * callgrind_control via the GDB monitor: per-thread event totals since
 * the last dump, stack depth, and per-frame function/call/cost lines.
 * (Callback for CLG_(forall_threads).) */
static void dump_state_of_thread_togdb(thread_info* ti)
{
    static HChar buf[512];
    static FullCost sum = 0, tmp = 0;   /* lazily allocated, reused per call */
    Int t, p, i;
    BBCC *from, *to;
    call_entry* ce;

    t = CLG_(current_tid);
    CLG_(init_cost_lz)( CLG_(sets).full, &sum );
    /* NOTE(review): CLG_(add_diff_cost) appears to update its 3rd argument
       (lastdump_cost) as a side effect; <tmp> saves the old value so it can
       be restored afterwards — confirm against the cost helpers. */
    CLG_(copy_cost_lz)( CLG_(sets).full, &tmp, ti->lastdump_cost );
    CLG_(add_diff_cost)( CLG_(sets).full, sum, ti->lastdump_cost,
			 ti->states.entry[0]->cost);
    CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost, tmp );
    CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), sum);
    VG_(gdb_printf)("events-%d: %s\n", t, buf);
    VG_(gdb_printf)("frames-%d: %d\n", t, CLG_(current_call_stack).sp);

    /* walk the active call stack, bottom to top */
    ce = 0;
    for(i = 0; i < CLG_(current_call_stack).sp; i++) {
      ce = CLG_(get_call_entry)(i);
      /* if this frame is skipped, we don't have counters */
      if (!ce->jcc) continue;

      from = ce->jcc->from;
      VG_(gdb_printf)("function-%d-%d: %s\n",t, i, from->cxt->fn[0]->name);
      VG_(gdb_printf)("calls-%d-%d: %llu\n",t, i, ce->jcc->call_counter);

      /* FIXME: EventSets! */
      /* same save/diff/restore pattern as above, now per call entry */
      CLG_(copy_cost)( CLG_(sets).full, sum, ce->jcc->cost );
      CLG_(copy_cost)( CLG_(sets).full, tmp, ce->enter_cost );
      CLG_(add_diff_cost)( CLG_(sets).full, sum,
			  ce->enter_cost, CLG_(current_state).cost );
      CLG_(copy_cost)( CLG_(sets).full, ce->enter_cost, tmp );

      p = VG_(sprintf)(buf, "events-%d-%d: ",t, i);
      CLG_(sprint_mappingcost)(buf + p, CLG_(dumpmap), sum );
      VG_(gdb_printf)("%s\n", buf);
    }
    /* finally the function currently executing (target of topmost call) */
    if (ce && ce->jcc) {
      to = ce->jcc->to;
      VG_(gdb_printf)("function-%d-%d: %s\n",t, i, to->cxt->fn[0]->name );
    }
}
1523 
1524 /* Dump current state */
dump_state_togdb(void)1525 static void dump_state_togdb(void)
1526 {
1527     static HChar buf[512];
1528     thread_info** th;
1529     int t, p;
1530     Int orig_tid = CLG_(current_tid);
1531 
1532     VG_(gdb_printf)("instrumentation: %s\n",
1533 		    CLG_(instrument_state) ? "on":"off");
1534     if (!CLG_(instrument_state)) return;
1535 
1536     VG_(gdb_printf)("executed-bbs: %llu\n", CLG_(stat).bb_executions);
1537     VG_(gdb_printf)("executed-calls: %llu\n", CLG_(stat).call_counter);
1538     VG_(gdb_printf)("distinct-bbs: %d\n", CLG_(stat).distinct_bbs);
1539     VG_(gdb_printf)("distinct-calls: %d\n", CLG_(stat).distinct_jccs);
1540     VG_(gdb_printf)("distinct-functions: %d\n", CLG_(stat).distinct_fns);
1541     VG_(gdb_printf)("distinct-contexts: %d\n", CLG_(stat).distinct_contexts);
1542 
1543     /* "events:" line. Given here because it will be dynamic in the future */
1544     p = VG_(sprintf)(buf, "events: ");
1545     CLG_(sprint_eventmapping)(buf+p, CLG_(dumpmap));
1546     VG_(gdb_printf)("%s\n", buf);
1547     /* "part:" line (number of last part. Is 0 at start */
1548     VG_(gdb_printf)("part: %d\n", CLG_(get_dump_counter)());
1549 
1550     /* threads */
1551     th = CLG_(get_threads)();
1552     p = VG_(sprintf)(buf, "threads:");
1553     for(t=1;t<VG_N_THREADS;t++) {
1554 	if (!th[t]) continue;
1555 	p += VG_(sprintf)(buf+p, " %d", t);
1556     }
1557     VG_(gdb_printf)("%s\n", buf);
1558     VG_(gdb_printf)("current-tid: %d\n", orig_tid);
1559     CLG_(forall_threads)(dump_state_of_thread_togdb);
1560 }
1561 
1562 
print_monitor_help(void)1563 static void print_monitor_help ( void )
1564 {
1565    VG_(gdb_printf) ("\n");
1566    VG_(gdb_printf) ("callgrind monitor commands:\n");
1567    VG_(gdb_printf) ("  dump [<dump_hint>]\n");
1568    VG_(gdb_printf) ("        dump counters\n");
1569    VG_(gdb_printf) ("  zero\n");
1570    VG_(gdb_printf) ("        zero counters\n");
1571    VG_(gdb_printf) ("  status\n");
1572    VG_(gdb_printf) ("        print status\n");
1573    VG_(gdb_printf) ("  instrumentation [on|off]\n");
1574    VG_(gdb_printf) ("        get/set (if on/off given) instrumentation state\n");
1575    VG_(gdb_printf) ("\n");
1576 }
1577 
1578 /* return True if request recognised, False otherwise */
handle_gdb_monitor_command(ThreadId tid,const HChar * req)1579 static Bool handle_gdb_monitor_command (ThreadId tid, const HChar *req)
1580 {
1581    HChar* wcmd;
1582    HChar s[VG_(strlen(req)) + 1]; /* copy for strtok_r */
1583    HChar *ssaveptr;
1584 
1585    VG_(strcpy) (s, req);
1586 
1587    wcmd = VG_(strtok_r) (s, " ", &ssaveptr);
1588    switch (VG_(keyword_id) ("help dump zero status instrumentation",
1589                             wcmd, kwd_report_duplicated_matches)) {
1590    case -2: /* multiple matches */
1591       return True;
1592    case -1: /* not found */
1593       return False;
1594    case  0: /* help */
1595       print_monitor_help();
1596       return True;
1597    case  1: { /* dump */
1598       CLG_(dump_profile)(req, False);
1599       return True;
1600    }
1601    case  2: { /* zero */
1602       CLG_(zero_all_cost)(False);
1603       return True;
1604    }
1605 
1606    case 3: { /* status */
1607      HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr);
1608      if (arg && (VG_(strcmp)(arg, "internal") == 0)) {
1609        /* internal interface to callgrind_control */
1610        dump_state_togdb();
1611        return True;
1612      }
1613 
1614      if (!CLG_(instrument_state)) {
1615        VG_(gdb_printf)("No status available as instrumentation is switched off\n");
1616      } else {
1617        // Status information to be improved ...
1618        thread_info** th = CLG_(get_threads)();
1619        Int t, tcount = 0;
1620        for(t=1;t<VG_N_THREADS;t++)
1621 	 if (th[t]) tcount++;
1622        VG_(gdb_printf)("%d thread(s) running.\n", tcount);
1623      }
1624      return True;
1625    }
1626 
1627    case 4: { /* instrumentation */
1628      HChar* arg = VG_(strtok_r) (0, " ", &ssaveptr);
1629      if (!arg) {
1630        VG_(gdb_printf)("instrumentation: %s\n",
1631 		       CLG_(instrument_state) ? "on":"off");
1632      }
1633      else
1634        CLG_(set_instrument_state)("Command", VG_(strcmp)(arg,"off")!=0);
1635      return True;
1636    }
1637 
1638    default:
1639       tl_assert(0);
1640       return False;
1641    }
1642 }
1643 
1644 static
CLG_(handle_client_request)1645 Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
1646 {
1647    if (!VG_IS_TOOL_USERREQ('C','T',args[0])
1648        && VG_USERREQ__GDB_MONITOR_COMMAND   != args[0])
1649       return False;
1650 
1651    switch(args[0]) {
1652    case VG_USERREQ__DUMP_STATS:
1653       CLG_(dump_profile)("Client Request", True);
1654       *ret = 0;                 /* meaningless */
1655       break;
1656 
1657    case VG_USERREQ__DUMP_STATS_AT:
1658      {
1659        HChar buf[512];
1660        VG_(sprintf)(buf,"Client Request: %s", (HChar*)args[1]);
1661        CLG_(dump_profile)(buf, True);
1662        *ret = 0;                 /* meaningless */
1663      }
1664      break;
1665 
1666    case VG_USERREQ__ZERO_STATS:
1667      CLG_(zero_all_cost)(True);
1668       *ret = 0;                 /* meaningless */
1669       break;
1670 
1671    case VG_USERREQ__TOGGLE_COLLECT:
1672      CLG_(current_state).collect = !CLG_(current_state).collect;
1673      CLG_DEBUG(2, "Client Request: toggled collection state to %s\n",
1674 	      CLG_(current_state).collect ? "ON" : "OFF");
1675      *ret = 0;                 /* meaningless */
1676      break;
1677 
1678    case VG_USERREQ__START_INSTRUMENTATION:
1679      CLG_(set_instrument_state)("Client Request", True);
1680      *ret = 0;                 /* meaningless */
1681      break;
1682 
1683    case VG_USERREQ__STOP_INSTRUMENTATION:
1684      CLG_(set_instrument_state)("Client Request", False);
1685      *ret = 0;                 /* meaningless */
1686      break;
1687 
1688    case VG_USERREQ__GDB_MONITOR_COMMAND: {
1689       Bool handled = handle_gdb_monitor_command (tid, (HChar*)args[1]);
1690       if (handled)
1691          *ret = 1;
1692       else
1693          *ret = 0;
1694       return handled;
1695    }
1696    default:
1697       return False;
1698    }
1699 
1700    return True;
1701 }
1702 
1703 
/* Syscall Timing */

/* struct timeval syscalltime[VG_N_THREADS]; */
#if CLG_MICROSYSTIME
/* Microsecond resolution: timestamps taken via a direct gettimeofday
   syscall (no suitable core service for this). */
#include <sys/time.h>
#include <sys/syscall.h>
extern Int VG_(do_syscall) ( UInt, ... );

/* per-thread timestamp (microseconds) recorded at syscall entry */
ULong syscalltime[VG_N_THREADS];
#else
/* per-thread timestamp (milliseconds) recorded at syscall entry */
UInt syscalltime[VG_N_THREADS];
#endif
1716 
1717 static
CLG_(pre_syscalltime)1718 void CLG_(pre_syscalltime)(ThreadId tid, UInt syscallno,
1719                            UWord* args, UInt nArgs)
1720 {
1721   if (CLG_(clo).collect_systime) {
1722 #if CLG_MICROSYSTIME
1723     struct vki_timeval tv_now;
1724     VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
1725     syscalltime[tid] = tv_now.tv_sec * 1000000ULL + tv_now.tv_usec;
1726 #else
1727     syscalltime[tid] = VG_(read_millisecond_timer)();
1728 #endif
1729   }
1730 }
1731 
1732 static
CLG_(post_syscalltime)1733 void CLG_(post_syscalltime)(ThreadId tid, UInt syscallno,
1734                             UWord* args, UInt nArgs, SysRes res)
1735 {
1736   if (CLG_(clo).collect_systime &&
1737       CLG_(current_state).bbcc) {
1738       Int o;
1739 #if CLG_MICROSYSTIME
1740     struct vki_timeval tv_now;
1741     ULong diff;
1742 
1743     VG_(do_syscall)(__NR_gettimeofday, (UInt)&tv_now, (UInt)NULL);
1744     diff = (tv_now.tv_sec * 1000000ULL + tv_now.tv_usec) - syscalltime[tid];
1745 #else
1746     UInt diff = VG_(read_millisecond_timer)() - syscalltime[tid];
1747 #endif
1748 
1749     /* offset o is for "SysCount", o+1 for "SysTime" */
1750     o = fullOffset(EG_SYS);
1751     CLG_ASSERT(o>=0);
1752     CLG_DEBUG(0,"   Time (Off %d) for Syscall %d: %ull\n", o, syscallno, diff);
1753 
1754     CLG_(current_state).cost[o] ++;
1755     CLG_(current_state).cost[o+1] += diff;
1756     if (!CLG_(current_state).bbcc->skipped)
1757       CLG_(init_cost_lz)(CLG_(sets).full,
1758 			&(CLG_(current_state).bbcc->skipped));
1759     CLG_(current_state).bbcc->skipped[o] ++;
1760     CLG_(current_state).bbcc->skipped[o+1] += diff;
1761   }
1762 }
1763 
ULong_width(ULong n)1764 static UInt ULong_width(ULong n)
1765 {
1766    UInt w = 0;
1767    while (n > 0) {
1768       n = n / 10;
1769       w++;
1770    }
1771    if (w == 0) w = 1;
1772    return w + (w-1)/3;   // add space for commas
1773 }
1774 
1775 static
branchsim_printstat(int l1,int l2,int l3)1776 void branchsim_printstat(int l1, int l2, int l3)
1777 {
1778     static HChar buf1[128], buf2[128], buf3[128];
1779     static HChar fmt[128];
1780     FullCost total;
1781     ULong Bc_total_b, Bc_total_mp, Bi_total_b, Bi_total_mp;
1782     ULong B_total_b, B_total_mp;
1783 
1784     total = CLG_(total_cost);
1785     Bc_total_b  = total[ fullOffset(EG_BC)   ];
1786     Bc_total_mp = total[ fullOffset(EG_BC)+1 ];
1787     Bi_total_b  = total[ fullOffset(EG_BI)   ];
1788     Bi_total_mp = total[ fullOffset(EG_BI)+1 ];
1789 
1790     /* Make format string, getting width right for numbers */
1791     VG_(sprintf)(fmt, "%%s %%,%dllu  (%%,%dllu cond + %%,%dllu ind)\n",
1792                  l1, l2, l3);
1793 
1794     if (0 == Bc_total_b)  Bc_total_b = 1;
1795     if (0 == Bi_total_b)  Bi_total_b = 1;
1796     B_total_b  = Bc_total_b  + Bi_total_b;
1797     B_total_mp = Bc_total_mp + Bi_total_mp;
1798 
1799     VG_(umsg)("\n");
1800     VG_(umsg)(fmt, "Branches:     ",
1801               B_total_b, Bc_total_b, Bi_total_b);
1802 
1803     VG_(umsg)(fmt, "Mispredicts:  ",
1804               B_total_mp, Bc_total_mp, Bi_total_mp);
1805 
1806     VG_(percentify)(B_total_mp,  B_total_b,  1, l1+1, buf1);
1807     VG_(percentify)(Bc_total_mp, Bc_total_b, 1, l2+1, buf2);
1808     VG_(percentify)(Bi_total_mp, Bi_total_b, 1, l3+1, buf3);
1809 
1810     VG_(umsg)("Mispred rate:  %s (%s     + %s   )\n", buf1, buf2,buf3);
1811 }
1812 
/* Dump internal tool statistics (table sizes, lookup/debug-info hit
   rates, LRU misses, event counters) as debug messages.  Registered
   via VG_(needs_print_stats) and also called from finish() when
   --stats=yes is given. */
static
void clg_print_stats(void)
{
   /* total BB lookups, split below by how much debug info was found */
   int BB_lookups =
     CLG_(stat).full_debug_BBs +
     CLG_(stat).fn_name_debug_BBs +
     CLG_(stat).file_line_debug_BBs +
     CLG_(stat).no_debug_BBs;

   /* Hash table stats */
   VG_(message)(Vg_DebugMsg, "Distinct objects: %d\n",
		CLG_(stat).distinct_objs);
   VG_(message)(Vg_DebugMsg, "Distinct files:   %d\n",
		CLG_(stat).distinct_files);
   VG_(message)(Vg_DebugMsg, "Distinct fns:     %d\n",
		CLG_(stat).distinct_fns);
   VG_(message)(Vg_DebugMsg, "Distinct contexts:%d\n",
		CLG_(stat).distinct_contexts);
   VG_(message)(Vg_DebugMsg, "Distinct BBs:     %d\n",
		CLG_(stat).distinct_bbs);
   VG_(message)(Vg_DebugMsg, "Cost entries:     %d (Chunks %d)\n",
		CLG_(costarray_entries), CLG_(costarray_chunks));
   VG_(message)(Vg_DebugMsg, "Distinct BBCCs:   %d\n",
		CLG_(stat).distinct_bbccs);
   VG_(message)(Vg_DebugMsg, "Distinct JCCs:    %d\n",
		CLG_(stat).distinct_jccs);
   VG_(message)(Vg_DebugMsg, "Distinct skips:   %d\n",
		CLG_(stat).distinct_skips);
   VG_(message)(Vg_DebugMsg, "BB lookups:       %d\n",
		BB_lookups);
   /* breakdown of debug-info quality, only meaningful if any lookups */
   if (BB_lookups>0) {
      VG_(message)(Vg_DebugMsg, "With full      debug info:%3d%% (%d)\n",
		   CLG_(stat).full_debug_BBs    * 100 / BB_lookups,
		   CLG_(stat).full_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)\n",
		   CLG_(stat).file_line_debug_BBs * 100 / BB_lookups,
		   CLG_(stat).file_line_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With fn name   debug info:%3d%% (%d)\n",
		   CLG_(stat).fn_name_debug_BBs * 100 / BB_lookups,
		   CLG_(stat).fn_name_debug_BBs);
      VG_(message)(Vg_DebugMsg, "With no        debug info:%3d%% (%d)\n",
		   CLG_(stat).no_debug_BBs      * 100 / BB_lookups,
		   CLG_(stat).no_debug_BBs);
   }
   VG_(message)(Vg_DebugMsg, "BBCC Clones:       %d\n",
		CLG_(stat).bbcc_clones);
   VG_(message)(Vg_DebugMsg, "BBs Retranslated:  %d\n",
		CLG_(stat).bb_retranslations);
   VG_(message)(Vg_DebugMsg, "Distinct instrs:   %d\n",
		CLG_(stat).distinct_instrs);
   VG_(message)(Vg_DebugMsg, "");

   /* Runtime counters */
   VG_(message)(Vg_DebugMsg, "LRU Contxt Misses: %d\n",
		CLG_(stat).cxt_lru_misses);
   VG_(message)(Vg_DebugMsg, "LRU BBCC Misses:   %d\n",
		CLG_(stat).bbcc_lru_misses);
   VG_(message)(Vg_DebugMsg, "LRU JCC Misses:    %d\n",
		CLG_(stat).jcc_lru_misses);
   VG_(message)(Vg_DebugMsg, "BBs Executed:      %llu\n",
		CLG_(stat).bb_executions);
   VG_(message)(Vg_DebugMsg, "Calls:             %llu\n",
		CLG_(stat).call_counter);
   VG_(message)(Vg_DebugMsg, "CondJMP followed:  %llu\n",
		CLG_(stat).jcnd_counter);
   VG_(message)(Vg_DebugMsg, "Boring JMPs:       %llu\n",
		CLG_(stat).jump_counter);
   VG_(message)(Vg_DebugMsg, "Recursive calls:   %llu\n",
		CLG_(stat).rec_call_counter);
   VG_(message)(Vg_DebugMsg, "Returns:           %llu\n",
		CLG_(stat).ret_counter);
}
1884 
1885 
1886 static
finish(void)1887 void finish(void)
1888 {
1889   HChar buf[32+COSTS_LEN];
1890   HChar fmt[128];
1891   Int l1, l2, l3;
1892   FullCost total;
1893 
1894   CLG_DEBUG(0, "finish()\n");
1895 
1896   (*CLG_(cachesim).finish)();
1897 
1898   /* pop all remaining items from CallStack for correct sum
1899    */
1900   CLG_(forall_threads)(unwind_thread);
1901 
1902   CLG_(dump_profile)(0, False);
1903 
1904   if (VG_(clo_verbosity) == 0) return;
1905 
1906   if (VG_(clo_stats)) {
1907     VG_(message)(Vg_DebugMsg, "\n");
1908     clg_print_stats();
1909     VG_(message)(Vg_DebugMsg, "\n");
1910   }
1911 
1912   CLG_(sprint_eventmapping)(buf, CLG_(dumpmap));
1913   VG_(message)(Vg_UserMsg, "Events    : %s\n", buf);
1914   CLG_(sprint_mappingcost)(buf, CLG_(dumpmap), CLG_(total_cost));
1915   VG_(message)(Vg_UserMsg, "Collected : %s\n", buf);
1916   VG_(message)(Vg_UserMsg, "\n");
1917 
1918   /* determine value widths for statistics */
1919   total = CLG_(total_cost);
1920   l1 = ULong_width( total[fullOffset(EG_IR)] );
1921   l2 = l3 = 0;
1922   if (CLG_(clo).simulate_cache) {
1923       l2 = ULong_width( total[fullOffset(EG_DR)] );
1924       l3 = ULong_width( total[fullOffset(EG_DW)] );
1925   }
1926   if (CLG_(clo).simulate_branch) {
1927       int l2b = ULong_width( total[fullOffset(EG_BC)] );
1928       int l3b = ULong_width( total[fullOffset(EG_BI)] );
1929       if (l2b > l2) l2 = l2b;
1930       if (l3b > l3) l3 = l3b;
1931   }
1932 
1933   /* Make format string, getting width right for numbers */
1934   VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
1935 
1936   /* Always print this */
1937   VG_(umsg)(fmt, "I   refs:     ", total[fullOffset(EG_IR)] );
1938 
1939   if (CLG_(clo).simulate_cache)
1940       (*CLG_(cachesim).printstat)(l1, l2, l3);
1941 
1942   if (CLG_(clo).simulate_branch)
1943       branchsim_printstat(l1, l2, l3);
1944 
1945 }
1946 
1947 
/* Tool exit hook, registered via VG_(basic_tool_funcs).  The client's
   exit code is ignored; all work happens in finish(). */
void CLG_(fini)(Int exitcode)
{
  finish();
}
1952 
1953 
1954 /*--------------------------------------------------------------------*/
1955 /*--- Setup                                                        ---*/
1956 /*--------------------------------------------------------------------*/
1957 
/* Invoked each time a thread (re)enters client code.  To keep the
   overhead low, this only forwards to CLG_(run_thread) after at least
   5000 more basic blocks have run since the last forwarded call. */
static void clg_start_client_code_callback ( ThreadId tid, ULong blocks_done )
{
   static ULong last_blocks_done = 0;
   ULong delta = blocks_done - last_blocks_done;

   if (0)
      VG_(printf)("%d R %llu\n", (Int)tid, blocks_done);

   /* throttle calls to CLG_(run_thread) by number of BBs executed */
   if (delta < 5000)
      return;
   last_blocks_done = blocks_done;

   CLG_(run_thread)( tid );
}
1971 
1972 static
CLG_(post_clo_init)1973 void CLG_(post_clo_init)(void)
1974 {
1975    if (VG_(clo_vex_control).iropt_register_updates
1976        != VexRegUpdSpAtMemAccess) {
1977       CLG_DEBUG(1, " Using user specified value for "
1978                 "--vex-iropt-register-updates\n");
1979    } else {
1980       CLG_DEBUG(1,
1981                 " Using default --vex-iropt-register-updates="
1982                 "sp-at-mem-access\n");
1983    }
1984 
1985    if (VG_(clo_vex_control).iropt_unroll_thresh != 0) {
1986       VG_(message)(Vg_UserMsg,
1987                    "callgrind only works with --vex-iropt-unroll-thresh=0\n"
1988                    "=> resetting it back to 0\n");
1989       VG_(clo_vex_control).iropt_unroll_thresh = 0;   // cannot be overriden.
1990    }
1991    if (VG_(clo_vex_control).guest_chase_thresh != 0) {
1992       VG_(message)(Vg_UserMsg,
1993                    "callgrind only works with --vex-guest-chase-thresh=0\n"
1994                    "=> resetting it back to 0\n");
1995       VG_(clo_vex_control).guest_chase_thresh = 0; // cannot be overriden.
1996    }
1997 
1998    CLG_DEBUG(1, "  dump threads: %s\n", CLG_(clo).separate_threads ? "Yes":"No");
1999    CLG_DEBUG(1, "  call sep. : %d\n", CLG_(clo).separate_callers);
2000    CLG_DEBUG(1, "  rec. sep. : %d\n", CLG_(clo).separate_recursions);
2001 
2002    if (!CLG_(clo).dump_line && !CLG_(clo).dump_instr && !CLG_(clo).dump_bb) {
2003        VG_(message)(Vg_UserMsg, "Using source line as position.\n");
2004        CLG_(clo).dump_line = True;
2005    }
2006 
2007    CLG_(init_dumps)();
2008 
2009    (*CLG_(cachesim).post_clo_init)();
2010 
2011    CLG_(init_eventsets)();
2012    CLG_(init_statistics)(& CLG_(stat));
2013    CLG_(init_cost_lz)( CLG_(sets).full, &CLG_(total_cost) );
2014 
2015    /* initialize hash tables */
2016    CLG_(init_obj_table)();
2017    CLG_(init_cxt_table)();
2018    CLG_(init_bb_hash)();
2019 
2020    CLG_(init_threads)();
2021    CLG_(run_thread)(1);
2022 
2023    CLG_(instrument_state) = CLG_(clo).instrument_atstart;
2024 
2025    if (VG_(clo_verbosity > 0)) {
2026       VG_(message)(Vg_UserMsg,
2027                    "For interactive control, run 'callgrind_control%s%s -h'.\n",
2028                    (VG_(arg_vgdb_prefix) ? " " : ""),
2029                    (VG_(arg_vgdb_prefix) ? VG_(arg_vgdb_prefix) : ""));
2030    }
2031 }
2032 
/* Tool registration entry point, invoked by Valgrind core via
   VG_DETERMINE_INTERFACE_VERSION before option processing.  Declares
   tool details, pins required VEX settings, registers all callbacks
   (instrumentation, client requests, syscall wrappers, signal
   tracking) and installs option defaults. */
static
void CLG_(pre_clo_init)(void)
{
    VG_(details_name)            ("Callgrind");
    VG_(details_version)         (NULL);
    VG_(details_description)     ("a call-graph generating cache profiler");
    VG_(details_copyright_author)("Copyright (C) 2002-2013, and GNU GPL'd, "
				  "by Josef Weidendorfer et al.");
    VG_(details_bug_reports_to)  (VG_BUGS_TO);
    VG_(details_avg_translation_sizeB) ( 500 );

    /* callgrind needs the stack pointer kept up to date at memory
       accesses; loop unrolling and basic-block chasing would merge
       blocks and break cost attribution, so both are forced to 0. */
    VG_(clo_vex_control).iropt_register_updates
       = VexRegUpdSpAtMemAccess; // overridable by the user.
    VG_(clo_vex_control).iropt_unroll_thresh = 0;   // cannot be overriden.
    VG_(clo_vex_control).guest_chase_thresh = 0;    // cannot be overriden.

    VG_(basic_tool_funcs)        (CLG_(post_clo_init),
                                  CLG_(instrument),
                                  CLG_(fini));

    VG_(needs_superblock_discards)(clg_discard_superblock_info);


    VG_(needs_command_line_options)(CLG_(process_cmd_line_option),
				    CLG_(print_usage),
				    CLG_(print_debug_usage));

    VG_(needs_client_requests)(CLG_(handle_client_request));
    VG_(needs_print_stats)    (clg_print_stats);
    VG_(needs_syscall_wrapper)(CLG_(pre_syscalltime),
			       CLG_(post_syscalltime));

    VG_(track_start_client_code)  ( & clg_start_client_code_callback );
    VG_(track_pre_deliver_signal) ( & CLG_(pre_signal) );
    VG_(track_post_deliver_signal)( & CLG_(post_signal) );

    CLG_(set_clo_defaults)();
}
2071 
2072 VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))
2073 
2074 /*--------------------------------------------------------------------*/
2075 /*--- end                                                   main.c ---*/
2076 /*--------------------------------------------------------------------*/
2077