• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- An example Valgrind tool.                          lk_main.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Lackey, an example Valgrind tool that does
8    some simple program measurement and tracing.
9 
10    Copyright (C) 2002-2011 Nicholas Nethercote
11       njn@valgrind.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 // This tool shows how to do some basic instrumentation.
32 //
33 // There are four kinds of instrumentation it can do.  They can be turned
34 // on/off independently with command line options:
35 //
36 // * --basic-counts   : do basic counts, eg. number of instructions
37 //                      executed, jumps executed, etc.
38 // * --detailed-counts: do more detailed counts:  number of loads, stores
39 //                      and ALU operations of different sizes.
40 // * --trace-mem=yes:   trace all (data) memory accesses.
41 // * --trace-superblocks=yes:
42 //                      trace all superblock entries.  Mostly of interest
43 //                      to the Valgrind developers.
44 //
45 // The code for each kind of instrumentation is guarded by a clo_* variable:
46 // clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
47 //
48 // If you want to modify any of the instrumentation code, look for the code
49 // that is guarded by the relevant clo_* variable (eg. clo_trace_mem)
50 // If you're not interested in the other kinds of instrumentation you can
51 // remove them.  If you want to do more complex modifications, please read
52 // VEX/pub/libvex_ir.h to understand the intermediate representation.
53 //
54 //
55 // Specific Details about --trace-mem=yes
56 // --------------------------------------
57 // Lackey's --trace-mem code is a good starting point for building Valgrind
58 // tools that act on memory loads and stores.  It also could be used as is,
59 // with its output used as input to a post-mortem processing step.  However,
60 // because memory traces can be very large, online analysis is generally
61 // better.
62 //
63 // It prints memory data access traces that look like this:
64 //
65 //   I  0023C790,2  # instruction read at 0x0023C790 of size 2
66 //   I  0023C792,5
67 //    S BE80199C,4  # data store at 0xBE80199C of size 4
68 //   I  0025242B,3
69 //    L BE801950,4  # data load at 0xBE801950 of size 4
70 //   I  0023D476,7
71 //    M 0025747C,1  # data modify at 0x0025747C of size 1
72 //   I  0023DC20,2
73 //    L 00254962,1
74 //    L BE801FB3,1
75 //   I  00252305,1
76 //    L 00254AEB,1
77 //    S 00257998,1
78 //
79 // Every instruction executed has an "instr" event representing it.
80 // Instructions that do memory accesses are followed by one or more "load",
81 // "store" or "modify" events.  Some instructions do more than one load or
82 // store, as in the last two examples in the above trace.
83 //
84 // Here are some examples of x86 instructions that do different combinations
85 // of loads, stores, and modifies.
86 //
87 //    Instruction          Memory accesses                  Event sequence
88 //    -----------          ---------------                  --------------
89 //    add %eax, %ebx       No loads or stores               instr
90 //
91 //    movl (%eax), %ebx    loads (%eax)                     instr, load
92 //
93 //    movl %eax, (%ebx)    stores (%ebx)                    instr, store
94 //
95 //    incl (%ecx)          modifies (%ecx)                  instr, modify
96 //
97 //    cmpsb                loads (%esi), loads(%edi)        instr, load, load
98 //
99 //    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
100 //    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
101 //    movsw                loads (%esi), stores (%edi)      instr, load, store
102 //
103 // Instructions using x86 "rep" prefixes are traced as if they are repeated
104 // N times.
105 //
106 // Lackey with --trace-mem gives good traces, but they are not perfect, for
107 // the following reasons:
108 //
109 // - It does not trace into the OS kernel, so system calls and other kernel
110 //   operations (eg. some scheduling and signal handling code) are ignored.
111 //
112 // - It could model loads and stores done at the system call boundary using
113 //   the pre_mem_read/post_mem_write events.  For example, if you call
114 //   fstat() you know that the passed in buffer has been written.  But it
115 //   currently does not do this.
116 //
117 // - Valgrind replaces some code (not much) with its own, notably parts of
118 //   code for scheduling operations and signal handling.  This code is not
119 //   traced.
120 //
121 // - There is no consideration of virtual-to-physical address mapping.
122 //   This may not matter for many purposes.
123 //
124 // - Valgrind modifies the instruction stream in some very minor ways.  For
125 //   example, on x86 the bts, btc, btr instructions are incorrectly
126 //   considered to always touch memory (this is a consequence of these
127 //   instructions being very difficult to simulate).
128 //
129 // - Valgrind tools layout memory differently to normal programs, so the
130 //   addresses you get will not be typical.  Thus Lackey (and all Valgrind
131 //   tools) is suitable for getting relative memory traces -- eg. if you
132 //   want to analyse locality of memory accesses -- but is not good if
133 //   absolute addresses are important.
134 //
135 // Despite all these warnings, Lackey's results should be good enough for a
136 // wide range of purposes.  For example, Cachegrind shares all the above
137 // shortcomings and it is still useful.
138 //
139 // For further inspiration, you should look at cachegrind/cg_main.c which
140 // uses the same basic technique for tracing memory accesses, but also groups
141 // events together for processing into twos and threes so that fewer C calls
142 // are made and things run faster.
143 //
144 // Specific Details about --trace-superblocks=yes
145 // ----------------------------------------------
146 // Valgrind splits code up into single entry, multiple exit blocks
147 // known as superblocks.  By itself, --trace-superblocks=yes just
148 // prints a message as each superblock is run:
149 //
150 //  SB 04013170
151 //  SB 04013177
152 //  SB 04013173
153 //  SB 04013177
154 //
155 // The hex number is the address of the first instruction in the
156 // superblock.  You can see the relationship more obviously if you use
157 // --trace-superblocks=yes and --trace-mem=yes together.  Then a "SB"
158 // message at address X is immediately followed by an "instr:" message
159 // for that address, as the first instruction in the block is
160 // executed, for example:
161 //
162 //  SB 04014073
163 //  I  04014073,3
164 //   L 7FEFFF7F8,8
165 //  I  04014076,4
166 //  I  0401407A,3
167 //  I  0401407D,3
168 //  I  04014080,3
169 //  I  04014083,6
170 
171 
172 #include "pub_tool_basics.h"
173 #include "pub_tool_tooliface.h"
174 #include "pub_tool_libcassert.h"
175 #include "pub_tool_libcprint.h"
176 #include "pub_tool_debuginfo.h"
177 #include "pub_tool_libcbase.h"
178 #include "pub_tool_options.h"
179 #include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
180 
/*------------------------------------------------------------*/
/*--- Command line options                                 ---*/
/*------------------------------------------------------------*/

/* Command line options controlling instrumentation kinds, as described at
 * the top of this file. */
static Bool clo_basic_counts    = True;   // --basic-counts (on by default)
static Bool clo_detailed_counts = False;  // --detailed-counts
static Bool clo_trace_mem       = False;  // --trace-mem
static Bool clo_trace_sbs       = False;  // --trace-superblocks

/* The name of the function of which the number of calls (under
 * --basic-counts=yes) is to be counted, with default. Override with command
 * line option --fnname. */
static Char* clo_fnname = "main";
196 
/* Parse one command line option.  Returns True iff ARG was recognised
   as a Lackey option.  Note: the VG_*_CLO macros expand to complete
   parenthesised conditions, hence the bare 'if VG_...' forms below --
   this is the standard Valgrind option-parsing idiom, not a typo. */
static Bool lk_process_cmd_line_option(Char* arg)
{
   if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
   else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
   else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
   else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
   else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
   else
      return False;

   /* Sanity-check: --fnname must remain a non-empty string. */
   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);
   return True;
}
211 
lk_print_usage(void)212 static void lk_print_usage(void)
213 {
214    VG_(printf)(
215 "    --basic-counts=no|yes     count instructions, jumps, etc. [yes]\n"
216 "    --detailed-counts=no|yes  count loads, stores and alu ops [no]\n"
217 "    --trace-mem=no|yes        trace all loads and stores [no]\n"
218 "    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
219 "    --fnname=<name>           count calls to <name> (only used if\n"
220 "                              --basic-count=yes)  [main]\n"
221    );
222 }
223 
/* Print usage for debugging options; Lackey has none. */
static void lk_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}
230 
231 /*------------------------------------------------------------*/
232 /*--- Stuff for --basic-counts                             ---*/
233 /*------------------------------------------------------------*/
234 
/* Nb: use ULongs because the numbers can get very big */
static ULong n_func_calls    = 0;   // calls to the --fnname function
static ULong n_SBs_entered   = 0;   // superblock entries
static ULong n_SBs_completed = 0;   // superblock completions
static ULong n_IRStmts       = 0;   // VEX IR statements seen
static ULong n_guest_instrs  = 0;   // guest instructions (one per IMark)
static ULong n_Jccs          = 0;   // conditional branches
static ULong n_Jccs_untaken  = 0;   // ... of which untaken
static ULong n_IJccs         = 0;   // VEX-inverted conditional branches
static ULong n_IJccs_untaken = 0;   // ... of which untaken
245 
add_one_func_call(void)246 static void add_one_func_call(void)
247 {
248    n_func_calls++;
249 }
250 
add_one_SB_entered(void)251 static void add_one_SB_entered(void)
252 {
253    n_SBs_entered++;
254 }
255 
add_one_SB_completed(void)256 static void add_one_SB_completed(void)
257 {
258    n_SBs_completed++;
259 }
260 
add_one_IRStmt(void)261 static void add_one_IRStmt(void)
262 {
263    n_IRStmts++;
264 }
265 
add_one_guest_instr(void)266 static void add_one_guest_instr(void)
267 {
268    n_guest_instrs++;
269 }
270 
add_one_Jcc(void)271 static void add_one_Jcc(void)
272 {
273    n_Jccs++;
274 }
275 
add_one_Jcc_untaken(void)276 static void add_one_Jcc_untaken(void)
277 {
278    n_Jccs_untaken++;
279 }
280 
add_one_inverted_Jcc(void)281 static void add_one_inverted_Jcc(void)
282 {
283    n_IJccs++;
284 }
285 
add_one_inverted_Jcc_untaken(void)286 static void add_one_inverted_Jcc_untaken(void)
287 {
288    n_IJccs_untaken++;
289 }
290 
291 /*------------------------------------------------------------*/
292 /*--- Stuff for --detailed-counts                          ---*/
293 /*------------------------------------------------------------*/
294 
/* --- Operations --- */

// The three operation classes distinguished by --detailed-counts.
typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;

#define N_OPS 3


/* --- Types --- */

// Number of distinct IRTypes mapped by type2index()/nameOfTypeIndex().
#define N_TYPES 10
305 
type2index(IRType ty)306 static Int type2index ( IRType ty )
307 {
308    switch (ty) {
309       case Ity_I1:      return 0;
310       case Ity_I8:      return 1;
311       case Ity_I16:     return 2;
312       case Ity_I32:     return 3;
313       case Ity_I64:     return 4;
314       case Ity_I128:    return 5;
315       case Ity_F32:     return 6;
316       case Ity_F64:     return 7;
317       case Ity_F128:    return 8;
318       case Ity_V128:    return 9;
319       default: tl_assert(0);
320    }
321 }
322 
nameOfTypeIndex(Int i)323 static HChar* nameOfTypeIndex ( Int i )
324 {
325    switch (i) {
326       case 0: return "I1";   break;
327       case 1: return "I8";   break;
328       case 2: return "I16";  break;
329       case 3: return "I32";  break;
330       case 4: return "I64";  break;
331       case 5: return "I128"; break;
332       case 6: return "F32";  break;
333       case 7: return "F64";  break;
334       case 8: return "F128";  break;
335       case 9: return "V128"; break;
336       default: tl_assert(0);
337    }
338 }
339 
340 
/* --- Counts --- */

/* detailCounts[op][type-index]: dynamic occurrence count of each
   (operation, type) pair; zeroed again in lk_post_clo_init. */
static ULong detailCounts[N_OPS][N_TYPES];
344 
345 /* The helper that is called from the instrumented code. */
346 static VG_REGPARM(1)
increment_detail(ULong * detail)347 void increment_detail(ULong* detail)
348 {
349    (*detail)++;
350 }
351 
352 /* A helper that adds the instrumentation for a detail. */
instrument_detail(IRSB * sb,Op op,IRType type)353 static void instrument_detail(IRSB* sb, Op op, IRType type)
354 {
355    IRDirty* di;
356    IRExpr** argv;
357    const UInt typeIx = type2index(type);
358 
359    tl_assert(op < N_OPS);
360    tl_assert(typeIx < N_TYPES);
361 
362    argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
363    di = unsafeIRDirty_0_N( 1, "increment_detail",
364                               VG_(fnptr_to_fnentry)( &increment_detail ),
365                               argv);
366    addStmtToIRSB( sb, IRStmt_Dirty(di) );
367 }
368 
369 /* Summarize and print the details. */
print_details(void)370 static void print_details ( void )
371 {
372    Int typeIx;
373    VG_(umsg)("   Type        Loads       Stores       AluOps\n");
374    VG_(umsg)("   -------------------------------------------\n");
375    for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
376       VG_(umsg)("   %4s %'12llu %'12llu %'12llu\n",
377                 nameOfTypeIndex( typeIx ),
378                 detailCounts[OpLoad ][typeIx],
379                 detailCounts[OpStore][typeIx],
380                 detailCounts[OpAlu  ][typeIx]
381       );
382    }
383 }
384 
385 
386 /*------------------------------------------------------------*/
387 /*--- Stuff for --trace-mem                                ---*/
388 /*------------------------------------------------------------*/
389 
/* Sanity bound on the size (in bytes) of a single data access. */
#define MAX_DSIZE    512

/* An IRAtom is an IRExpr guaranteed to be "flat" (a constant or temp). */
typedef
   IRExpr
   IRAtom;

/* Instruction fetch, data read, data write, data modify (read+write). */
typedef
   enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
   EventKind;

/* One pending memory event awaiting instrumentation. */
typedef
   struct {
      EventKind  ekind;   // what kind of access
      IRAtom*    addr;    // accessed address (flat IR expression)
      Int        size;    // access size in bytes
   }
   Event;

/* Up to this many unnotified events are allowed.  Must be at least two,
   so that reads and writes to the same address can be merged into a modify.
   Beyond that, larger numbers just potentially induce more spilling due to
   extending live ranges of address temporaries. */
#define N_EVENTS 4

/* Maintain an ordered list of memory events which are outstanding, in
   the sense that no IR has yet been generated to do the relevant
   helper calls.  The SB is scanned top to bottom and memory events
   are added to the end of the list, merging with the most recent
   notified event where possible (Dw immediately following Dr and
   having the same size and EA can be merged).

   This merging is done so that for architectures which have
   load-op-store instructions (x86, amd64), the instr is treated as if
   it makes just one memory reference (a modify), rather than two (a
   read followed by a write at the same address).

   At various points the list will need to be flushed, that is, IR
   generated from it.  That must happen before any possible exit from
   the block (the end, or an IRStmt_Exit).  Flushing also takes place
   when there is no space to add a new event.

   If we require the simulation statistics to be up to date with
   respect to possible memory exceptions, then the list would have to
   be flushed before each memory reference.  That's a pain so we don't
   bother.

   Flushing the list consists of walking it start to end and emitting
   instrumentation IR for each event, in the order in which they
   appear. */

static Event events[N_EVENTS];
static Int   events_used = 0;   // number of valid entries in events[]
442 
443 
/* Runtime helper: print one instruction-fetch trace line "I  addr,size". */
static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
{
   VG_(printf)("I  %08lx,%lu\n", addr, size);
}
448 
/* Runtime helper: print one data-load trace line " L addr,size". */
static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
{
   VG_(printf)(" L %08lx,%lu\n", addr, size);
}
453 
/* Runtime helper: print one data-store trace line " S addr,size". */
static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
{
   VG_(printf)(" S %08lx,%lu\n", addr, size);
}
458 
/* Runtime helper: print one data-modify trace line " M addr,size". */
static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
{
   VG_(printf)(" M %08lx,%lu\n", addr, size);
}
463 
464 
/* Emit instrumentation IR for every pending event, oldest first, then
   empty the event list. */
static void flushEvents(IRSB* sb)
{
   Int k;

   for (k = 0; k < events_used; k++) {
      Event*   ev = &events[k];
      Char*    hname;
      void*    haddr;
      IRExpr** args;
      IRDirty* call;

      /* Pick the tracing helper matching this event's kind. */
      switch (ev->ekind) {
         case Event_Ir: hname = "trace_instr";  haddr = trace_instr;  break;
         case Event_Dr: hname = "trace_load";   haddr = trace_load;   break;
         case Event_Dw: hname = "trace_store";  haddr = trace_store;  break;
         case Event_Dm: hname = "trace_modify"; haddr = trace_modify; break;
         default:
            tl_assert(0);
      }

      /* Emit the dirty call: helper(addr, size). */
      args = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
      call = unsafeIRDirty_0_N( /*regparms*/2,
                                hname, VG_(fnptr_to_fnentry)( haddr ),
                                args );
      addStmtToIRSB( sb, IRStmt_Dirty(call) );
   }

   events_used = 0;
}
505 
506 // WARNING:  If you aren't interested in instruction reads, you can omit the
507 // code that adds calls to trace_instr() in flushEvents().  However, you
508 // must still call this function, addEvent_Ir() -- it is necessary to add
509 // the Ir events to the events list so that merging of paired load/store
510 // events into modify events works correctly.
addEvent_Ir(IRSB * sb,IRAtom * iaddr,UInt isize)511 static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
512 {
513    Event* evt;
514    tl_assert(clo_trace_mem);
515    tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
516             || VG_CLREQ_SZB == isize );
517    if (events_used == N_EVENTS)
518       flushEvents(sb);
519    tl_assert(events_used >= 0 && events_used < N_EVENTS);
520    evt = &events[events_used];
521    evt->ekind = Event_Ir;
522    evt->addr  = iaddr;
523    evt->size  = isize;
524    events_used++;
525 }
526 
527 static
addEvent_Dr(IRSB * sb,IRAtom * daddr,Int dsize)528 void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
529 {
530    Event* evt;
531    tl_assert(clo_trace_mem);
532    tl_assert(isIRAtom(daddr));
533    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
534    if (events_used == N_EVENTS)
535       flushEvents(sb);
536    tl_assert(events_used >= 0 && events_used < N_EVENTS);
537    evt = &events[events_used];
538    evt->ekind = Event_Dr;
539    evt->addr  = daddr;
540    evt->size  = dsize;
541    events_used++;
542 }
543 
544 static
addEvent_Dw(IRSB * sb,IRAtom * daddr,Int dsize)545 void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
546 {
547    Event* lastEvt;
548    Event* evt;
549    tl_assert(clo_trace_mem);
550    tl_assert(isIRAtom(daddr));
551    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
552 
553    // Is it possible to merge this write with the preceding read?
554    lastEvt = &events[events_used-1];
555    if (events_used > 0
556     && lastEvt->ekind == Event_Dr
557     && lastEvt->size  == dsize
558     && eqIRAtom(lastEvt->addr, daddr))
559    {
560       lastEvt->ekind = Event_Dm;
561       return;
562    }
563 
564    // No.  Add as normal.
565    if (events_used == N_EVENTS)
566       flushEvents(sb);
567    tl_assert(events_used >= 0 && events_used < N_EVENTS);
568    evt = &events[events_used];
569    evt->ekind = Event_Dw;
570    evt->size  = dsize;
571    evt->addr  = daddr;
572    events_used++;
573 }
574 
575 
576 /*------------------------------------------------------------*/
577 /*--- Stuff for --trace-superblocks                        ---*/
578 /*------------------------------------------------------------*/
579 
/* Runtime helper: print one superblock-entry trace line "SB addr". */
static void trace_superblock(Addr addr)
{
   VG_(printf)("SB %08lx\n", addr);
}
584 
585 
586 /*------------------------------------------------------------*/
587 /*--- Basic tool functions                                 ---*/
588 /*------------------------------------------------------------*/
589 
lk_post_clo_init(void)590 static void lk_post_clo_init(void)
591 {
592    Int op, tyIx;
593 
594    if (clo_detailed_counts) {
595       for (op = 0; op < N_OPS; op++)
596          for (tyIx = 0; tyIx < N_TYPES; tyIx++)
597             detailCounts[op][tyIx] = 0;
598    }
599 }
600 
601 static
lk_instrument(VgCallbackClosure * closure,IRSB * sbIn,VexGuestLayout * layout,VexGuestExtents * vge,IRType gWordTy,IRType hWordTy)602 IRSB* lk_instrument ( VgCallbackClosure* closure,
603                       IRSB* sbIn,
604                       VexGuestLayout* layout,
605                       VexGuestExtents* vge,
606                       IRType gWordTy, IRType hWordTy )
607 {
608    IRDirty*   di;
609    Int        i;
610    IRSB*      sbOut;
611    Char       fnname[100];
612    IRType     type;
613    IRTypeEnv* tyenv = sbIn->tyenv;
614    Addr       iaddr = 0, dst;
615    UInt       ilen = 0;
616    Bool       condition_inverted = False;
617 
618    if (gWordTy != hWordTy) {
619       /* We don't currently support this case. */
620       VG_(tool_panic)("host/guest word size mismatch");
621    }
622 
623    /* Set up SB */
624    sbOut = deepCopyIRSBExceptStmts(sbIn);
625 
626    // Copy verbatim any IR preamble preceding the first IMark
627    i = 0;
628    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
629       addStmtToIRSB( sbOut, sbIn->stmts[i] );
630       i++;
631    }
632 
633    if (clo_basic_counts) {
634       /* Count this superblock. */
635       di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
636                                  VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
637                                  mkIRExprVec_0() );
638       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
639    }
640 
641    if (clo_trace_sbs) {
642       /* Print this superblock's address. */
643       di = unsafeIRDirty_0_N(
644               0, "trace_superblock",
645               VG_(fnptr_to_fnentry)( &trace_superblock ),
646               mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
647            );
648       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
649    }
650 
651    if (clo_trace_mem) {
652       events_used = 0;
653    }
654 
655    for (/*use current i*/; i < sbIn->stmts_used; i++) {
656       IRStmt* st = sbIn->stmts[i];
657       if (!st || st->tag == Ist_NoOp) continue;
658 
659       if (clo_basic_counts) {
660          /* Count one VEX statement. */
661          di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
662                                     VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
663                                     mkIRExprVec_0() );
664          addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
665       }
666 
667       switch (st->tag) {
668          case Ist_NoOp:
669          case Ist_AbiHint:
670          case Ist_Put:
671          case Ist_PutI:
672          case Ist_MBE:
673             addStmtToIRSB( sbOut, st );
674             break;
675 
676          case Ist_IMark:
677             if (clo_basic_counts) {
678                /* Needed to be able to check for inverted condition in Ist_Exit */
679                iaddr = st->Ist.IMark.addr;
680                ilen  = st->Ist.IMark.len;
681 
682                /* Count guest instruction. */
683                di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
684                                           VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
685                                           mkIRExprVec_0() );
686                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
687 
688                /* An unconditional branch to a known destination in the
689                 * guest's instructions can be represented, in the IRSB to
690                 * instrument, by the VEX statements that are the
691                 * translation of that known destination. This feature is
692                 * called 'SB chasing' and can be influenced by command
693                 * line option --vex-guest-chase-thresh.
694                 *
695                 * To get an accurate count of the calls to a specific
696                 * function, taking SB chasing into account, we need to
697                 * check for each guest instruction (Ist_IMark) if it is
698                 * the entry point of a function.
699                 */
700                tl_assert(clo_fnname);
701                tl_assert(clo_fnname[0]);
702                if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
703                                             fnname, sizeof(fnname))
704                    && 0 == VG_(strcmp)(fnname, clo_fnname)) {
705                   di = unsafeIRDirty_0_N(
706                           0, "add_one_func_call",
707                              VG_(fnptr_to_fnentry)( &add_one_func_call ),
708                              mkIRExprVec_0() );
709                   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
710                }
711             }
712             if (clo_trace_mem) {
713                // WARNING: do not remove this function call, even if you
714                // aren't interested in instruction reads.  See the comment
715                // above the function itself for more detail.
716                addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
717                             st->Ist.IMark.len );
718             }
719             addStmtToIRSB( sbOut, st );
720             break;
721 
722          case Ist_WrTmp:
723             // Add a call to trace_load() if --trace-mem=yes.
724             if (clo_trace_mem) {
725                IRExpr* data = st->Ist.WrTmp.data;
726                if (data->tag == Iex_Load) {
727                   addEvent_Dr( sbOut, data->Iex.Load.addr,
728                                sizeofIRType(data->Iex.Load.ty) );
729                }
730             }
731             if (clo_detailed_counts) {
732                IRExpr* expr = st->Ist.WrTmp.data;
733                type = typeOfIRExpr(sbOut->tyenv, expr);
734                tl_assert(type != Ity_INVALID);
735                switch (expr->tag) {
736                   case Iex_Load:
737                      instrument_detail( sbOut, OpLoad, type );
738                      break;
739                   case Iex_Unop:
740                   case Iex_Binop:
741                   case Iex_Triop:
742                   case Iex_Qop:
743                   case Iex_Mux0X:
744                      instrument_detail( sbOut, OpAlu, type );
745                      break;
746                   default:
747                      break;
748                }
749             }
750             addStmtToIRSB( sbOut, st );
751             break;
752 
753          case Ist_Store:
754             if (clo_trace_mem) {
755                IRExpr* data  = st->Ist.Store.data;
756                addEvent_Dw( sbOut, st->Ist.Store.addr,
757                             sizeofIRType(typeOfIRExpr(tyenv, data)) );
758             }
759             if (clo_detailed_counts) {
760                type = typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data);
761                tl_assert(type != Ity_INVALID);
762                instrument_detail( sbOut, OpStore, type );
763             }
764             addStmtToIRSB( sbOut, st );
765             break;
766 
767          case Ist_Dirty: {
768             if (clo_trace_mem) {
769                Int      dsize;
770                IRDirty* d = st->Ist.Dirty.details;
771                if (d->mFx != Ifx_None) {
772                   // This dirty helper accesses memory.  Collect the details.
773                   tl_assert(d->mAddr != NULL);
774                   tl_assert(d->mSize != 0);
775                   dsize = d->mSize;
776                   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
777                      addEvent_Dr( sbOut, d->mAddr, dsize );
778                   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
779                      addEvent_Dw( sbOut, d->mAddr, dsize );
780                } else {
781                   tl_assert(d->mAddr == NULL);
782                   tl_assert(d->mSize == 0);
783                }
784             }
785             addStmtToIRSB( sbOut, st );
786             break;
787          }
788 
789          case Ist_CAS: {
790             /* We treat it as a read and a write of the location.  I
791                think that is the same behaviour as it was before IRCAS
792                was introduced, since prior to that point, the Vex
793                front ends would translate a lock-prefixed instruction
794                into a (normal) read followed by a (normal) write. */
795             Int    dataSize;
796             IRType dataTy;
797             IRCAS* cas = st->Ist.CAS.details;
798             tl_assert(cas->addr != NULL);
799             tl_assert(cas->dataLo != NULL);
800             dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
801             dataSize = sizeofIRType(dataTy);
802             if (cas->dataHi != NULL)
803                dataSize *= 2; /* since it's a doubleword-CAS */
804             if (clo_trace_mem) {
805                addEvent_Dr( sbOut, cas->addr, dataSize );
806                addEvent_Dw( sbOut, cas->addr, dataSize );
807             }
808             if (clo_detailed_counts) {
809                instrument_detail( sbOut, OpLoad, dataTy );
810                if (cas->dataHi != NULL) /* dcas */
811                   instrument_detail( sbOut, OpLoad, dataTy );
812                instrument_detail( sbOut, OpStore, dataTy );
813                if (cas->dataHi != NULL) /* dcas */
814                   instrument_detail( sbOut, OpStore, dataTy );
815             }
816             addStmtToIRSB( sbOut, st );
817             break;
818          }
819 
820          case Ist_LLSC: {
821             IRType dataTy;
822             if (st->Ist.LLSC.storedata == NULL) {
823                /* LL */
824                dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
825                if (clo_trace_mem)
826                   addEvent_Dr( sbOut, st->Ist.LLSC.addr,
827                                       sizeofIRType(dataTy) );
828                if (clo_detailed_counts)
829                   instrument_detail( sbOut, OpLoad, dataTy );
830             } else {
831                /* SC */
832                dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
833                if (clo_trace_mem)
834                   addEvent_Dw( sbOut, st->Ist.LLSC.addr,
835                                       sizeofIRType(dataTy) );
836                if (clo_detailed_counts)
837                   instrument_detail( sbOut, OpStore, dataTy );
838             }
839             addStmtToIRSB( sbOut, st );
840             break;
841          }
842 
843          case Ist_Exit:
844             if (clo_basic_counts) {
845                // The condition of a branch was inverted by VEX if a taken
846                // branch is in fact a fall trough according to client address
847                tl_assert(iaddr != 0);
848                dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
849                                            st->Ist.Exit.dst->Ico.U64;
850                condition_inverted = (dst == iaddr + ilen);
851 
852                /* Count Jcc */
853                if (!condition_inverted)
854                   di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
855                                           VG_(fnptr_to_fnentry)( &add_one_Jcc ),
856                                           mkIRExprVec_0() );
857                else
858                   di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
859                                           VG_(fnptr_to_fnentry)(
860                                              &add_one_inverted_Jcc ),
861                                           mkIRExprVec_0() );
862 
863                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
864             }
865             if (clo_trace_mem) {
866                flushEvents(sbOut);
867             }
868 
869             addStmtToIRSB( sbOut, st );      // Original statement
870 
871             if (clo_basic_counts) {
872                /* Count non-taken Jcc */
873                if (!condition_inverted)
874                   di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
875                                           VG_(fnptr_to_fnentry)(
876                                              &add_one_Jcc_untaken ),
877                                           mkIRExprVec_0() );
878                else
879                   di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
880                                           VG_(fnptr_to_fnentry)(
881                                              &add_one_inverted_Jcc_untaken ),
882                                           mkIRExprVec_0() );
883 
884                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
885             }
886             break;
887 
888          default:
889             tl_assert(0);
890       }
891    }
892 
893    if (clo_basic_counts) {
894       /* Count this basic block. */
895       di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
896                                  VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
897                                  mkIRExprVec_0() );
898       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
899    }
900 
901    if (clo_trace_mem) {
902       /* At the end of the sbIn.  Flush outstandings. */
903       flushEvents(sbOut);
904    }
905 
906    return sbOut;
907 }
908 
static void lk_fini(Int exitcode)
{
   /* Scratch buffer for VG_(percentify): holds up to "100%" plus the
      trailing NUL. */
   char pc_buf[5];
   const int pc_width    = sizeof(pc_buf) - 1;
   const int pc_decimals = 0;   /* no decimal places in the percentage */

   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);

   if (clo_basic_counts) {
      ULong jccs_total = n_Jccs + n_IJccs;
      /* For inverted Jccs the "untaken" counter corresponds to the branch
         actually being taken, hence it is added rather than subtracted. */
      ULong jccs_taken = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;

      VG_(umsg)("Counted %'llu call%s to %s()\n",
                n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);

      VG_(umsg)("\n");
      VG_(umsg)("Jccs:\n");
      VG_(umsg)("  total:         %'llu\n", jccs_total);
      /* Guard against a zero divisor when no conditional jumps ran. */
      VG_(percentify)(jccs_taken, (jccs_total ? jccs_total : 1),
                      pc_decimals, pc_width, pc_buf);
      VG_(umsg)("  taken:         %'llu (%s)\n", jccs_taken, pc_buf);

      VG_(umsg)("\n");
      VG_(umsg)("Executed:\n");
      VG_(umsg)("  SBs entered:   %'llu\n", n_SBs_entered);
      VG_(umsg)("  SBs completed: %'llu\n", n_SBs_completed);
      VG_(umsg)("  guest instrs:  %'llu\n", n_guest_instrs);
      VG_(umsg)("  IRStmts:       %'llu\n", n_IRStmts);

      VG_(umsg)("\n");
      VG_(umsg)("Ratios:\n");
      tl_assert(n_SBs_entered); /* paranoia: divisor must be nonzero */
      VG_(umsg)("  guest instrs : SB entered  = %'llu : 10\n",
                10 * n_guest_instrs / n_SBs_entered);
      VG_(umsg)("       IRStmts : SB entered  = %'llu : 10\n",
                10 * n_IRStmts / n_SBs_entered);
      tl_assert(n_guest_instrs); /* paranoia: divisor must be nonzero */
      VG_(umsg)("       IRStmts : guest instr = %'llu : 10\n",
                10 * n_IRStmts / n_guest_instrs);
   }

   if (clo_detailed_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("IR-level counts by type:\n");
      print_details();
   }

   if (clo_basic_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("Exit code:       %d\n", exitcode);
   }
}
963 
lk_pre_clo_init(void)964 static void lk_pre_clo_init(void)
965 {
966    VG_(details_name)            ("Lackey");
967    VG_(details_version)         (NULL);
968    VG_(details_description)     ("an example Valgrind tool");
969    VG_(details_copyright_author)(
970       "Copyright (C) 2002-2011, and GNU GPL'd, by Nicholas Nethercote.");
971    VG_(details_bug_reports_to)  (VG_BUGS_TO);
972    VG_(details_avg_translation_sizeB) ( 200 );
973 
974    VG_(basic_tool_funcs)          (lk_post_clo_init,
975                                    lk_instrument,
976                                    lk_fini);
977    VG_(needs_command_line_options)(lk_process_cmd_line_option,
978                                    lk_print_usage,
979                                    lk_print_debug_usage);
980 }
981 
982 VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)
983 
984 /*--------------------------------------------------------------------*/
985 /*--- end                                                lk_main.c ---*/
986 /*--------------------------------------------------------------------*/
987