• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- An example Valgrind tool.                          lk_main.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Lackey, an example Valgrind tool that does
8    some simple program measurement and tracing.
9 
10    Copyright (C) 2002-2010 Nicholas Nethercote
11       njn@valgrind.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 // This tool shows how to do some basic instrumentation.
32 //
33 // There are four kinds of instrumentation it can do.  They can be turned
34 // on/off independently with command line options:
35 //
36 // * --basic-counts   : do basic counts, eg. number of instructions
37 //                      executed, jumps executed, etc.
38 // * --detailed-counts: do more detailed counts:  number of loads, stores
39 //                      and ALU operations of different sizes.
40 // * --trace-mem=yes:   trace all (data) memory accesses.
41 // * --trace-superblocks=yes:
42 //                      trace all superblock entries.  Mostly of interest
43 //                      to the Valgrind developers.
44 //
45 // The code for each kind of instrumentation is guarded by a clo_* variable:
46 // clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
47 //
48 // If you want to modify any of the instrumentation code, look for the code
49 // that is guarded by the relevant clo_* variable (eg. clo_trace_mem)
50 // If you're not interested in the other kinds of instrumentation you can
51 // remove them.  If you want to do more complex modifications, please read
52 // VEX/pub/libvex_ir.h to understand the intermediate representation.
53 //
54 //
55 // Specific Details about --trace-mem=yes
56 // --------------------------------------
57 // Lackey's --trace-mem code is a good starting point for building Valgrind
58 // tools that act on memory loads and stores.  It also could be used as is,
59 // with its output used as input to a post-mortem processing step.  However,
60 // because memory traces can be very large, online analysis is generally
61 // better.
62 //
63 // It prints memory data access traces that look like this:
64 //
65 //   I  0023C790,2  # instruction read at 0x0023C790 of size 2
66 //   I  0023C792,5
67 //    S BE80199C,4  # data store at 0xBE80199C of size 4
68 //   I  0025242B,3
69 //    L BE801950,4  # data load at 0xBE801950 of size 4
70 //   I  0023D476,7
71 //    M 0025747C,1  # data modify at 0x0025747C of size 1
72 //   I  0023DC20,2
73 //    L 00254962,1
74 //    L BE801FB3,1
75 //   I  00252305,1
76 //    L 00254AEB,1
77 //    S 00257998,1
78 //
79 // Every instruction executed has an "instr" event representing it.
80 // Instructions that do memory accesses are followed by one or more "load",
81 // "store" or "modify" events.  Some instructions do more than one load or
82 // store, as in the last two examples in the above trace.
83 //
84 // Here are some examples of x86 instructions that do different combinations
85 // of loads, stores, and modifies.
86 //
87 //    Instruction          Memory accesses                  Event sequence
88 //    -----------          ---------------                  --------------
89 //    add %eax, %ebx       No loads or stores               instr
90 //
91 //    movl (%eax), %ebx    loads (%eax)                     instr, load
92 //
93 //    movl %eax, (%ebx)    stores (%ebx)                    instr, store
94 //
95 //    incl (%ecx)          modifies (%ecx)                  instr, modify
96 //
97 //    cmpsb                loads (%esi), loads(%edi)        instr, load, load
98 //
99 //    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
100 //    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
101 //    movsw                loads (%esi), stores (%edi)      instr, load, store
102 //
103 // Instructions using x86 "rep" prefixes are traced as if they are repeated
104 // N times.
105 //
106 // Lackey with --trace-mem gives good traces, but they are not perfect, for
107 // the following reasons:
108 //
109 // - It does not trace into the OS kernel, so system calls and other kernel
110 //   operations (eg. some scheduling and signal handling code) are ignored.
111 //
112 // - It could model loads and stores done at the system call boundary using
113 //   the pre_mem_read/post_mem_write events.  For example, if you call
114 //   fstat() you know that the passed in buffer has been written.  But it
115 //   currently does not do this.
116 //
117 // - Valgrind replaces some code (not much) with its own, notably parts of
118 //   code for scheduling operations and signal handling.  This code is not
119 //   traced.
120 //
121 // - There is no consideration of virtual-to-physical address mapping.
122 //   This may not matter for many purposes.
123 //
124 // - Valgrind modifies the instruction stream in some very minor ways.  For
125 //   example, on x86 the bts, btc, btr instructions are incorrectly
126 //   considered to always touch memory (this is a consequence of these
127 //   instructions being very difficult to simulate).
128 //
129 // - Valgrind tools layout memory differently to normal programs, so the
130 //   addresses you get will not be typical.  Thus Lackey (and all Valgrind
131 //   tools) is suitable for getting relative memory traces -- eg. if you
132 //   want to analyse locality of memory accesses -- but is not good if
133 //   absolute addresses are important.
134 //
135 // Despite all these warnings, Lackey's results should be good enough for a
136 // wide range of purposes.  For example, Cachegrind shares all the above
137 // shortcomings and it is still useful.
138 //
139 // For further inspiration, you should look at cachegrind/cg_main.c which
140 // uses the same basic technique for tracing memory accesses, but also groups
141 // events together for processing into twos and threes so that fewer C calls
142 // are made and things run faster.
143 //
144 // Specific Details about --trace-superblocks=yes
145 // ----------------------------------------------
146 // Valgrind splits code up into single entry, multiple exit blocks
147 // known as superblocks.  By itself, --trace-superblocks=yes just
148 // prints a message as each superblock is run:
149 //
150 //  SB 04013170
151 //  SB 04013177
152 //  SB 04013173
153 //  SB 04013177
154 //
155 // The hex number is the address of the first instruction in the
156 // superblock.  You can see the relationship more obviously if you use
157 // --trace-superblocks=yes and --trace-mem=yes together.  Then a "SB"
158 // message at address X is immediately followed by an "instr:" message
159 // for that address, as the first instruction in the block is
160 // executed, for example:
161 //
162 //  SB 04014073
163 //  I  04014073,3
164 //   L 7FEFFF7F8,8
165 //  I  04014076,4
166 //  I  0401407A,3
167 //  I  0401407D,3
168 //  I  04014080,3
169 //  I  04014083,6
170 
171 
172 #include "pub_tool_basics.h"
173 #include "pub_tool_tooliface.h"
174 #include "pub_tool_libcassert.h"
175 #include "pub_tool_libcprint.h"
176 #include "pub_tool_debuginfo.h"
177 #include "pub_tool_libcbase.h"
178 #include "pub_tool_options.h"
179 #include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
180 
181 /*------------------------------------------------------------*/
182 /*--- Command line options                                 ---*/
183 /*------------------------------------------------------------*/
184 
185 /* Command line options controlling instrumentation kinds, as described at
186  * the top of this file. */
187 static Bool clo_basic_counts    = True;
188 static Bool clo_detailed_counts = False;
189 static Bool clo_trace_mem       = False;
190 static Bool clo_trace_sbs       = False;
191 
192 /* The name of the function of which the number of calls (under
193  * --basic-counts=yes) is to be counted, with default. Override with command
194  * line option --fnname. */
195 static Char* clo_fnname = "main";
196 
lk_process_cmd_line_option(Char * arg)197 static Bool lk_process_cmd_line_option(Char* arg)
198 {
199    if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
200    else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
201    else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
202    else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
203    else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
204    else
205       return False;
206 
207    tl_assert(clo_fnname);
208    tl_assert(clo_fnname[0]);
209    return True;
210 }
211 
lk_print_usage(void)212 static void lk_print_usage(void)
213 {
214    VG_(printf)(
215 "    --basic-counts=no|yes     count instructions, jumps, etc. [yes]\n"
216 "    --detailed-counts=no|yes  count loads, stores and alu ops [no]\n"
217 "    --trace-mem=no|yes        trace all loads and stores [no]\n"
218 "    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
219 "    --fnname=<name>           count calls to <name> (only used if\n"
220 "                              --basic-count=yes)  [main]\n"
221    );
222 }
223 
lk_print_debug_usage(void)224 static void lk_print_debug_usage(void)
225 {
226    VG_(printf)(
227 "    (none)\n"
228    );
229 }
230 
231 /*------------------------------------------------------------*/
232 /*--- Stuff for --basic-counts                             ---*/
233 /*------------------------------------------------------------*/
234 
235 /* Nb: use ULongs because the numbers can get very big */
236 static ULong n_func_calls    = 0;
237 static ULong n_SBs_entered   = 0;
238 static ULong n_SBs_completed = 0;
239 static ULong n_IRStmts       = 0;
240 static ULong n_guest_instrs  = 0;
241 static ULong n_Jccs          = 0;
242 static ULong n_Jccs_untaken  = 0;
243 static ULong n_IJccs         = 0;
244 static ULong n_IJccs_untaken = 0;
245 
add_one_func_call(void)246 static void add_one_func_call(void)
247 {
248    n_func_calls++;
249 }
250 
add_one_SB_entered(void)251 static void add_one_SB_entered(void)
252 {
253    n_SBs_entered++;
254 }
255 
add_one_SB_completed(void)256 static void add_one_SB_completed(void)
257 {
258    n_SBs_completed++;
259 }
260 
add_one_IRStmt(void)261 static void add_one_IRStmt(void)
262 {
263    n_IRStmts++;
264 }
265 
add_one_guest_instr(void)266 static void add_one_guest_instr(void)
267 {
268    n_guest_instrs++;
269 }
270 
add_one_Jcc(void)271 static void add_one_Jcc(void)
272 {
273    n_Jccs++;
274 }
275 
add_one_Jcc_untaken(void)276 static void add_one_Jcc_untaken(void)
277 {
278    n_Jccs_untaken++;
279 }
280 
add_one_inverted_Jcc(void)281 static void add_one_inverted_Jcc(void)
282 {
283    n_IJccs++;
284 }
285 
add_one_inverted_Jcc_untaken(void)286 static void add_one_inverted_Jcc_untaken(void)
287 {
288    n_IJccs_untaken++;
289 }
290 
/*------------------------------------------------------------*/
/*--- Stuff for --detailed-counts                          ---*/
/*------------------------------------------------------------*/

/* --- Operations --- */

typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;

#define N_OPS 3


/* --- Types --- */

#define N_TYPES 9
305 
type2index(IRType ty)306 static Int type2index ( IRType ty )
307 {
308    switch (ty) {
309       case Ity_I1:      return 0;
310       case Ity_I8:      return 1;
311       case Ity_I16:     return 2;
312       case Ity_I32:     return 3;
313       case Ity_I64:     return 4;
314       case Ity_I128:    return 5;
315       case Ity_F32:     return 6;
316       case Ity_F64:     return 7;
317       case Ity_V128:    return 8;
318       default: tl_assert(0);
319    }
320 }
321 
nameOfTypeIndex(Int i)322 static HChar* nameOfTypeIndex ( Int i )
323 {
324    switch (i) {
325       case 0: return "I1";   break;
326       case 1: return "I8";   break;
327       case 2: return "I16";  break;
328       case 3: return "I32";  break;
329       case 4: return "I64";  break;
330       case 5: return "I128"; break;
331       case 6: return "F32";  break;
332       case 7: return "F64";  break;
333       case 8: return "V128"; break;
334       default: tl_assert(0);
335    }
336 }
337 
338 
339 /* --- Counts --- */
340 
341 static ULong detailCounts[N_OPS][N_TYPES];
342 
343 /* The helper that is called from the instrumented code. */
344 static VG_REGPARM(1)
increment_detail(ULong * detail)345 void increment_detail(ULong* detail)
346 {
347    (*detail)++;
348 }
349 
350 /* A helper that adds the instrumentation for a detail. */
instrument_detail(IRSB * sb,Op op,IRType type)351 static void instrument_detail(IRSB* sb, Op op, IRType type)
352 {
353    IRDirty* di;
354    IRExpr** argv;
355    const UInt typeIx = type2index(type);
356 
357    tl_assert(op < N_OPS);
358    tl_assert(typeIx < N_TYPES);
359 
360    argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
361    di = unsafeIRDirty_0_N( 1, "increment_detail",
362                               VG_(fnptr_to_fnentry)( &increment_detail ),
363                               argv);
364    addStmtToIRSB( sb, IRStmt_Dirty(di) );
365 }
366 
367 /* Summarize and print the details. */
print_details(void)368 static void print_details ( void )
369 {
370    Int typeIx;
371    VG_(umsg)("   Type        Loads       Stores       AluOps\n");
372    VG_(umsg)("   -------------------------------------------\n");
373    for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
374       VG_(umsg)("   %4s %'12llu %'12llu %'12llu\n",
375                 nameOfTypeIndex( typeIx ),
376                 detailCounts[OpLoad ][typeIx],
377                 detailCounts[OpStore][typeIx],
378                 detailCounts[OpAlu  ][typeIx]
379       );
380    }
381 }
382 
383 
384 /*------------------------------------------------------------*/
385 /*--- Stuff for --trace-mem                                ---*/
386 /*------------------------------------------------------------*/
387 
388 #define MAX_DSIZE    512
389 
390 typedef
391    IRExpr
392    IRAtom;
393 
394 typedef
395    enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
396    EventKind;
397 
398 typedef
399    struct {
400       EventKind  ekind;
401       IRAtom*    addr;
402       Int        size;
403    }
404    Event;
405 
406 /* Up to this many unnotified events are allowed.  Must be at least two,
407    so that reads and writes to the same address can be merged into a modify.
408    Beyond that, larger numbers just potentially induce more spilling due to
409    extending live ranges of address temporaries. */
410 #define N_EVENTS 4
411 
412 /* Maintain an ordered list of memory events which are outstanding, in
413    the sense that no IR has yet been generated to do the relevant
414    helper calls.  The SB is scanned top to bottom and memory events
415    are added to the end of the list, merging with the most recent
416    notified event where possible (Dw immediately following Dr and
417    having the same size and EA can be merged).
418 
419    This merging is done so that for architectures which have
420    load-op-store instructions (x86, amd64), the instr is treated as if
421    it makes just one memory reference (a modify), rather than two (a
422    read followed by a write at the same address).
423 
424    At various points the list will need to be flushed, that is, IR
425    generated from it.  That must happen before any possible exit from
426    the block (the end, or an IRStmt_Exit).  Flushing also takes place
427    when there is no space to add a new event.
428 
429    If we require the simulation statistics to be up to date with
430    respect to possible memory exceptions, then the list would have to
431    be flushed before each memory reference.  That's a pain so we don't
432    bother.
433 
434    Flushing the list consists of walking it start to end and emitting
435    instrumentation IR for each event, in the order in which they
436    appear. */
437 
438 static Event events[N_EVENTS];
439 static Int   events_used = 0;
440 
441 
trace_instr(Addr addr,SizeT size)442 static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
443 {
444    VG_(printf)("I  %08lx,%lu\n", addr, size);
445 }
446 
trace_load(Addr addr,SizeT size)447 static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
448 {
449    VG_(printf)(" L %08lx,%lu\n", addr, size);
450 }
451 
trace_store(Addr addr,SizeT size)452 static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
453 {
454    VG_(printf)(" S %08lx,%lu\n", addr, size);
455 }
456 
trace_modify(Addr addr,SizeT size)457 static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
458 {
459    VG_(printf)(" M %08lx,%lu\n", addr, size);
460 }
461 
462 
/* Emit the deferred helper call for every pending event, in order,
   then empty the list. */
static void flushEvents(IRSB* sb)
{
   Int i;

   for (i = 0; i < events_used; i++) {
      Event*   ev = &events[i];
      Char*    hName;
      void*    hAddr;
      IRExpr** argv;
      IRDirty* di;

      /* Pick the helper matching this event's kind. */
      switch (ev->ekind) {
         case Event_Ir: hName = "trace_instr";  hAddr = trace_instr;  break;
         case Event_Dr: hName = "trace_load";   hAddr = trace_load;   break;
         case Event_Dw: hName = "trace_store";  hAddr = trace_store;  break;
         case Event_Dm: hName = "trace_modify"; hAddr = trace_modify; break;
         default:       tl_assert(0);
      }

      /* Emit the call: helper(addr, size). */
      argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
      di   = unsafeIRDirty_0_N( /*regparms*/2,
                                hName, VG_(fnptr_to_fnentry)( hAddr ),
                                argv );
      addStmtToIRSB( sb, IRStmt_Dirty(di) );
   }

   events_used = 0;
}
503 
504 // WARNING:  If you aren't interested in instruction reads, you can omit the
505 // code that adds calls to trace_instr() in flushEvents().  However, you
506 // must still call this function, addEvent_Ir() -- it is necessary to add
507 // the Ir events to the events list so that merging of paired load/store
508 // events into modify events works correctly.
addEvent_Ir(IRSB * sb,IRAtom * iaddr,UInt isize)509 static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
510 {
511    Event* evt;
512    tl_assert(clo_trace_mem);
513    tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
514             || VG_CLREQ_SZB == isize );
515    if (events_used == N_EVENTS)
516       flushEvents(sb);
517    tl_assert(events_used >= 0 && events_used < N_EVENTS);
518    evt = &events[events_used];
519    evt->ekind = Event_Ir;
520    evt->addr  = iaddr;
521    evt->size  = isize;
522    events_used++;
523 }
524 
525 static
addEvent_Dr(IRSB * sb,IRAtom * daddr,Int dsize)526 void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
527 {
528    Event* evt;
529    tl_assert(clo_trace_mem);
530    tl_assert(isIRAtom(daddr));
531    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
532    if (events_used == N_EVENTS)
533       flushEvents(sb);
534    tl_assert(events_used >= 0 && events_used < N_EVENTS);
535    evt = &events[events_used];
536    evt->ekind = Event_Dr;
537    evt->addr  = daddr;
538    evt->size  = dsize;
539    events_used++;
540 }
541 
542 static
addEvent_Dw(IRSB * sb,IRAtom * daddr,Int dsize)543 void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
544 {
545    Event* lastEvt;
546    Event* evt;
547    tl_assert(clo_trace_mem);
548    tl_assert(isIRAtom(daddr));
549    tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
550 
551    // Is it possible to merge this write with the preceding read?
552    lastEvt = &events[events_used-1];
553    if (events_used > 0
554     && lastEvt->ekind == Event_Dr
555     && lastEvt->size  == dsize
556     && eqIRAtom(lastEvt->addr, daddr))
557    {
558       lastEvt->ekind = Event_Dm;
559       return;
560    }
561 
562    // No.  Add as normal.
563    if (events_used == N_EVENTS)
564       flushEvents(sb);
565    tl_assert(events_used >= 0 && events_used < N_EVENTS);
566    evt = &events[events_used];
567    evt->ekind = Event_Dw;
568    evt->size  = dsize;
569    evt->addr  = daddr;
570    events_used++;
571 }
572 
573 
574 /*------------------------------------------------------------*/
575 /*--- Stuff for --trace-superblocks                        ---*/
576 /*------------------------------------------------------------*/
577 
trace_superblock(Addr addr)578 static void trace_superblock(Addr addr)
579 {
580    VG_(printf)("SB %08lx\n", addr);
581 }
582 
583 
584 /*------------------------------------------------------------*/
585 /*--- Basic tool functions                                 ---*/
586 /*------------------------------------------------------------*/
587 
lk_post_clo_init(void)588 static void lk_post_clo_init(void)
589 {
590    Int op, tyIx;
591 
592    if (clo_detailed_counts) {
593       for (op = 0; op < N_OPS; op++)
594          for (tyIx = 0; tyIx < N_TYPES; tyIx++)
595             detailCounts[op][tyIx] = 0;
596    }
597 }
598 
599 static
lk_instrument(VgCallbackClosure * closure,IRSB * sbIn,VexGuestLayout * layout,VexGuestExtents * vge,IRType gWordTy,IRType hWordTy)600 IRSB* lk_instrument ( VgCallbackClosure* closure,
601                       IRSB* sbIn,
602                       VexGuestLayout* layout,
603                       VexGuestExtents* vge,
604                       IRType gWordTy, IRType hWordTy )
605 {
606    IRDirty*   di;
607    Int        i;
608    IRSB*      sbOut;
609    Char       fnname[100];
610    IRType     type;
611    IRTypeEnv* tyenv = sbIn->tyenv;
612    Addr       iaddr = 0, dst;
613    UInt       ilen = 0;
614    Bool       condition_inverted = False;
615 
616    if (gWordTy != hWordTy) {
617       /* We don't currently support this case. */
618       VG_(tool_panic)("host/guest word size mismatch");
619    }
620 
621    /* Set up SB */
622    sbOut = deepCopyIRSBExceptStmts(sbIn);
623 
624    // Copy verbatim any IR preamble preceding the first IMark
625    i = 0;
626    while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
627       addStmtToIRSB( sbOut, sbIn->stmts[i] );
628       i++;
629    }
630 
631    if (clo_basic_counts) {
632       /* Count this superblock. */
633       di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
634                                  VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
635                                  mkIRExprVec_0() );
636       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
637    }
638 
639    if (clo_trace_sbs) {
640       /* Print this superblock's address. */
641       di = unsafeIRDirty_0_N(
642               0, "trace_superblock",
643               VG_(fnptr_to_fnentry)( &trace_superblock ),
644               mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
645            );
646       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
647    }
648 
649    if (clo_trace_mem) {
650       events_used = 0;
651    }
652 
653    for (/*use current i*/; i < sbIn->stmts_used; i++) {
654       IRStmt* st = sbIn->stmts[i];
655       if (!st || st->tag == Ist_NoOp) continue;
656 
657       if (clo_basic_counts) {
658          /* Count one VEX statement. */
659          di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
660                                     VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
661                                     mkIRExprVec_0() );
662          addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
663       }
664 
665       switch (st->tag) {
666          case Ist_NoOp:
667          case Ist_AbiHint:
668          case Ist_Put:
669          case Ist_PutI:
670          case Ist_MBE:
671             addStmtToIRSB( sbOut, st );
672             break;
673 
674          case Ist_IMark:
675             if (clo_basic_counts) {
676                /* Needed to be able to check for inverted condition in Ist_Exit */
677                iaddr = st->Ist.IMark.addr;
678                ilen  = st->Ist.IMark.len;
679 
680                /* Count guest instruction. */
681                di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
682                                           VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
683                                           mkIRExprVec_0() );
684                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
685 
686                /* An unconditional branch to a known destination in the
687                 * guest's instructions can be represented, in the IRSB to
688                 * instrument, by the VEX statements that are the
689                 * translation of that known destination. This feature is
690                 * called 'SB chasing' and can be influenced by command
691                 * line option --vex-guest-chase-thresh.
692                 *
693                 * To get an accurate count of the calls to a specific
694                 * function, taking SB chasing into account, we need to
695                 * check for each guest instruction (Ist_IMark) if it is
696                 * the entry point of a function.
697                 */
698                tl_assert(clo_fnname);
699                tl_assert(clo_fnname[0]);
700                if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
701                                             fnname, sizeof(fnname))
702                    && 0 == VG_(strcmp)(fnname, clo_fnname)) {
703                   di = unsafeIRDirty_0_N(
704                           0, "add_one_func_call",
705                              VG_(fnptr_to_fnentry)( &add_one_func_call ),
706                              mkIRExprVec_0() );
707                   addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
708                }
709             }
710             if (clo_trace_mem) {
711                // WARNING: do not remove this function call, even if you
712                // aren't interested in instruction reads.  See the comment
713                // above the function itself for more detail.
714                addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
715                             st->Ist.IMark.len );
716             }
717             addStmtToIRSB( sbOut, st );
718             break;
719 
720          case Ist_WrTmp:
721             // Add a call to trace_load() if --trace-mem=yes.
722             if (clo_trace_mem) {
723                IRExpr* data = st->Ist.WrTmp.data;
724                if (data->tag == Iex_Load) {
725                   addEvent_Dr( sbOut, data->Iex.Load.addr,
726                                sizeofIRType(data->Iex.Load.ty) );
727                }
728             }
729             if (clo_detailed_counts) {
730                IRExpr* expr = st->Ist.WrTmp.data;
731                type = typeOfIRExpr(sbOut->tyenv, expr);
732                tl_assert(type != Ity_INVALID);
733                switch (expr->tag) {
734                   case Iex_Load:
735                      instrument_detail( sbOut, OpLoad, type );
736                      break;
737                   case Iex_Unop:
738                   case Iex_Binop:
739                   case Iex_Triop:
740                   case Iex_Qop:
741                   case Iex_Mux0X:
742                      instrument_detail( sbOut, OpAlu, type );
743                      break;
744                   default:
745                      break;
746                }
747             }
748             addStmtToIRSB( sbOut, st );
749             break;
750 
751          case Ist_Store:
752             if (clo_trace_mem) {
753                IRExpr* data  = st->Ist.Store.data;
754                addEvent_Dw( sbOut, st->Ist.Store.addr,
755                             sizeofIRType(typeOfIRExpr(tyenv, data)) );
756             }
757             if (clo_detailed_counts) {
758                type = typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data);
759                tl_assert(type != Ity_INVALID);
760                instrument_detail( sbOut, OpStore, type );
761             }
762             addStmtToIRSB( sbOut, st );
763             break;
764 
765          case Ist_Dirty: {
766             if (clo_trace_mem) {
767                Int      dsize;
768                IRDirty* d = st->Ist.Dirty.details;
769                if (d->mFx != Ifx_None) {
770                   // This dirty helper accesses memory.  Collect the details.
771                   tl_assert(d->mAddr != NULL);
772                   tl_assert(d->mSize != 0);
773                   dsize = d->mSize;
774                   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
775                      addEvent_Dr( sbOut, d->mAddr, dsize );
776                   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
777                      addEvent_Dw( sbOut, d->mAddr, dsize );
778                } else {
779                   tl_assert(d->mAddr == NULL);
780                   tl_assert(d->mSize == 0);
781                }
782             }
783             addStmtToIRSB( sbOut, st );
784             break;
785          }
786 
787          case Ist_CAS: {
788             /* We treat it as a read and a write of the location.  I
789                think that is the same behaviour as it was before IRCAS
790                was introduced, since prior to that point, the Vex
791                front ends would translate a lock-prefixed instruction
792                into a (normal) read followed by a (normal) write. */
793             Int    dataSize;
794             IRType dataTy;
795             IRCAS* cas = st->Ist.CAS.details;
796             tl_assert(cas->addr != NULL);
797             tl_assert(cas->dataLo != NULL);
798             dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
799             dataSize = sizeofIRType(dataTy);
800             if (cas->dataHi != NULL)
801                dataSize *= 2; /* since it's a doubleword-CAS */
802             if (clo_trace_mem) {
803                addEvent_Dr( sbOut, cas->addr, dataSize );
804                addEvent_Dw( sbOut, cas->addr, dataSize );
805             }
806             if (clo_detailed_counts) {
807                instrument_detail( sbOut, OpLoad, dataTy );
808                if (cas->dataHi != NULL) /* dcas */
809                   instrument_detail( sbOut, OpLoad, dataTy );
810                instrument_detail( sbOut, OpStore, dataTy );
811                if (cas->dataHi != NULL) /* dcas */
812                   instrument_detail( sbOut, OpStore, dataTy );
813             }
814             addStmtToIRSB( sbOut, st );
815             break;
816          }
817 
818          case Ist_LLSC: {
819             IRType dataTy;
820             if (st->Ist.LLSC.storedata == NULL) {
821                /* LL */
822                dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
823                if (clo_trace_mem)
824                   addEvent_Dr( sbOut, st->Ist.LLSC.addr,
825                                       sizeofIRType(dataTy) );
826                if (clo_detailed_counts)
827                   instrument_detail( sbOut, OpLoad, dataTy );
828             } else {
829                /* SC */
830                dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
831                if (clo_trace_mem)
832                   addEvent_Dw( sbOut, st->Ist.LLSC.addr,
833                                       sizeofIRType(dataTy) );
834                if (clo_detailed_counts)
835                   instrument_detail( sbOut, OpStore, dataTy );
836             }
837             addStmtToIRSB( sbOut, st );
838             break;
839          }
840 
841          case Ist_Exit:
842             if (clo_basic_counts) {
               // The condition of a branch was inverted by VEX if a taken
               // branch is in fact a fall through according to client address
845                tl_assert(iaddr != 0);
846                dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
847                                            st->Ist.Exit.dst->Ico.U64;
848                condition_inverted = (dst == iaddr + ilen);
849 
850                /* Count Jcc */
851                if (!condition_inverted)
852                   di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
853                                           VG_(fnptr_to_fnentry)( &add_one_Jcc ),
854                                           mkIRExprVec_0() );
855                else
856                   di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
857                                           VG_(fnptr_to_fnentry)(
858                                              &add_one_inverted_Jcc ),
859                                           mkIRExprVec_0() );
860 
861                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
862             }
863             if (clo_trace_mem) {
864                flushEvents(sbOut);
865             }
866 
867             addStmtToIRSB( sbOut, st );      // Original statement
868 
869             if (clo_basic_counts) {
870                /* Count non-taken Jcc */
871                if (!condition_inverted)
872                   di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
873                                           VG_(fnptr_to_fnentry)(
874                                              &add_one_Jcc_untaken ),
875                                           mkIRExprVec_0() );
876                else
877                   di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
878                                           VG_(fnptr_to_fnentry)(
879                                              &add_one_inverted_Jcc_untaken ),
880                                           mkIRExprVec_0() );
881 
882                addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
883             }
884             break;
885 
886          default:
887             tl_assert(0);
888       }
889    }
890 
891    if (clo_basic_counts) {
892       /* Count this basic block. */
893       di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
894                                  VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
895                                  mkIRExprVec_0() );
896       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
897    }
898 
   if (clo_trace_mem) {
      /* End of the sbIn: flush any outstanding memory events. */
      flushEvents(sbOut);
   }
903 
904    return sbOut;
905 }
906 
static void lk_fini(Int exitcode)
{
   /* Scratch buffer for VG_(percentify).  It must hold the worst-case
      string "100%" plus the trailing NUL, ie. 5 bytes -- the previous
      size of 4 overflowed by one byte whenever every conditional branch
      was taken (taken_Jccs == total_Jccs).  The size handed down to
      VG_(percentify) is a printf-style field width, so it excludes the
      NUL terminator. */
   char percentify_buf[5]; /* Up to three digits, '%' and NUL. */
   const int percentify_size = sizeof(percentify_buf) - 1;
   const int percentify_decs = 0;   /* no decimal places in the percentage */

   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);

   if (clo_basic_counts) {
      /* VEX sometimes inverts the sense of a guest conditional branch,
         so the "inverted" counters are folded back in with the opposite
         sense to recover the true taken/untaken totals. */
      ULong total_Jccs = n_Jccs + n_IJccs;
      ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;

      VG_(umsg)("Counted %'llu call%s to %s()\n",
                n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);

      VG_(umsg)("\n");
      VG_(umsg)("Jccs:\n");
      VG_(umsg)("  total:         %'llu\n", total_Jccs);
      /* Pass 1 for the divisor when no Jccs ran at all, to avoid a
         division by zero inside VG_(percentify). */
      VG_(percentify)(taken_Jccs, (total_Jccs ? total_Jccs : 1),
         percentify_decs, percentify_size, percentify_buf);
      VG_(umsg)("  taken:         %'llu (%s)\n",
         taken_Jccs, percentify_buf);

      VG_(umsg)("\n");
      VG_(umsg)("Executed:\n");
      VG_(umsg)("  SBs entered:   %'llu\n", n_SBs_entered);
      VG_(umsg)("  SBs completed: %'llu\n", n_SBs_completed);
      VG_(umsg)("  guest instrs:  %'llu\n", n_guest_instrs);
      VG_(umsg)("  IRStmts:       %'llu\n", n_IRStmts);

      VG_(umsg)("\n");
      VG_(umsg)("Ratios:\n");
      tl_assert(n_SBs_entered); // Paranoia time.
      /* Ratios are scaled by 10 so one decimal digit of precision
         survives the integer division. */
      VG_(umsg)("  guest instrs : SB entered  = %'llu : 10\n",
         10 * n_guest_instrs / n_SBs_entered);
      VG_(umsg)("       IRStmts : SB entered  = %'llu : 10\n",
         10 * n_IRStmts / n_SBs_entered);
      tl_assert(n_guest_instrs); // Paranoia time.
      VG_(umsg)("       IRStmts : guest instr = %'llu : 10\n",
         10 * n_IRStmts / n_guest_instrs);
   }

   if (clo_detailed_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("IR-level counts by type:\n");
      print_details();
   }

   if (clo_basic_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("Exit code:       %d\n", exitcode);
   }
}
961 
/* Tool registration.  Called by the core before command line options
   are processed: fills in the tool's identity strings and registers
   the three mandatory callbacks (post-CLO init, instrumentation, and
   finalisation), plus the command-line-option handlers. */
static void lk_pre_clo_init(void)
{
   VG_(details_name)            ("Lackey");
   VG_(details_version)         (NULL);  /* NULL: use Valgrind's own version */
   VG_(details_description)     ("an example Valgrind tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2010, and GNU GPL'd, by Nicholas Nethercote.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   /* Hint for sizing translation buffers: average size, in bytes, of an
      instrumented superblock produced by this tool. */
   VG_(details_avg_translation_sizeB) ( 200 );

   VG_(basic_tool_funcs)          (lk_post_clo_init,
                                   lk_instrument,
                                   lk_fini);
   VG_(needs_command_line_options)(lk_process_cmd_line_option,
                                   lk_print_usage,
                                   lk_print_debug_usage);
}
979 
/* Checked by the core at tool load time: records the tool-API version
   this tool was built against and names its pre-CLO init function. */
VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)
981 
982 /*--------------------------------------------------------------------*/
983 /*--- end                                                lk_main.c ---*/
984 /*--------------------------------------------------------------------*/
985