
/*--------------------------------------------------------------------*/
/*--- An example Valgrind tool.                          lk_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Lackey, an example Valgrind tool that does
   some simple program measurement and tracing.

   Copyright (C) 2002-2011 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

// This tool shows how to do some basic instrumentation.
//
// There are four kinds of instrumentation it can do.  They can be turned
// on/off independently with command line options:
//
// * --basic-counts   : do basic counts, eg. number of instructions
//                      executed, jumps executed, etc.
// * --detailed-counts: do more detailed counts:  number of loads, stores
//                      and ALU operations of different sizes.
// * --trace-mem=yes:   trace all (data) memory accesses.
// * --trace-superblocks=yes:
//                      trace all superblock entries.  Mostly of interest
//                      to the Valgrind developers.
//
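// For example, a typical invocation might look like this ("ls" here is
// just an arbitrary target program):
//
//     valgrind --tool=lackey --basic-counts=yes --trace-mem=yes ls
//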
// The code for each kind of instrumentation is guarded by a clo_* variable:
// clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
//
// If you want to modify any of the instrumentation code, look for the code
// that is guarded by the relevant clo_* variable (eg. clo_trace_mem).
// If you're not interested in the other kinds of instrumentation you can
// remove them.  If you want to do more complex modifications, please read
// VEX/pub/libvex_ir.h to understand the intermediate representation.
//
//
// Specific Details about --trace-mem=yes
// --------------------------------------
// Lackey's --trace-mem code is a good starting point for building Valgrind
// tools that act on memory loads and stores.  It could also be used as-is,
// with its output used as input to a post-mortem processing step.  However,
// because memory traces can be very large, online analysis is generally
// better.
//
// It prints memory data access traces that look like this:
//
//      I  0023C790,2  # instruction read at 0x0023C790 of size 2
//      I  0023C792,5
//      S BE80199C,4  # data store at 0xBE80199C of size 4
//      I  0025242B,3
//      L BE801950,4  # data load at 0xBE801950 of size 4
//      I  0023D476,7
//      M 0025747C,1  # data modify at 0x0025747C of size 1
//      I  0023DC20,2
//      L 00254962,1
//      L BE801FB3,1
//      I  00252305,1
//      L 00254AEB,1
//      S 00257998,1
//
// Every instruction executed has an "instr" event representing it.
// Instructions that do memory accesses are followed by one or more "load",
// "store" or "modify" events.  Some instructions do more than one load or
// store, as in the last two examples in the above trace.
//
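// Because each record is just a kind letter plus an "address,size" pair,
// post-mortem processing can be simple.  Here is a minimal sketch of a
// stand-alone reader (not part of this tool) for a --trace-mem=yes run;
// the trace file name "lackey.trace" (eg. captured with
// --log-file=lackey.trace) and the choice of counting data loads are
// purely for illustration:
//
//     #include <stdio.h>
//
//     int main(void)
//     {
//        char line[128], kind;
//        unsigned long addr, size, n_loads = 0;
//        FILE* f = fopen("lackey.trace", "r");
//        if (f == NULL) return 1;
//        while (fgets(line, sizeof(line), f) != NULL) {
//           // Skip anything that isn't a "<kind> <addr>,<size>" record,
//           // eg. Valgrind's own "==pid==" commentary lines.
//           if (sscanf(line, " %c %lx,%lu", &kind, &addr, &size) != 3)
//              continue;
//           if (kind == 'L' || kind == 'M')   // loads, including modifies
//              n_loads++;
//        }
//        fclose(f);
//        printf("loads: %lu\n", n_loads);
//        return 0;
//     }
//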
// Here are some examples of x86 instructions that do different combinations
// of loads, stores, and modifies.
//
//     Instruction          Memory accesses                  Event sequence
//     -----------          ---------------                  --------------
//     add %eax, %ebx       No loads or stores               instr
//
//     movl (%eax), %ebx    loads (%eax)                     instr, load
//
//     movl %eax, (%ebx)    stores (%ebx)                    instr, store
//
//     incl (%ecx)          modifies (%ecx)                  instr, modify
//
//     cmpsb                loads (%esi), loads(%edi)        instr, load, load
//
//     call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
//     pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
//     movsw                loads (%esi), stores (%edi)      instr, load, store
//
// Instructions using x86 "rep" prefixes are traced as if they are repeated
// N times.
//
// Lackey with --trace-mem gives good traces, but they are not perfect, for
// the following reasons:
//
// - It does not trace into the OS kernel, so system calls and other kernel
//   operations (eg. some scheduling and signal handling code) are ignored.
//
// - It could model loads and stores done at the system call boundary using
//   the pre_mem_read/post_mem_write events.  For example, if you call
//   fstat() you know that the passed in buffer has been written.  But it
//   currently does not do this.
//
// - Valgrind replaces some code (not much) with its own, notably parts of
//   code for scheduling operations and signal handling.  This code is not
//   traced.
//
// - There is no consideration of virtual-to-physical address mapping.
//   This may not matter for many purposes.
//
// - Valgrind modifies the instruction stream in some very minor ways.  For
//   example, on x86 the bts, btc, btr instructions are incorrectly
//   considered to always touch memory (this is a consequence of these
//   instructions being very difficult to simulate).
//
// - Valgrind tools lay out memory differently from normal programs, so the
//   addresses you get will not be typical.  Thus Lackey (and all Valgrind
//   tools) is suitable for getting relative memory traces -- eg. if you
//   want to analyse locality of memory accesses -- but is not good if
//   absolute addresses are important.
//
// Despite all these warnings, Lackey's results should be good enough for a
// wide range of purposes.  For example, Cachegrind shares all the above
// shortcomings and it is still useful.
//
// For further inspiration, you should look at cachegrind/cg_main.c, which
// uses the same basic technique for tracing memory accesses, but also groups
// events together for processing into twos and threes so that fewer C calls
// are made and things run faster.
//
// Specific Details about --trace-superblocks=yes
// ----------------------------------------------
// Valgrind splits code up into single-entry, multiple-exit blocks
// known as superblocks.  By itself, --trace-superblocks=yes just
// prints a message as each superblock is run:
//
//      SB 04013170
//      SB 04013177
//      SB 04013173
//      SB 04013177
//
// The hex number is the address of the first instruction in the
// superblock.  You can see the relationship more obviously if you use
// --trace-superblocks=yes and --trace-mem=yes together.  Then a "SB"
// message at address X is immediately followed by an "I" (instruction
// read) event for that address, as the first instruction in the block
// is executed, for example:
//
//      SB 04014073
//      I  04014073,3
//       L 7FEFFF7F8,8
//      I  04014076,4
//      I  0401407A,3
//      I  0401407D,3
//      I  04014080,3
//      I  04014083,6


#include "pub_tool_basics.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_options.h"
#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)

/*------------------------------------------------------------*/
/*--- Command line options                                 ---*/
/*------------------------------------------------------------*/

/* Command line options controlling instrumentation kinds, as described at
 * the top of this file. */
static Bool clo_basic_counts    = True;
static Bool clo_detailed_counts = False;
static Bool clo_trace_mem       = False;
static Bool clo_trace_sbs       = False;

/* The name of the function whose calls are to be counted under
 * --basic-counts=yes.  Defaults to "main"; override with the --fnname
 * command line option. */
static Char* clo_fnname = "main";

static Bool lk_process_cmd_line_option(Char* arg)
{
   if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
   else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
   else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
   else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
   else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
   else
      return False;

   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);
   return True;
}

static void lk_print_usage(void)
{
   VG_(printf)(
"    --basic-counts=no|yes       count instructions, jumps, etc. [yes]\n"
"    --detailed-counts=no|yes    count loads, stores and alu ops [no]\n"
"    --trace-mem=no|yes          trace all loads and stores [no]\n"
"    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
"    --fnname=<name>             count calls to <name> (only used if\n"
"                                --basic-counts=yes)  [main]\n"
   );
}

static void lk_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}

/*------------------------------------------------------------*/
/*--- Stuff for --basic-counts                             ---*/
/*------------------------------------------------------------*/

/* Nb: use ULongs because the numbers can get very big */
static ULong n_func_calls    = 0;
static ULong n_SBs_entered   = 0;
static ULong n_SBs_completed = 0;
static ULong n_IRStmts       = 0;
static ULong n_guest_instrs  = 0;
static ULong n_Jccs          = 0;
static ULong n_Jccs_untaken  = 0;
static ULong n_IJccs         = 0;
static ULong n_IJccs_untaken = 0;

static void add_one_func_call(void)
{
   n_func_calls++;
}

static void add_one_SB_entered(void)
{
   n_SBs_entered++;
}

static void add_one_SB_completed(void)
{
   n_SBs_completed++;
}

static void add_one_IRStmt(void)
{
   n_IRStmts++;
}

static void add_one_guest_instr(void)
{
   n_guest_instrs++;
}

static void add_one_Jcc(void)
{
   n_Jccs++;
}

static void add_one_Jcc_untaken(void)
{
   n_Jccs_untaken++;
}

static void add_one_inverted_Jcc(void)
{
   n_IJccs++;
}

static void add_one_inverted_Jcc_untaken(void)
{
   n_IJccs_untaken++;
}

/*------------------------------------------------------------*/
/*--- Stuff for --detailed-counts                          ---*/
/*------------------------------------------------------------*/

/* --- Operations --- */

typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;

#define N_OPS 3


/* --- Types --- */

#define N_TYPES 10

static Int type2index ( IRType ty )
{
   switch (ty) {
      case Ity_I1:   return 0;
      case Ity_I8:   return 1;
      case Ity_I16:  return 2;
      case Ity_I32:  return 3;
      case Ity_I64:  return 4;
      case Ity_I128: return 5;
      case Ity_F32:  return 6;
      case Ity_F64:  return 7;
      case Ity_F128: return 8;
      case Ity_V128: return 9;
      default: tl_assert(0);
   }
}

static HChar* nameOfTypeIndex ( Int i )
{
   switch (i) {
      case 0: return "I1";
      case 1: return "I8";
      case 2: return "I16";
      case 3: return "I32";
      case 4: return "I64";
      case 5: return "I128";
      case 6: return "F32";
      case 7: return "F64";
      case 8: return "F128";
      case 9: return "V128";
      default: tl_assert(0);
   }
}


/* --- Counts --- */

static ULong detailCounts[N_OPS][N_TYPES];

/* The helper that is called from the instrumented code. */
static VG_REGPARM(1)
void increment_detail(ULong* detail)
{
   (*detail)++;
}

/* A helper that adds the instrumentation for a detail. */
static void instrument_detail(IRSB* sb, Op op, IRType type)
{
   IRDirty* di;
   IRExpr** argv;
   const UInt typeIx = type2index(type);

   tl_assert(op < N_OPS);
   tl_assert(typeIx < N_TYPES);

   argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
   di = unsafeIRDirty_0_N( 1, "increment_detail",
                              VG_(fnptr_to_fnentry)( &increment_detail ),
                              argv);
   addStmtToIRSB( sb, IRStmt_Dirty(di) );
}

/* Summarize and print the details. */
static void print_details ( void )
{
   Int typeIx;
   VG_(umsg)("   Type        Loads       Stores       AluOps\n");
   VG_(umsg)("   -------------------------------------------\n");
   for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
      VG_(umsg)("   %4s %'12llu %'12llu %'12llu\n",
                nameOfTypeIndex( typeIx ),
                detailCounts[OpLoad ][typeIx],
                detailCounts[OpStore][typeIx],
                detailCounts[OpAlu  ][typeIx]
      );
   }
}


/*------------------------------------------------------------*/
/*--- Stuff for --trace-mem                                ---*/
/*------------------------------------------------------------*/

#define MAX_DSIZE    512

typedef
   IRExpr
   IRAtom;

typedef
   enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
   EventKind;

typedef
   struct {
      EventKind ekind;
      IRAtom*   addr;
      Int       size;
   }
   Event;

/* Up to this many unnotified events are allowed.  Must be at least two,
   so that reads and writes to the same address can be merged into a modify.
   Beyond that, larger numbers just potentially induce more spilling due to
   extending live ranges of address temporaries. */
#define N_EVENTS 4

/* Maintain an ordered list of memory events which are outstanding, in
   the sense that no IR has yet been generated to do the relevant
   helper calls.  The SB is scanned top to bottom and memory events
   are added to the end of the list, merging with the most recent
   outstanding event where possible (a Dw immediately following a Dr
   with the same size and effective address can be merged into a Dm).

   This merging is done so that for architectures which have
   load-op-store instructions (x86, amd64), the instr is treated as if
   it makes just one memory reference (a modify), rather than two (a
   read followed by a write at the same address).

   At various points the list will need to be flushed, that is, IR
   generated from it.  That must happen before any possible exit from
   the block (the end, or an IRStmt_Exit).  Flushing also takes place
   when there is no space to add a new event.

   If we require the simulation statistics to be up to date with
   respect to possible memory exceptions, then the list would have to
   be flushed before each memory reference.  That's a pain so we don't
   bother.

   Flushing the list consists of walking it start to end and emitting
   instrumentation IR for each event, in the order in which they
   appear. */
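
/* For example (cf. the x86 table in the header comment), "incl (%ecx)"
   arrives here as three calls -- addEvent_Ir, addEvent_Dr, addEvent_Dw --
   and the Dw is folded into the just-added Dr, so the list ends up holding
   one Event_Ir and one Event_Dm, and the flush emits an "I" line followed
   by an "M" line. */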

static Event events[N_EVENTS];
static Int   events_used = 0;


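/* The printing helpers called from the instrumented code, one per event
   kind.  Their output is the trace format shown in the header comment. */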
static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
{
   VG_(printf)("I  %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
{
   VG_(printf)(" L %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
{
   VG_(printf)(" S %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
{
   VG_(printf)(" M %08lx,%lu\n", addr, size);
}


static void flushEvents(IRSB* sb)
{
   Int       i;
   Char*     helperName;
   void*     helperAddr;
   IRExpr**  argv;
   IRDirty*  di;
   Event*    ev;

   for (i = 0; i < events_used; i++) {

      ev = &events[i];

      // Decide on helper fn to call and args to pass it.
      switch (ev->ekind) {
         case Event_Ir: helperName = "trace_instr";
                        helperAddr =  trace_instr;  break;

         case Event_Dr: helperName = "trace_load";
                        helperAddr =  trace_load;   break;

         case Event_Dw: helperName = "trace_store";
                        helperAddr =  trace_store;  break;

         case Event_Dm: helperName = "trace_modify";
                        helperAddr =  trace_modify; break;
         default:
            tl_assert(0);
      }

      // Add the helper.
      argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
      di   = unsafeIRDirty_0_N( /*regparms*/2,
                                helperName, VG_(fnptr_to_fnentry)( helperAddr ),
                                argv );
      addStmtToIRSB( sb, IRStmt_Dirty(di) );
   }

   events_used = 0;
}

// WARNING:  If you aren't interested in instruction reads, you can omit the
// code that adds calls to trace_instr() in flushEvents().  However, you
// must still call this function, addEvent_Ir() -- it is necessary to add
// the Ir events to the events list so that merging of paired load/store
// events into modify events works correctly.
static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
            || VG_CLREQ_SZB == isize );
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Ir;
   evt->addr  = iaddr;
   evt->size  = isize;
   events_used++;
}

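/* Add an ordinary data-read event. */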
static
void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert(isIRAtom(daddr));
   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Dr;
   evt->addr  = daddr;
   evt->size  = dsize;
   events_used++;
}

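/* Add a data-write event, merging it with an immediately preceding read
   of the same size and address into a single modify event. */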
static
void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
{
   Event* lastEvt;
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert(isIRAtom(daddr));
   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);

   // Is it possible to merge this write with the preceding read?
   // (Check events_used first, so we never form a pointer to events[-1].)
   if (events_used > 0) {
      lastEvt = &events[events_used-1];
      if (lastEvt->ekind == Event_Dr
          && lastEvt->size  == dsize
          && eqIRAtom(lastEvt->addr, daddr))
      {
         lastEvt->ekind = Event_Dm;
         return;
      }
   }

   // No.  Add as normal.
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Dw;
   evt->size  = dsize;
   evt->addr  = daddr;
   events_used++;
}


/*------------------------------------------------------------*/
/*--- Stuff for --trace-superblocks                        ---*/
/*------------------------------------------------------------*/

static void trace_superblock(Addr addr)
{
   VG_(printf)("SB %08lx\n", addr);
}


/*------------------------------------------------------------*/
/*--- Basic tool functions                                 ---*/
/*------------------------------------------------------------*/

static void lk_post_clo_init(void)
{
   Int op, tyIx;

   if (clo_detailed_counts) {
      for (op = 0; op < N_OPS; op++)
         for (tyIx = 0; tyIx < N_TYPES; tyIx++)
            detailCounts[op][tyIx] = 0;
   }
}

static
IRSB* lk_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy )
{
   IRDirty*   di;
   Int        i;
   IRSB*      sbOut;
   Char       fnname[100];
   IRType     type;
   IRTypeEnv* tyenv = sbIn->tyenv;
   Addr       iaddr = 0, dst;
   UInt       ilen = 0;
   Bool       condition_inverted = False;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Set up SB */
   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   if (clo_basic_counts) {
      /* Count this superblock. */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
                                 VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
                                 mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_sbs) {
      /* Print this superblock's address. */
      di = unsafeIRDirty_0_N(
              0, "trace_superblock",
              VG_(fnptr_to_fnentry)( &trace_superblock ),
              mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
           );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      events_used = 0;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];
      if (!st || st->tag == Ist_NoOp) continue;

      if (clo_basic_counts) {
         /* Count one VEX statement. */
         di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
                                    VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
                                    mkIRExprVec_0() );
         addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
      }

      switch (st->tag) {
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_IMark:
            if (clo_basic_counts) {
               /* Needed to be able to check for inverted condition in
                  Ist_Exit */
               iaddr = st->Ist.IMark.addr;
               ilen  = st->Ist.IMark.len;

               /* Count guest instruction. */
               di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
                                       VG_(fnptr_to_fnentry)(
                                          &add_one_guest_instr ),
                                       mkIRExprVec_0() );
               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );

               /* An unconditional branch to a known destination in the
                * guest's instructions can be represented, in the IRSB to
                * instrument, by the VEX statements that are the
                * translation of that known destination.  This feature is
                * called 'SB chasing' and can be influenced by command
                * line option --vex-guest-chase-thresh.
                *
                * To get an accurate count of the calls to a specific
                * function, taking SB chasing into account, we need to
                * check for each guest instruction (Ist_IMark) if it is
                * the entry point of a function.
                */
               tl_assert(clo_fnname);
               tl_assert(clo_fnname[0]);
               if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
                                            fnname, sizeof(fnname))
                   && 0 == VG_(strcmp)(fnname, clo_fnname)) {
                  di = unsafeIRDirty_0_N(
                          0, "add_one_func_call",
                          VG_(fnptr_to_fnentry)( &add_one_func_call ),
                          mkIRExprVec_0() );
                  addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
               }
            }
            if (clo_trace_mem) {
               // WARNING: do not remove this function call, even if you
               // aren't interested in instruction reads.  See the comment
               // above the function itself for more detail.
               addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
                            st->Ist.IMark.len );
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_WrTmp:
            // Add a call to trace_load() if --trace-mem=yes.
            if (clo_trace_mem) {
               IRExpr* data = st->Ist.WrTmp.data;
               if (data->tag == Iex_Load) {
                  addEvent_Dr( sbOut, data->Iex.Load.addr,
                               sizeofIRType(data->Iex.Load.ty) );
               }
            }
            if (clo_detailed_counts) {
               IRExpr* expr = st->Ist.WrTmp.data;
               type = typeOfIRExpr(sbOut->tyenv, expr);
               tl_assert(type != Ity_INVALID);
               switch (expr->tag) {
                  case Iex_Load:
                     instrument_detail( sbOut, OpLoad, type );
                     break;
                  case Iex_Unop:
                  case Iex_Binop:
                  case Iex_Triop:
                  case Iex_Qop:
                  case Iex_Mux0X:
                     instrument_detail( sbOut, OpAlu, type );
                     break;
                  default:
                     break;
               }
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_Store:
            if (clo_trace_mem) {
               IRExpr* data = st->Ist.Store.data;
               addEvent_Dw( sbOut, st->Ist.Store.addr,
                            sizeofIRType(typeOfIRExpr(tyenv, data)) );
            }
            if (clo_detailed_counts) {
               type = typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data);
               tl_assert(type != Ity_INVALID);
               instrument_detail( sbOut, OpStore, type );
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_Dirty: {
            if (clo_trace_mem) {
               Int      dsize;
               IRDirty* d = st->Ist.Dirty.details;
               if (d->mFx != Ifx_None) {
                  // This dirty helper accesses memory.  Collect the details.
                  tl_assert(d->mAddr != NULL);
                  tl_assert(d->mSize != 0);
                  dsize = d->mSize;
                  if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                     addEvent_Dr( sbOut, d->mAddr, dsize );
                  if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                     addEvent_Dw( sbOut, d->mAddr, dsize );
               } else {
                  tl_assert(d->mAddr == NULL);
                  tl_assert(d->mSize == 0);
               }
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRType dataTy;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
            dataSize = sizeofIRType(dataTy);
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            if (clo_trace_mem) {
               addEvent_Dr( sbOut, cas->addr, dataSize );
               addEvent_Dw( sbOut, cas->addr, dataSize );
            }
            if (clo_detailed_counts) {
               instrument_detail( sbOut, OpLoad, dataTy );
               if (cas->dataHi != NULL) /* dcas */
                  instrument_detail( sbOut, OpLoad, dataTy );
               instrument_detail( sbOut, OpStore, dataTy );
               if (cas->dataHi != NULL) /* dcas */
                  instrument_detail( sbOut, OpStore, dataTy );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               if (clo_trace_mem)
                  addEvent_Dr( sbOut, st->Ist.LLSC.addr,
                               sizeofIRType(dataTy) );
               if (clo_detailed_counts)
                  instrument_detail( sbOut, OpLoad, dataTy );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               if (clo_trace_mem)
                  addEvent_Dw( sbOut, st->Ist.LLSC.addr,
                               sizeofIRType(dataTy) );
               if (clo_detailed_counts)
                  instrument_detail( sbOut, OpStore, dataTy );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_Exit:
            if (clo_basic_counts) {
               // The condition of a branch was inverted by VEX if a taken
               // branch is in fact a fall-through according to the client
               // address.
               tl_assert(iaddr != 0);
               dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
                                           st->Ist.Exit.dst->Ico.U64;
               condition_inverted = (dst == iaddr + ilen);

               /* Count Jcc */
               if (!condition_inverted)
                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
                                          VG_(fnptr_to_fnentry)(
                                             &add_one_Jcc ),
                                          mkIRExprVec_0() );
               else
                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
                                          VG_(fnptr_to_fnentry)(
                                             &add_one_inverted_Jcc ),
                                          mkIRExprVec_0() );

               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            if (clo_trace_mem) {
               flushEvents(sbOut);
            }

            addStmtToIRSB( sbOut, st );      // Original statement

            if (clo_basic_counts) {
               /* Count non-taken Jcc */
               if (!condition_inverted)
                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
                                          VG_(fnptr_to_fnentry)(
                                             &add_one_Jcc_untaken ),
                                          mkIRExprVec_0() );
               else
                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
                                          VG_(fnptr_to_fnentry)(
                                             &add_one_inverted_Jcc_untaken ),
                                          mkIRExprVec_0() );

               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            break;

         default:
            tl_assert(0);
      }
   }

   if (clo_basic_counts) {
      /* Count this superblock's completion. */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
                                 VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
                                 mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      /* At the end of the sbIn.  Flush any outstanding events. */
      flushEvents(sbOut);
   }

   return sbOut;
}

static void lk_fini(Int exitcode)
{
   char percentify_buf[5]; /* Room for "100%" plus the terminating 0. */
   const int percentify_size = sizeof(percentify_buf) - 1;
   const int percentify_decs = 0;

   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);

   if (clo_basic_counts) {
      ULong total_Jccs = n_Jccs + n_IJccs;
      ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;

      VG_(umsg)("Counted %'llu call%s to %s()\n",
                n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);

      VG_(umsg)("\n");
      VG_(umsg)("Jccs:\n");
      VG_(umsg)("  total:         %'llu\n", total_Jccs);
      VG_(percentify)(taken_Jccs, (total_Jccs ? total_Jccs : 1),
                      percentify_decs, percentify_size, percentify_buf);
      VG_(umsg)("  taken:         %'llu (%s)\n",
                taken_Jccs, percentify_buf);

      VG_(umsg)("\n");
      VG_(umsg)("Executed:\n");
      VG_(umsg)("  SBs entered:   %'llu\n", n_SBs_entered);
      VG_(umsg)("  SBs completed: %'llu\n", n_SBs_completed);
      VG_(umsg)("  guest instrs:  %'llu\n", n_guest_instrs);
      VG_(umsg)("  IRStmts:       %'llu\n", n_IRStmts);

      VG_(umsg)("\n");
      VG_(umsg)("Ratios:\n");
      tl_assert(n_SBs_entered); // Paranoia time.
      VG_(umsg)("  guest instrs : SB entered  = %'llu : 10\n",
                10 * n_guest_instrs / n_SBs_entered);
      VG_(umsg)("  IRStmts : SB entered       = %'llu : 10\n",
                10 * n_IRStmts / n_SBs_entered);
      tl_assert(n_guest_instrs); // Paranoia time.
      VG_(umsg)("  IRStmts : guest instr      = %'llu : 10\n",
                10 * n_IRStmts / n_guest_instrs);
   }

   if (clo_detailed_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("IR-level counts by type:\n");
      print_details();
   }

   if (clo_basic_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("Exit code:       %d\n", exitcode);
   }
}

static void lk_pre_clo_init(void)
{
   VG_(details_name)            ("Lackey");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("an example Valgrind tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2011, and GNU GPL'd, by Nicholas Nethercote.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 200 );

   VG_(basic_tool_funcs)          (lk_post_clo_init,
                                   lk_instrument,
                                   lk_fini);
   VG_(needs_command_line_options)(lk_process_cmd_line_option,
                                   lk_print_usage,
                                   lk_print_debug_usage);
}

VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)

/*--------------------------------------------------------------------*/
/*--- end                                                lk_main.c ---*/
/*--------------------------------------------------------------------*/