1
2 /*--------------------------------------------------------------------*/
3 /*--- An example Valgrind tool. lk_main.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Lackey, an example Valgrind tool that does
8 some simple program measurement and tracing.
9
10 Copyright (C) 2002-2010 Nicholas Nethercote
11 njn@valgrind.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 // This tool shows how to do some basic instrumentation.
32 //
33 // There are four kinds of instrumentation it can do. They can be turned
34 // on/off independently with command line options:
35 //
36 // * --basic-counts : do basic counts, eg. number of instructions
37 // executed, jumps executed, etc.
38 // * --detailed-counts: do more detailed counts: number of loads, stores
39 // and ALU operations of different sizes.
40 // * --trace-mem=yes: trace all (data) memory accesses.
41 // * --trace-superblocks=yes:
42 // trace all superblock entries. Mostly of interest
43 // to the Valgrind developers.
44 //
45 // The code for each kind of instrumentation is guarded by a clo_* variable:
46 // clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
47 //
48 // If you want to modify any of the instrumentation code, look for the code
49 // that is guarded by the relevant clo_* variable (eg. clo_trace_mem)
50 // If you're not interested in the other kinds of instrumentation you can
51 // remove them. If you want to do more complex modifications, please read
52 // VEX/pub/libvex_ir.h to understand the intermediate representation.
53 //
54 //
55 // Specific Details about --trace-mem=yes
56 // --------------------------------------
57 // Lackey's --trace-mem code is a good starting point for building Valgrind
58 // tools that act on memory loads and stores. It also could be used as is,
59 // with its output used as input to a post-mortem processing step. However,
60 // because memory traces can be very large, online analysis is generally
61 // better.
62 //
63 // It prints memory data access traces that look like this:
64 //
65 // I 0023C790,2 # instruction read at 0x0023C790 of size 2
66 // I 0023C792,5
67 // S BE80199C,4 # data store at 0xBE80199C of size 4
68 // I 0025242B,3
69 // L BE801950,4 # data load at 0xBE801950 of size 4
70 // I 0023D476,7
71 // M 0025747C,1 # data modify at 0x0025747C of size 1
72 // I 0023DC20,2
73 // L 00254962,1
74 // L BE801FB3,1
75 // I 00252305,1
76 // L 00254AEB,1
77 // S 00257998,1
78 //
79 // Every instruction executed has an "instr" event representing it.
80 // Instructions that do memory accesses are followed by one or more "load",
81 // "store" or "modify" events. Some instructions do more than one load or
82 // store, as in the last two examples in the above trace.
83 //
84 // Here are some examples of x86 instructions that do different combinations
85 // of loads, stores, and modifies.
86 //
87 // Instruction Memory accesses Event sequence
88 // ----------- --------------- --------------
89 // add %eax, %ebx No loads or stores instr
90 //
91 // movl (%eax), %ebx loads (%eax) instr, load
92 //
93 // movl %eax, (%ebx) stores (%ebx) instr, store
94 //
95 // incl (%ecx) modifies (%ecx) instr, modify
96 //
97 // cmpsb loads (%esi), loads(%edi) instr, load, load
98 //
99 // call*l (%edx) loads (%edx), stores -4(%esp) instr, load, store
100 // pushl (%edx) loads (%edx), stores -4(%esp) instr, load, store
101 // movsw loads (%esi), stores (%edi) instr, load, store
102 //
103 // Instructions using x86 "rep" prefixes are traced as if they are repeated
104 // N times.
105 //
106 // Lackey with --trace-mem gives good traces, but they are not perfect, for
107 // the following reasons:
108 //
109 // - It does not trace into the OS kernel, so system calls and other kernel
110 // operations (eg. some scheduling and signal handling code) are ignored.
111 //
112 // - It could model loads and stores done at the system call boundary using
113 // the pre_mem_read/post_mem_write events. For example, if you call
114 // fstat() you know that the passed in buffer has been written. But it
115 // currently does not do this.
116 //
117 // - Valgrind replaces some code (not much) with its own, notably parts of
118 // code for scheduling operations and signal handling. This code is not
119 // traced.
120 //
121 // - There is no consideration of virtual-to-physical address mapping.
122 // This may not matter for many purposes.
123 //
124 // - Valgrind modifies the instruction stream in some very minor ways. For
125 // example, on x86 the bts, btc, btr instructions are incorrectly
126 // considered to always touch memory (this is a consequence of these
127 // instructions being very difficult to simulate).
128 //
129 // - Valgrind tools layout memory differently to normal programs, so the
130 // addresses you get will not be typical. Thus Lackey (and all Valgrind
131 // tools) is suitable for getting relative memory traces -- eg. if you
132 // want to analyse locality of memory accesses -- but is not good if
133 // absolute addresses are important.
134 //
135 // Despite all these warnings, Lackey's results should be good enough for a
136 // wide range of purposes. For example, Cachegrind shares all the above
137 // shortcomings and it is still useful.
138 //
139 // For further inspiration, you should look at cachegrind/cg_main.c which
140 // uses the same basic technique for tracing memory accesses, but also groups
141 // events together for processing into twos and threes so that fewer C calls
142 // are made and things run faster.
143 //
144 // Specific Details about --trace-superblocks=yes
145 // ----------------------------------------------
146 // Valgrind splits code up into single entry, multiple exit blocks
147 // known as superblocks. By itself, --trace-superblocks=yes just
148 // prints a message as each superblock is run:
149 //
150 // SB 04013170
151 // SB 04013177
152 // SB 04013173
153 // SB 04013177
154 //
155 // The hex number is the address of the first instruction in the
156 // superblock. You can see the relationship more obviously if you use
157 // --trace-superblocks=yes and --trace-mem=yes together. Then a "SB"
158 // message at address X is immediately followed by an "instr:" message
159 // for that address, as the first instruction in the block is
160 // executed, for example:
161 //
162 // SB 04014073
163 // I 04014073,3
164 // L 7FEFFF7F8,8
165 // I 04014076,4
166 // I 0401407A,3
167 // I 0401407D,3
168 // I 04014080,3
169 // I 04014083,6
170
171
172 #include "pub_tool_basics.h"
173 #include "pub_tool_tooliface.h"
174 #include "pub_tool_libcassert.h"
175 #include "pub_tool_libcprint.h"
176 #include "pub_tool_debuginfo.h"
177 #include "pub_tool_libcbase.h"
178 #include "pub_tool_options.h"
179 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
180
181 /*------------------------------------------------------------*/
182 /*--- Command line options ---*/
183 /*------------------------------------------------------------*/
184
/* Command line options controlling instrumentation kinds, as described at
 * the top of this file.  Each clo_* flag guards one instrumentation kind
 * in lk_instrument(). */
static Bool clo_basic_counts    = True;   /* --basic-counts, on by default */
static Bool clo_detailed_counts = False;  /* --detailed-counts */
static Bool clo_trace_mem       = False;  /* --trace-mem */
static Bool clo_trace_sbs       = False;  /* --trace-superblocks */

/* The name of the function of which the number of calls (under
 * --basic-counts=yes) is to be counted, with default. Override with command
 * line option --fnname. */
static Char* clo_fnname = "main";
196
/* Parse one Lackey-specific command line option.  Returns True if ARG
   was recognised (and the matching clo_* variable was updated), False
   so the core reports it as unknown.  The VG_*_CLO macros both test
   and assign, hence the empty statement bodies. */
static Bool lk_process_cmd_line_option(Char* arg)
{
   if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
   else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
   else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
   else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
   else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
   else
      return False;

   /* --fnname must never be set to an empty string. */
   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);
   return True;
}
211
lk_print_usage(void)212 static void lk_print_usage(void)
213 {
214 VG_(printf)(
215 " --basic-counts=no|yes count instructions, jumps, etc. [yes]\n"
216 " --detailed-counts=no|yes count loads, stores and alu ops [no]\n"
217 " --trace-mem=no|yes trace all loads and stores [no]\n"
218 " --trace-superblocks=no|yes trace all superblock entries [no]\n"
219 " --fnname=<name> count calls to <name> (only used if\n"
220 " --basic-count=yes) [main]\n"
221 );
222 }
223
/* Print the tool-specific part of --help-debug output.  Lackey has no
   debugging options. */
static void lk_print_debug_usage(void)
{
   VG_(printf)(
" (none)\n"
   );
}
230
231 /*------------------------------------------------------------*/
232 /*--- Stuff for --basic-counts ---*/
233 /*------------------------------------------------------------*/
234
/* Nb: use ULongs because the numbers can get very big */
static ULong n_func_calls    = 0;   /* calls to the --fnname function */
static ULong n_SBs_entered   = 0;   /* superblocks entered */
static ULong n_SBs_completed = 0;   /* superblocks that ran to the end */
static ULong n_IRStmts       = 0;   /* VEX IR statements executed */
static ULong n_guest_instrs  = 0;   /* guest instructions executed */
static ULong n_Jccs          = 0;   /* conditional jumps seen */
static ULong n_Jccs_untaken  = 0;   /* ... of which fell through */
static ULong n_IJccs         = 0;   /* inverted Jccs (see Ist_Exit handling) */
static ULong n_IJccs_untaken = 0;   /* ... of which fell through */
245
add_one_func_call(void)246 static void add_one_func_call(void)
247 {
248 n_func_calls++;
249 }
250
add_one_SB_entered(void)251 static void add_one_SB_entered(void)
252 {
253 n_SBs_entered++;
254 }
255
add_one_SB_completed(void)256 static void add_one_SB_completed(void)
257 {
258 n_SBs_completed++;
259 }
260
add_one_IRStmt(void)261 static void add_one_IRStmt(void)
262 {
263 n_IRStmts++;
264 }
265
add_one_guest_instr(void)266 static void add_one_guest_instr(void)
267 {
268 n_guest_instrs++;
269 }
270
add_one_Jcc(void)271 static void add_one_Jcc(void)
272 {
273 n_Jccs++;
274 }
275
add_one_Jcc_untaken(void)276 static void add_one_Jcc_untaken(void)
277 {
278 n_Jccs_untaken++;
279 }
280
add_one_inverted_Jcc(void)281 static void add_one_inverted_Jcc(void)
282 {
283 n_IJccs++;
284 }
285
add_one_inverted_Jcc_untaken(void)286 static void add_one_inverted_Jcc_untaken(void)
287 {
288 n_IJccs_untaken++;
289 }
290
291 /*------------------------------------------------------------*/
292 /*--- Stuff for --detailed-counts ---*/
293 /*------------------------------------------------------------*/
294
295 /* --- Operations --- */
296
/* The three operation classes counted by --detailed-counts; the values
   double as row indices into detailCounts. */
typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;

#define N_OPS 3     /* number of Op values above */


/* --- Types --- */

#define N_TYPES 9   /* number of IRTypes handled by type2index() */
305
type2index(IRType ty)306 static Int type2index ( IRType ty )
307 {
308 switch (ty) {
309 case Ity_I1: return 0;
310 case Ity_I8: return 1;
311 case Ity_I16: return 2;
312 case Ity_I32: return 3;
313 case Ity_I64: return 4;
314 case Ity_I128: return 5;
315 case Ity_F32: return 6;
316 case Ity_F64: return 7;
317 case Ity_V128: return 8;
318 default: tl_assert(0);
319 }
320 }
321
nameOfTypeIndex(Int i)322 static HChar* nameOfTypeIndex ( Int i )
323 {
324 switch (i) {
325 case 0: return "I1"; break;
326 case 1: return "I8"; break;
327 case 2: return "I16"; break;
328 case 3: return "I32"; break;
329 case 4: return "I64"; break;
330 case 5: return "I128"; break;
331 case 6: return "F32"; break;
332 case 7: return "F64"; break;
333 case 8: return "V128"; break;
334 default: tl_assert(0);
335 }
336 }
337
338
/* --- Counts --- */

/* One counter per (operation class, IR type) pair; incremented at run
   time by increment_detail() via the IR added in instrument_detail(). */
static ULong detailCounts[N_OPS][N_TYPES];
342
343 /* The helper that is called from the instrumented code. */
344 static VG_REGPARM(1)
increment_detail(ULong * detail)345 void increment_detail(ULong* detail)
346 {
347 (*detail)++;
348 }
349
350 /* A helper that adds the instrumentation for a detail. */
/* Append to SB a dirty call that, at run time, increments
   detailCounts[op][type2index(type)] by one. */
static void instrument_detail(IRSB* sb, Op op, IRType type)
{
   IRDirty*   dirty;
   IRExpr**   args;
   const UInt tyIx = type2index(type);

   tl_assert(op < N_OPS);
   tl_assert(tyIx < N_TYPES);

   args  = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][tyIx] ) );
   dirty = unsafeIRDirty_0_N( 1, "increment_detail",
                              VG_(fnptr_to_fnentry)( &increment_detail ),
                              args );
   addStmtToIRSB( sb, IRStmt_Dirty(dirty) );
}
366
367 /* Summarize and print the details. */
print_details(void)368 static void print_details ( void )
369 {
370 Int typeIx;
371 VG_(umsg)(" Type Loads Stores AluOps\n");
372 VG_(umsg)(" -------------------------------------------\n");
373 for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
374 VG_(umsg)(" %4s %'12llu %'12llu %'12llu\n",
375 nameOfTypeIndex( typeIx ),
376 detailCounts[OpLoad ][typeIx],
377 detailCounts[OpStore][typeIx],
378 detailCounts[OpAlu ][typeIx]
379 );
380 }
381 }
382
383
384 /*------------------------------------------------------------*/
385 /*--- Stuff for --trace-mem ---*/
386 /*------------------------------------------------------------*/
387
/* Largest data access size (in bytes) accepted by the addEvent_D*
   helpers; anything bigger trips an assertion. */
#define MAX_DSIZE    512

/* An IRAtom is an IRExpr that is "flat": an immediate constant or a
   temporary, never a compound expression. */
typedef
   IRExpr
   IRAtom;

/* The four kinds of traced memory event: instruction fetch, data read,
   data write, and read-modify-write. */
typedef
   enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
   EventKind;

/* One pending memory event: its kind, the (flat) address expression,
   and the access size in bytes. */
typedef
   struct {
      EventKind  ekind;
      IRAtom*    addr;
      Int        size;
   }
   Event;

/* Up to this many unnotified events are allowed.  Must be at least two,
   so that reads and writes to the same address can be merged into a modify.
   Beyond that, larger numbers just potentially induce more spilling due to
   extending live ranges of address temporaries. */
#define N_EVENTS 4

/* Maintain an ordered list of memory events which are outstanding, in
   the sense that no IR has yet been generated to do the relevant
   helper calls.  The SB is scanned top to bottom and memory events
   are added to the end of the list, merging with the most recent
   notified event where possible (Dw immediately following Dr and
   having the same size and EA can be merged).

   This merging is done so that for architectures which have
   load-op-store instructions (x86, amd64), the instr is treated as if
   it makes just one memory reference (a modify), rather than two (a
   read followed by a write at the same address).

   At various points the list will need to be flushed, that is, IR
   generated from it.  That must happen before any possible exit from
   the block (the end, or an IRStmt_Exit).  Flushing also takes place
   when there is no space to add a new event.

   If we require the simulation statistics to be up to date with
   respect to possible memory exceptions, then the list would have to
   be flushed before each memory reference.  That's a pain so we don't
   bother.

   Flushing the list consists of walking it start to end and emitting
   instrumentation IR for each event, in the order in which they
   appear. */

static Event events[N_EVENTS];   /* the outstanding-event list */
static Int   events_used = 0;    /* number of valid entries in events[] */
440
441
/* Run-time helper for --trace-mem: print one instruction-fetch record. */
static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
{
   VG_(printf)("I %08lx,%lu\n", addr, size);
}
446
/* Run-time helper for --trace-mem: print one data-load record. */
static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
{
   VG_(printf)(" L %08lx,%lu\n", addr, size);
}
451
/* Run-time helper for --trace-mem: print one data-store record. */
static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
{
   VG_(printf)(" S %08lx,%lu\n", addr, size);
}
456
/* Run-time helper for --trace-mem: print one data-modify record
   (a merged load+store at the same address, see addEvent_Dw). */
static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
{
   VG_(printf)(" M %08lx,%lu\n", addr, size);
}
461
462
/* Drain the outstanding-event list: for each pending event, append to SB
   a dirty call to the matching trace_* helper (address and size as
   arguments), then mark the list empty. */
static void flushEvents(IRSB* sb)
{
   Int       ix;
   Char*     hname;
   void*     haddr;
   IRExpr**  args;
   IRDirty*  dirty;
   Event*    ev;

   for (ix = 0; ix < events_used; ix++) {
      ev = &events[ix];

      /* Pick the helper that matches this event's kind. */
      if (ev->ekind == Event_Ir) {
         hname = "trace_instr";   haddr = trace_instr;
      } else if (ev->ekind == Event_Dr) {
         hname = "trace_load";    haddr = trace_load;
      } else if (ev->ekind == Event_Dw) {
         hname = "trace_store";   haddr = trace_store;
      } else if (ev->ekind == Event_Dm) {
         hname = "trace_modify";  haddr = trace_modify;
      } else {
         tl_assert(0);
      }

      /* Emit the helper call. */
      args  = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
      dirty = unsafeIRDirty_0_N( /*regparms*/2,
                                 hname, VG_(fnptr_to_fnentry)( haddr ),
                                 args );
      addStmtToIRSB( sb, IRStmt_Dirty(dirty) );
   }

   events_used = 0;
}
503
504 // WARNING: If you aren't interested in instruction reads, you can omit the
505 // code that adds calls to trace_instr() in flushEvents(). However, you
506 // must still call this function, addEvent_Ir() -- it is necessary to add
507 // the Ir events to the events list so that merging of paired load/store
508 // events into modify events works correctly.
/* Queue an instruction-fetch event of ISIZE bytes at IADDR, flushing
   first if the event list is full.  ISIZE must be a plausible
   instruction length, or the client-request magic size. */
static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
{
   Event* e;
   tl_assert(clo_trace_mem);
   tl_assert( VG_CLREQ_SZB == isize
              || (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB) );
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   e = &events[events_used++];
   e->ekind = Event_Ir;
   e->addr  = iaddr;
   e->size  = isize;
}
524
525 static
addEvent_Dr(IRSB * sb,IRAtom * daddr,Int dsize)526 void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
527 {
528 Event* evt;
529 tl_assert(clo_trace_mem);
530 tl_assert(isIRAtom(daddr));
531 tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
532 if (events_used == N_EVENTS)
533 flushEvents(sb);
534 tl_assert(events_used >= 0 && events_used < N_EVENTS);
535 evt = &events[events_used];
536 evt->ekind = Event_Dr;
537 evt->addr = daddr;
538 evt->size = dsize;
539 events_used++;
540 }
541
542 static
addEvent_Dw(IRSB * sb,IRAtom * daddr,Int dsize)543 void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
544 {
545 Event* lastEvt;
546 Event* evt;
547 tl_assert(clo_trace_mem);
548 tl_assert(isIRAtom(daddr));
549 tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
550
551 // Is it possible to merge this write with the preceding read?
552 lastEvt = &events[events_used-1];
553 if (events_used > 0
554 && lastEvt->ekind == Event_Dr
555 && lastEvt->size == dsize
556 && eqIRAtom(lastEvt->addr, daddr))
557 {
558 lastEvt->ekind = Event_Dm;
559 return;
560 }
561
562 // No. Add as normal.
563 if (events_used == N_EVENTS)
564 flushEvents(sb);
565 tl_assert(events_used >= 0 && events_used < N_EVENTS);
566 evt = &events[events_used];
567 evt->ekind = Event_Dw;
568 evt->size = dsize;
569 evt->addr = daddr;
570 events_used++;
571 }
572
573
574 /*------------------------------------------------------------*/
575 /*--- Stuff for --trace-superblocks ---*/
576 /*------------------------------------------------------------*/
577
/* Run-time helper for --trace-superblocks: print the entry address of
   the superblock being entered. */
static void trace_superblock(Addr addr)
{
   VG_(printf)("SB %08lx\n", addr);
}
582
583
584 /*------------------------------------------------------------*/
585 /*--- Basic tool functions ---*/
586 /*------------------------------------------------------------*/
587
lk_post_clo_init(void)588 static void lk_post_clo_init(void)
589 {
590 Int op, tyIx;
591
592 if (clo_detailed_counts) {
593 for (op = 0; op < N_OPS; op++)
594 for (tyIx = 0; tyIx < N_TYPES; tyIx++)
595 detailCounts[op][tyIx] = 0;
596 }
597 }
598
599 static
/* The tool's instrumentation function, called by the core for every
   superblock (SB) of guest code translated.  Returns a copy of sbIn
   augmented with calls to the counting/tracing helpers selected by the
   clo_* options.  The signature is fixed by the core tool interface.
   gWordTy/hWordTy are the guest and host word types; only the matching
   case is supported. */
static
IRSB* lk_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy )
{
   IRDirty*   di;
   Int        i;
   IRSB*      sbOut;
   Char       fnname[100];        /* buffer for VG_(get_fnname_if_entry) */
   IRType     type;
   IRTypeEnv* tyenv = sbIn->tyenv;
   Addr       iaddr = 0, dst;     /* current instr address / branch target */
   UInt       ilen = 0;           /* current instr length */
   Bool       condition_inverted = False;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Set up SB */
   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   if (clo_basic_counts) {
      /* Count this superblock. */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
                              VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
                              mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_sbs) {
      /* Print this superblock's address. */
      di = unsafeIRDirty_0_N(
              0, "trace_superblock",
              VG_(fnptr_to_fnentry)( &trace_superblock ),
              mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
           );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      /* Start each SB with an empty outstanding-event list. */
      events_used = 0;
   }

   /* Main loop: process each remaining statement of sbIn in order. */
   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];
      if (!st || st->tag == Ist_NoOp) continue;

      if (clo_basic_counts) {
         /* Count one VEX statement. */
         di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
                                 VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
                                 mkIRExprVec_0() );
         addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
      }

      switch (st->tag) {
         /* These statement kinds need no per-kind instrumentation:
            pass them through unchanged. */
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_IMark:
            if (clo_basic_counts) {
               /* Needed to be able to check for inverted condition in Ist_Exit */
               iaddr = st->Ist.IMark.addr;
               ilen  = st->Ist.IMark.len;

               /* Count guest instruction. */
               di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
                                       VG_(fnptr_to_fnentry)( &add_one_guest_instr ),
                                       mkIRExprVec_0() );
               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );

               /* An unconditional branch to a known destination in the
                * guest's instructions can be represented, in the IRSB to
                * instrument, by the VEX statements that are the
                * translation of that known destination. This feature is
                * called 'SB chasing' and can be influenced by command
                * line option --vex-guest-chase-thresh.
                *
                * To get an accurate count of the calls to a specific
                * function, taking SB chasing into account, we need to
                * check for each guest instruction (Ist_IMark) if it is
                * the entry point of a function.
                */
               tl_assert(clo_fnname);
               tl_assert(clo_fnname[0]);
               if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
                                            fnname, sizeof(fnname))
                   && 0 == VG_(strcmp)(fnname, clo_fnname)) {
                  di = unsafeIRDirty_0_N(
                          0, "add_one_func_call",
                          VG_(fnptr_to_fnentry)( &add_one_func_call ),
                          mkIRExprVec_0() );
                  addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
               }
            }
            if (clo_trace_mem) {
               // WARNING: do not remove this function call, even if you
               // aren't interested in instruction reads. See the comment
               // above the function itself for more detail.
               addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
                            st->Ist.IMark.len );
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_WrTmp:
            // Add a call to trace_load() if --trace-mem=yes.
            if (clo_trace_mem) {
               IRExpr* data = st->Ist.WrTmp.data;
               if (data->tag == Iex_Load) {
                  addEvent_Dr( sbOut, data->Iex.Load.addr,
                               sizeofIRType(data->Iex.Load.ty) );
               }
            }
            if (clo_detailed_counts) {
               IRExpr* expr = st->Ist.WrTmp.data;
               type = typeOfIRExpr(sbOut->tyenv, expr);
               tl_assert(type != Ity_INVALID);
               switch (expr->tag) {
                  case Iex_Load:
                     instrument_detail( sbOut, OpLoad, type );
                     break;
                  case Iex_Unop:
                  case Iex_Binop:
                  case Iex_Triop:
                  case Iex_Qop:
                  case Iex_Mux0X:
                     /* All ALU-like operations are lumped together. */
                     instrument_detail( sbOut, OpAlu, type );
                     break;
                  default:
                     break;
               }
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_Store:
            if (clo_trace_mem) {
               IRExpr* data = st->Ist.Store.data;
               addEvent_Dw( sbOut, st->Ist.Store.addr,
                            sizeofIRType(typeOfIRExpr(tyenv, data)) );
            }
            if (clo_detailed_counts) {
               type = typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data);
               tl_assert(type != Ity_INVALID);
               instrument_detail( sbOut, OpStore, type );
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_Dirty: {
            if (clo_trace_mem) {
               Int      dsize;
               IRDirty* d = st->Ist.Dirty.details;
               if (d->mFx != Ifx_None) {
                  // This dirty helper accesses memory.  Collect the details.
                  tl_assert(d->mAddr != NULL);
                  tl_assert(d->mSize != 0);
                  dsize = d->mSize;
                  if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                     addEvent_Dr( sbOut, d->mAddr, dsize );
                  if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                     addEvent_Dw( sbOut, d->mAddr, dsize );
               } else {
                  /* A memory-free dirty helper must carry no address/size. */
                  tl_assert(d->mAddr == NULL);
                  tl_assert(d->mSize == 0);
               }
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRType dataTy;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
            dataSize = sizeofIRType(dataTy);
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            if (clo_trace_mem) {
               addEvent_Dr( sbOut, cas->addr, dataSize );
               addEvent_Dw( sbOut, cas->addr, dataSize );
            }
            if (clo_detailed_counts) {
               instrument_detail( sbOut, OpLoad, dataTy );
               if (cas->dataHi != NULL) /* dcas */
                  instrument_detail( sbOut, OpLoad, dataTy );
               instrument_detail( sbOut, OpStore, dataTy );
               if (cas->dataHi != NULL) /* dcas */
                  instrument_detail( sbOut, OpStore, dataTy );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               if (clo_trace_mem)
                  addEvent_Dr( sbOut, st->Ist.LLSC.addr,
                               sizeofIRType(dataTy) );
               if (clo_detailed_counts)
                  instrument_detail( sbOut, OpLoad, dataTy );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               if (clo_trace_mem)
                  addEvent_Dw( sbOut, st->Ist.LLSC.addr,
                               sizeofIRType(dataTy) );
               if (clo_detailed_counts)
                  instrument_detail( sbOut, OpStore, dataTy );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_Exit:
            if (clo_basic_counts) {
               // The condition of a branch was inverted by VEX if a taken
               // branch is in fact a fall trough according to client address
               tl_assert(iaddr != 0);
               dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
                                           st->Ist.Exit.dst->Ico.U64;
               condition_inverted = (dst == iaddr + ilen);

               /* Count Jcc */
               if (!condition_inverted)
                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
                                          VG_(fnptr_to_fnentry)( &add_one_Jcc ),
                                          mkIRExprVec_0() );
               else
                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
                                          VG_(fnptr_to_fnentry)(
                                             &add_one_inverted_Jcc ),
                                          mkIRExprVec_0() );

               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            if (clo_trace_mem) {
               /* The SB may exit here, so pending events must be
                  emitted before the exit. */
               flushEvents(sbOut);
            }

            addStmtToIRSB( sbOut, st );      // Original statement

            if (clo_basic_counts) {
               /* Count non-taken Jcc: only reached if the exit above
                  did not fire. */
               if (!condition_inverted)
                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
                                          VG_(fnptr_to_fnentry)(
                                             &add_one_Jcc_untaken ),
                                          mkIRExprVec_0() );
               else
                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
                                          VG_(fnptr_to_fnentry)(
                                             &add_one_inverted_Jcc_untaken ),
                                          mkIRExprVec_0() );

               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            break;

         default:
            tl_assert(0);
      }
   }

   if (clo_basic_counts) {
      /* Count this basic block. */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
                              VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
                              mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      /* At the end of the sbIn.  Flush outstandings. */
      flushEvents(sbOut);
   }

   return sbOut;
}
906
/* Called once at client exit: print whatever statistics the enabled
   instrumentation kinds accumulated. */
static void lk_fini(Int exitcode)
{
   char percentify_buf[4]; /* Two digits, '%' and 0. */
   /* NOTE(review): a three-digit percentage ("100%") would need 5 bytes;
      assumed VG_(percentify) stays within percentify_size -- confirm. */
   const int percentify_size = sizeof(percentify_buf);
   const int percentify_decs = 0;

   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);

   if (clo_basic_counts) {
      ULong total_Jccs = n_Jccs + n_IJccs;
      /* For inverted branches, "untaken" at the IR level corresponds to
         the guest branch having been taken, hence the addition. */
      ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;

      VG_(umsg)("Counted %'llu call%s to %s()\n",
                n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);

      VG_(umsg)("\n");
      VG_(umsg)("Jccs:\n");
      VG_(umsg)(" total: %'llu\n", total_Jccs);
      /* Guard against division by zero when no Jccs were seen. */
      VG_(percentify)(taken_Jccs, (total_Jccs ? total_Jccs : 1),
                      percentify_decs, percentify_size, percentify_buf);
      VG_(umsg)(" taken: %'llu (%s)\n",
                taken_Jccs, percentify_buf);

      VG_(umsg)("\n");
      VG_(umsg)("Executed:\n");
      VG_(umsg)(" SBs entered: %'llu\n", n_SBs_entered);
      VG_(umsg)(" SBs completed: %'llu\n", n_SBs_completed);
      VG_(umsg)(" guest instrs: %'llu\n", n_guest_instrs);
      VG_(umsg)(" IRStmts: %'llu\n", n_IRStmts);

      VG_(umsg)("\n");
      VG_(umsg)("Ratios:\n");
      tl_assert(n_SBs_entered); // Paranoia time.
      /* Ratios are scaled by 10 to get one decimal digit of precision
         with integer arithmetic. */
      VG_(umsg)(" guest instrs : SB entered = %'llu : 10\n",
                10 * n_guest_instrs / n_SBs_entered);
      VG_(umsg)(" IRStmts : SB entered = %'llu : 10\n",
                10 * n_IRStmts / n_SBs_entered);
      tl_assert(n_guest_instrs); // Paranoia time.
      VG_(umsg)(" IRStmts : guest instr = %'llu : 10\n",
                10 * n_IRStmts / n_guest_instrs);
   }

   if (clo_detailed_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("IR-level counts by type:\n");
      print_details();
   }

   if (clo_basic_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("Exit code: %d\n", exitcode);
   }
}
961
/* Pre-command-line-option initialisation: register the tool's identity
   and its core callbacks with the Valgrind core.  Must not do anything
   that depends on command line options. */
static void lk_pre_clo_init(void)
{
   VG_(details_name)            ("Lackey");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("an example Valgrind tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2010, and GNU GPL'd, by Nicholas Nethercote.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 200 );

   /* Mandatory callbacks: post-CLO init, instrumenter, finaliser. */
   VG_(basic_tool_funcs)          (lk_post_clo_init,
                                   lk_instrument,
                                   lk_fini);
   /* Optional: Lackey handles its own command line options. */
   VG_(needs_command_line_options)(lk_process_cmd_line_option,
                                   lk_print_usage,
                                   lk_print_debug_usage);
}
979
/* Standard tool bootstrap: checks core/tool interface compatibility and
   names the pre-CLO initialisation function. */
VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)
981
982 /*--------------------------------------------------------------------*/
983 /*--- end lk_main.c ---*/
984 /*--------------------------------------------------------------------*/
985