1 //--------------------------------------------------------------------*/
2 //--- BBV: a SimPoint basic block vector generator bbv_main.c ---*/
3 //--------------------------------------------------------------------*/
4
5 /*
6 This file is part of BBV, a Valgrind tool for generating SimPoint
7 basic block vectors.
8
9 Copyright (C) 2006-2013 Vince Weaver
10 vince _at_ csl.cornell.edu
11
12 pcfile code is Copyright (C) 2006-2013 Oriol Prat
13 oriol.prat _at _ bsc.es
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31 */
32
33
34 #include "pub_tool_basics.h"
35 #include "pub_tool_tooliface.h"
36 #include "pub_tool_options.h" /* command line options */
37
38 #include "pub_tool_vki.h" /* vki_stat */
39 #include "pub_tool_libcbase.h" /* VG_(strlen) */
40 #include "pub_tool_libcfile.h" /* VG_(write) */
41 #include "pub_tool_libcprint.h" /* VG_(printf) */
42 #include "pub_tool_libcassert.h" /* VG_(exit) */
43 #include "pub_tool_mallocfree.h" /* plain_free */
44 #include "pub_tool_machine.h" /* VG_(fnptr_to_fnentry) */
45 #include "pub_tool_debuginfo.h" /* VG_(get_fnname) */
46
47 #include "pub_tool_oset.h" /* ordered set stuff */
48
/* instruction special cases */
/* Bit flags returned by get_inst_type() and tested in bbv_instrument() */
/* to pick which per-instruction helper gets called.                    */
#define REP_INSTRUCTION   0x1
#define FLDCW_INSTRUCTION 0x2

/* interval variables */
/* interval_size can be overridden with --interval-size.  A thread's   */
/* interval ends in handle_overflow() once its dyn_instr exceeds it.   */
#define DEFAULT_GRAIN_SIZE 100000000 /* 100 million by default */
static Int interval_size=DEFAULT_GRAIN_SIZE;

/* filenames */
/* The clo_* strings are the templates (settable with --bb-out-file    */
/* and --pc-out-file); bb_out_file and pc_out_file hold the results of */
/* passing them through VG_(expand_file_name).                         */
static const HChar *clo_bb_out_file="bb.out.%p";
static const HChar *clo_pc_out_file="pc.out.%p";
static HChar *pc_out_file=NULL;
static HChar *bb_out_file=NULL;


/* output parameters */
/* instr_count_only: when True, handle_overflow() writes no interval   */
/*                   records (set with --instr-count-only).            */
/* generate_pc_file: set to True when --pc-out-file is given; makes    */
/*                   bbv_fini() call dumpPcFile().                     */
static Bool instr_count_only=False;
static Bool generate_pc_file=False;

/* write buffer */
/* Scratch space shared by every routine that formats text before      */
/* handing it to VG_(write).                                           */
static HChar buf[1024];

/* Global values */
static OSet* instr_info_table; /* table that holds the basic block info */
static Int block_num=1; /* global next block number */
/* current_thread is set from the tid passed to bbv_thread_called() and */
/* is used as the index into bbv_thread[] and each inst_counter[].      */
static Int current_thread=0;
/* Number of entries currently allocated in bbv_thread[] and in every   */
/* BB_info inst_counter[] array.                                        */
static Int allocated_threads=1;
/* Per-thread state array; created in bbv_pre_clo_init() and grown by   */
/* bbv_thread_called() via allocate_new_thread().                       */
struct thread_info *bbv_thread=NULL;
77
/* Per-thread variables */
/* One of these exists per slot of the bbv_thread[] array.  New slots  */
/* are initialised in allocate_new_thread().                            */
struct thread_info {
   ULong dyn_instr; /* Current retired instruction count; */
                    /* handle_overflow() subtracts interval_size from it */
   ULong total_instr; /* Total retired instruction count */
   Addr last_rep_addr; /* rep counting values: address last passed to */
                       /* per_instruction_BBV_rep() */
   ULong rep_count; /* callbacks seen for the rep not yet closed out; */
                    /* zeroed by close_out_reps() */
   ULong global_rep_count; /* running sum of rep_count values closed out */
   ULong unique_rep_count; /* number of times close_out_reps() ran */
   ULong fldcw_count; /* fldcw count */
   Int bbtrace_fd; /* file descriptor; -1 until the trace file is opened */
};
89
/* Size of the fn_name buffer in struct BB_info; passed as the buffer  */
/* length to the function-name lookups in bbv_instrument().            */
#define FUNCTION_NAME_LENGTH 20

/* One node of instr_info_table.  The table is created with key offset */
/* 0, which is why BB_addr has to stay the first member.               */
struct BB_info {
   Addr BB_addr; /* used as key, must be first */
   Int n_instrs; /* instructions in the basic block */
                 /* (bbv_instrument() in this file always stores 1) */
   Int block_num; /* unique block identifier */
   Int *inst_counter; /* times entered * num_instructions */
                      /* array with one slot per allocated thread; */
                      /* slots are zeroed when an interval is written */
   Bool is_entry; /* is this block a function entry point */
   HChar fn_name[FUNCTION_NAME_LENGTH]; /* Function block is in */
};
100
101
102 /* dump the optional PC file, which contains basic block number to */
103 /* instruction address and function name mappings */
dumpPcFile(void)104 static void dumpPcFile(void)
105 {
106 struct BB_info *bb_elem;
107 Int pctrace_fd;
108 SysRes sres;
109
110 pc_out_file =
111 VG_(expand_file_name)("--pc-out-file", clo_pc_out_file);
112
113 sres = VG_(open)(pc_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
114 VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP);
115 if (sr_isError(sres)) {
116 VG_(umsg)("Error: cannot create pc file %s\n", pc_out_file);
117 VG_(exit)(1);
118 } else {
119 pctrace_fd = sr_Res(sres);
120 }
121
122 /* Loop through the table, printing the number, address, */
123 /* and function name for each basic block */
124 VG_(OSetGen_ResetIter)(instr_info_table);
125 while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) {
126 VG_(write)(pctrace_fd,"F",1);
127 VG_(sprintf)( buf,":%d:%x:%s\n",
128 bb_elem->block_num,
129 (Int)bb_elem->BB_addr,
130 bb_elem->fn_name);
131 VG_(write)(pctrace_fd, (void*)buf, VG_(strlen)(buf));
132 }
133
134 VG_(close)(pctrace_fd);
135 }
136
open_tracefile(Int thread_num)137 static Int open_tracefile(Int thread_num)
138 {
139 SysRes sres;
140 HChar temp_string[2048];
141
142 /* For thread 1, don't append any thread number */
143 /* This lets the single-thread case not have any */
144 /* extra values appended to the file name. */
145 if (thread_num==1) {
146 VG_(strncpy)(temp_string,bb_out_file,2047);
147 }
148 else {
149 VG_(sprintf)(temp_string,"%s.%d",bb_out_file,thread_num);
150 }
151
152 sres = VG_(open)(temp_string, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
153 VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP);
154
155 if (sr_isError(sres)) {
156 VG_(umsg)("Error: cannot create bb file %s\n",temp_string);
157 VG_(exit)(1);
158 }
159
160 return sr_Res(sres);
161 }
162
handle_overflow(void)163 static void handle_overflow(void)
164 {
165 struct BB_info *bb_elem;
166
167 if (bbv_thread[current_thread].dyn_instr > interval_size) {
168
169 if (!instr_count_only) {
170
171 /* If our output fd hasn't been opened, open it */
172 if (bbv_thread[current_thread].bbtrace_fd < 0) {
173 bbv_thread[current_thread].bbtrace_fd=open_tracefile(current_thread);
174 }
175
176 /* put an entry to the bb.out file */
177
178 VG_(write)(bbv_thread[current_thread].bbtrace_fd,"T",1);
179
180 VG_(OSetGen_ResetIter)(instr_info_table);
181 while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) {
182 if ( bb_elem->inst_counter[current_thread] != 0 ) {
183 VG_(sprintf)( buf,":%d:%d ",
184 bb_elem->block_num,
185 bb_elem->inst_counter[current_thread]);
186 VG_(write)(bbv_thread[current_thread].bbtrace_fd,
187 (void*)buf, VG_(strlen)(buf));
188 bb_elem->inst_counter[current_thread] = 0;
189 }
190 }
191
192 VG_(write)(bbv_thread[current_thread].bbtrace_fd,"\n",1);
193 }
194
195 bbv_thread[current_thread].dyn_instr -= interval_size;
196 }
197 }
198
199
close_out_reps(void)200 static void close_out_reps(void)
201 {
202 bbv_thread[current_thread].global_rep_count+=bbv_thread[current_thread].rep_count;
203 bbv_thread[current_thread].unique_rep_count++;
204 bbv_thread[current_thread].rep_count=0;
205 }
206
207 /* Generic function to get called each instruction */
per_instruction_BBV(struct BB_info * bbInfo)208 static VG_REGPARM(1) void per_instruction_BBV(struct BB_info *bbInfo)
209 {
210 Int n_instrs=1;
211
212 tl_assert(bbInfo);
213
214 /* we finished rep but didn't clear out count */
215 if (bbv_thread[current_thread].rep_count) {
216 n_instrs++;
217 close_out_reps();
218 }
219
220 bbInfo->inst_counter[current_thread]+=n_instrs;
221
222 bbv_thread[current_thread].total_instr+=n_instrs;
223 bbv_thread[current_thread].dyn_instr +=n_instrs;
224
225 handle_overflow();
226 }
227
228 /* Function to get called if instruction has a rep prefix */
per_instruction_BBV_rep(Addr addr)229 static VG_REGPARM(1) void per_instruction_BBV_rep(Addr addr)
230 {
231 /* handle back-to-back rep instructions */
232 if (bbv_thread[current_thread].last_rep_addr!=addr) {
233 if (bbv_thread[current_thread].rep_count) {
234 close_out_reps();
235 bbv_thread[current_thread].total_instr++;
236 bbv_thread[current_thread].dyn_instr++;
237 }
238 bbv_thread[current_thread].last_rep_addr=addr;
239 }
240
241 bbv_thread[current_thread].rep_count++;
242
243 }
244
245 /* Function to call if our instruction has a fldcw instruction */
per_instruction_BBV_fldcw(struct BB_info * bbInfo)246 static VG_REGPARM(1) void per_instruction_BBV_fldcw(struct BB_info *bbInfo)
247 {
248 Int n_instrs=1;
249
250 tl_assert(bbInfo);
251
252 /* we finished rep but didn't clear out count */
253 if (bbv_thread[current_thread].rep_count) {
254 n_instrs++;
255 close_out_reps();
256 }
257
258 /* count fldcw instructions */
259 bbv_thread[current_thread].fldcw_count++;
260
261 bbInfo->inst_counter[current_thread]+=n_instrs;
262
263 bbv_thread[current_thread].total_instr+=n_instrs;
264 bbv_thread[current_thread].dyn_instr +=n_instrs;
265
266 handle_overflow();
267 }
268
269 /* Check if the instruction pointed to is one that needs */
270 /* special handling. If so, set a bit in the return */
271 /* value indicating what type. */
get_inst_type(Int len,Addr addr)272 static Int get_inst_type(Int len, Addr addr)
273 {
274 int result=0;
275
276 #if defined(VGA_x86) || defined(VGA_amd64)
277
278 UChar *inst_pointer;
279 UChar inst_byte;
280 int i,possible_rep;
281
282 /* rep prefixed instructions are counted as one instruction on */
283 /* x86 processors and must be handled as a special case */
284
285 /* Also, the rep prefix is re-used as part of the opcode for */
286 /* SSE instructions. So we need to specifically check for */
287 /* the following: movs, cmps, scas, lods, stos, ins, outs */
288
289 inst_pointer=(UChar *)addr;
290 i=0;
291 inst_byte=0;
292 possible_rep=0;
293
294 while (i<len) {
295
296 inst_byte=*inst_pointer;
297
298 if ( (inst_byte == 0x67) || /* size override prefix */
299 (inst_byte == 0x66) || /* size override prefix */
300 (inst_byte == 0x48) ) { /* 64-bit prefix */
301 } else if ( (inst_byte == 0xf2) || /* rep prefix */
302 (inst_byte == 0xf3) ) { /* repne prefix */
303 possible_rep=1;
304 } else {
305 break; /* other byte, exit */
306 }
307
308 i++;
309 inst_pointer++;
310 }
311
312 if ( possible_rep &&
313 ( ( (inst_byte >= 0xa4) && /* movs,cmps,scas */
314 (inst_byte <= 0xaf) ) || /* lods,stos */
315 ( (inst_byte >= 0x6c) &&
316 (inst_byte <= 0x6f) ) ) ) { /* ins,outs */
317
318 result|=REP_INSTRUCTION;
319 }
320
321 /* fldcw instructions are double-counted by the hardware */
322 /* performance counters on pentium 4 processors so it is */
323 /* useful to have that count when doing validation work. */
324
325 inst_pointer=(UChar *)addr;
326 if (len>1) {
327 /* FLDCW detection */
328 /* opcode is 0xd9/5, ie 1101 1001 oo10 1mmm */
329 if ((*inst_pointer==0xd9) &&
330 (*(inst_pointer+1)<0xb0) && /* need this case of fldz, etc, count */
331 ( (*(inst_pointer+1) & 0x38) == 0x28)) {
332 result|=FLDCW_INSTRUCTION;
333 }
334 }
335
336 #endif
337 return result;
338 }
339
340
341
342 /* Our instrumentation function */
343 /* sbIn = super block to translate */
344 /* layout = guest layout */
345 /* gWordTy = size of guest word */
346 /* hWordTy = size of host word */
bbv_instrument(VgCallbackClosure * closure,IRSB * sbIn,VexGuestLayout * layout,VexGuestExtents * vge,VexArchInfo * archinfo_host,IRType gWordTy,IRType hWordTy)347 static IRSB* bbv_instrument ( VgCallbackClosure* closure,
348 IRSB* sbIn, VexGuestLayout* layout,
349 VexGuestExtents* vge,
350 VexArchInfo* archinfo_host,
351 IRType gWordTy, IRType hWordTy )
352 {
353 Int i,n_instrs=1;
354 IRSB *sbOut;
355 IRStmt *st;
356 struct BB_info *bbInfo;
357 Addr64 origAddr,ourAddr;
358 IRDirty *di;
359 IRExpr **argv, *arg1;
360 Int regparms,opcode_type;
361
362 /* We don't handle a host/guest word size mismatch */
363 if (gWordTy != hWordTy) {
364 VG_(tool_panic)("host/guest word size mismatch");
365 }
366
367 /* Set up SB */
368 sbOut = deepCopyIRSBExceptStmts(sbIn);
369
370 /* Copy verbatim any IR preamble preceding the first IMark */
371 i = 0;
372 while ( (i < sbIn->stmts_used) && (sbIn->stmts[i]->tag!=Ist_IMark)) {
373 addStmtToIRSB( sbOut, sbIn->stmts[i] );
374 i++;
375 }
376
377 /* Get the first statement */
378 tl_assert(sbIn->stmts_used > 0);
379 st = sbIn->stmts[i];
380
381 /* double check we are at a Mark statement */
382 tl_assert(Ist_IMark == st->tag);
383
384 origAddr=st->Ist.IMark.addr;
385
386 /* Get the BB_info */
387 bbInfo = VG_(OSetGen_Lookup)(instr_info_table, &origAddr);
388
389 if (bbInfo==NULL) {
390
391 /* BB never translated before (at this address, at least; */
392 /* could have been unloaded and then reloaded elsewhere in memory) */
393
394 /* allocate and initialize a new basic block structure */
395 bbInfo=VG_(OSetGen_AllocNode)(instr_info_table, sizeof(struct BB_info));
396 bbInfo->BB_addr = origAddr;
397 bbInfo->n_instrs = n_instrs;
398 bbInfo->inst_counter=VG_(calloc)("bbv_instrument",
399 allocated_threads,
400 sizeof(Int));
401
402 /* assign a unique block number */
403 bbInfo->block_num=block_num;
404 block_num++;
405 /* get function name and entry point information */
406 VG_(get_fnname)(origAddr,bbInfo->fn_name,FUNCTION_NAME_LENGTH);
407 bbInfo->is_entry=VG_(get_fnname_if_entry)(origAddr, bbInfo->fn_name,
408 FUNCTION_NAME_LENGTH);
409 /* insert structure into table */
410 VG_(OSetGen_Insert)( instr_info_table, bbInfo );
411 }
412
413 /* Iterate through the basic block, putting the original */
414 /* instructions in place, plus putting a call to updateBBV */
415 /* for each original instruction */
416
417 /* This is less efficient than only instrumenting the BB */
418 /* But it gives proper results given the fact that */
419 /* valgrind uses superblocks (not basic blocks) by default */
420
421
422 while(i < sbIn->stmts_used) {
423 st=sbIn->stmts[i];
424
425 if (st->tag == Ist_IMark) {
426
427 ourAddr = st->Ist.IMark.addr;
428
429 opcode_type=get_inst_type(st->Ist.IMark.len,ourAddr);
430
431 regparms=1;
432 arg1= mkIRExpr_HWord( (HWord)bbInfo);
433 argv= mkIRExprVec_1(arg1);
434
435
436 if (opcode_type&REP_INSTRUCTION) {
437 arg1= mkIRExpr_HWord(ourAddr);
438 argv= mkIRExprVec_1(arg1);
439 di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV_rep",
440 VG_(fnptr_to_fnentry)( &per_instruction_BBV_rep ),
441 argv);
442 }
443 else if (opcode_type&FLDCW_INSTRUCTION) {
444 di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV_fldcw",
445 VG_(fnptr_to_fnentry)( &per_instruction_BBV_fldcw ),
446 argv);
447 }
448 else {
449 di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV",
450 VG_(fnptr_to_fnentry)( &per_instruction_BBV ),
451 argv);
452 }
453
454
455 /* Insert our call */
456 addStmtToIRSB( sbOut, IRStmt_Dirty(di));
457 }
458
459 /* Insert the original instruction */
460 addStmtToIRSB( sbOut, st );
461
462 i++;
463 }
464
465 return sbOut;
466 }
467
/* Grow the per-thread bookkeeping from old_number to new_number slots. */
/*                                                                      */
/* old:        current thread_info array (handed to VG_(realloc)).      */
/* old_number: number of slots that are already initialised.            */
/* new_number: number of slots wanted.                                  */
/*                                                                      */
/* Returns the resized array.  Slots old_number..new_number-1 are       */
/* reset, and the inst_counter array of every block already in          */
/* instr_info_table is resized and zero-filled over the same range.     */
static struct thread_info *allocate_new_thread(struct thread_info *old,
                                               Int old_number, Int new_number)
{
   struct thread_info *grown;
   struct BB_info *node;
   Int idx;

   grown = VG_(realloc)("bbv_main.c allocate_threads",
                        old,
                        new_number*sizeof(struct thread_info));

   /* Reset every added slot; there can be more than one when the */
   /* new thread number is not contiguous with the old ones       */
   idx = old_number;
   while (idx < new_number) {
      struct thread_info *fresh = &grown[idx];

      fresh->dyn_instr = 0;
      fresh->total_instr = 0;
      fresh->last_rep_addr = 0;
      fresh->rep_count = 0;
      fresh->global_rep_count = 0;
      fresh->unique_rep_count = 0;
      fresh->fldcw_count = 0;
      fresh->bbtrace_fd = -1;

      idx++;
   }

   /* Give every known basic block counters for the added threads */
   VG_(OSetGen_ResetIter)(instr_info_table);
   for (node = VG_(OSetGen_Next)(instr_info_table);
        node != NULL;
        node = VG_(OSetGen_Next)(instr_info_table)) {

      Int *counters = VG_(realloc)("bbv_main.c inst_counter",
                                   node->inst_counter,
                                   new_number*sizeof(Int));

      for (idx = old_number; idx < new_number; idx++) {
         counters[idx] = 0;
      }
      node->inst_counter = counters;
   }

   return grown;
}
505
/* Callback registered with VG_(track_start_client_code).              */
/*                                                                     */
/* tid:   thread id; becomes current_thread.                           */
/* nDisp: not used.                                                    */
/*                                                                     */
/* The per-thread tables are grown first when tid has no slot yet.     */
static void bbv_thread_called ( ThreadId tid, ULong nDisp )
{
   if (tid >= allocated_threads) {
      /* slots 0..tid must all exist */
      Int wanted = tid+1;

      bbv_thread = allocate_new_thread(bbv_thread, allocated_threads, wanted);
      allocated_threads = wanted;
   }

   current_thread = tid;
}
514
515
516
517
518 /*--------------------------------------------------------------------*/
519 /*--- Setup ---*/
520 /*--------------------------------------------------------------------*/
521
bbv_post_clo_init(void)522 static void bbv_post_clo_init(void)
523 {
524 bb_out_file =
525 VG_(expand_file_name)("--bb-out-file", clo_bb_out_file);
526
527 /* Try a closer approximation of basic blocks */
528 /* This is the same as the command line option */
529 /* --vex-guest-chase-thresh=0 */
530 VG_(clo_vex_control).guest_chase_thresh = 0;
531 }
532
533 /* Parse the command line options */
bbv_process_cmd_line_option(const HChar * arg)534 static Bool bbv_process_cmd_line_option(const HChar* arg)
535 {
536 if VG_INT_CLO (arg, "--interval-size", interval_size) {}
537 else if VG_STR_CLO (arg, "--bb-out-file", clo_bb_out_file) {}
538 else if VG_STR_CLO (arg, "--pc-out-file", clo_pc_out_file) {
539 generate_pc_file = True;
540 }
541 else if VG_BOOL_CLO (arg, "--instr-count-only", instr_count_only) {}
542 else {
543 return False;
544 }
545
546 return True;
547 }
548
bbv_print_usage(void)549 static void bbv_print_usage(void)
550 {
551 VG_(printf)(
552 " --bb-out-file=<file> filename for BBV info\n"
553 " --pc-out-file=<file> filename for BB addresses and function names\n"
554 " --interval-size=<num> interval size\n"
555 " --instr-count-only=yes|no only print total instruction count\n"
556 );
557 }
558
bbv_print_debug_usage(void)559 static void bbv_print_debug_usage(void)
560 {
561 VG_(printf)(" (none)\n");
562 }
563
/* Final reporting, run when the client program is done.               */
/*                                                                     */
/* exitcode: not used.                                                 */
/*                                                                     */
/* Writes the PC file when one was requested.  Then, for every thread  */
/* that executed at least one instruction, a summary is printed to the */
/* user and appended to that thread's trace file (opened here if it    */
/* was never opened), after which the file is closed.                  */
static void bbv_fini(Int exitcode)
{
   Int i;

   if (generate_pc_file) {
      dumpPcFile();
   }

   for(i=0;i<allocated_threads;i++) {

      ULong intervals;

      /* threads that never ran get no summary and no file */
      if (bbv_thread[i].total_instr==0) {
         continue;
      }

      /* --interval-size=0 must not turn into a division by zero */
      if (interval_size != 0) {
         intervals = bbv_thread[i].total_instr/(ULong)interval_size;
      }
      else {
         intervals = 0;
      }

      /* The counters are unsigned, hence %llu rather than %lld */
      VG_(snprintf)(buf, sizeof(buf), "\n\n"
                    "# Thread %d\n"
                    "# Total intervals: %d (Interval Size %d)\n"
                    "# Total instructions: %llu\n"
                    "# Total reps: %llu\n"
                    "# Unique reps: %llu\n"
                    "# Total fldcw instructions: %llu\n\n",
                    i,
                    (Int)intervals,
                    interval_size,
                    bbv_thread[i].total_instr,
                    bbv_thread[i].global_rep_count,
                    bbv_thread[i].unique_rep_count,
                    bbv_thread[i].fldcw_count);

      /* Print results to display */
      VG_(umsg)("%s\n", buf);

      /* open the output file if it hasn't already */
      if (bbv_thread[i].bbtrace_fd < 0) {
         bbv_thread[i].bbtrace_fd=open_tracefile(i);
      }

      /* Also print to results file */
      VG_(write)(bbv_thread[i].bbtrace_fd,(void*)buf,VG_(strlen)(buf));
      VG_(close)(bbv_thread[i].bbtrace_fd);
   }
}
604
bbv_pre_clo_init(void)605 static void bbv_pre_clo_init(void)
606 {
607 VG_(details_name) ("exp-bbv");
608 VG_(details_version) (NULL);
609 VG_(details_description) ("a SimPoint basic block vector generator");
610 VG_(details_copyright_author)(
611 "Copyright (C) 2006-2013 Vince Weaver");
612 VG_(details_bug_reports_to) (VG_BUGS_TO);
613
614 VG_(basic_tool_funcs) (bbv_post_clo_init,
615 bbv_instrument,
616 bbv_fini);
617
618 VG_(needs_command_line_options)(bbv_process_cmd_line_option,
619 bbv_print_usage,
620 bbv_print_debug_usage);
621
622 VG_(track_start_client_code)( bbv_thread_called );
623
624
625 instr_info_table = VG_(OSetGen_Create)(/*keyOff*/0,
626 NULL,
627 VG_(malloc), "bbv.1", VG_(free));
628
629 bbv_thread=allocate_new_thread(bbv_thread,0,allocated_threads);
630 }
631
/* Names bbv_pre_clo_init as the routine handed to the Valgrind core.  */
/* NOTE(review): the macro comes from the tool interface headers and   */
/* presumably also records the interface version the tool was built    */
/* against -- confirm against pub_tool_tooliface.h.                    */
VG_DETERMINE_INTERFACE_VERSION(bbv_pre_clo_init)
633
634 /*--------------------------------------------------------------------*/
635 /*--- end ---*/
636 /*--------------------------------------------------------------------*/
637