1
2 /*--------------------------------------------------------------------*/
3 /*--- Cachegrind: everything but the simulation itself. ---*/
4 /*--- cg_main.c ---*/
5 /*--------------------------------------------------------------------*/
6
7 /*
8 This file is part of Cachegrind, a Valgrind tool for cache
9 profiling programs.
10
11 Copyright (C) 2002-2010 Nicholas Nethercote
12 njn@valgrind.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30 */
31
32 #include "pub_tool_basics.h"
33 #include "pub_tool_vki.h"
34 #include "pub_tool_debuginfo.h"
35 #include "pub_tool_libcbase.h"
36 #include "pub_tool_libcassert.h"
37 #include "pub_tool_libcfile.h"
38 #include "pub_tool_libcprint.h"
39 #include "pub_tool_libcproc.h"
40 #include "pub_tool_machine.h"
41 #include "pub_tool_mallocfree.h"
42 #include "pub_tool_options.h"
43 #include "pub_tool_oset.h"
44 #include "pub_tool_tooliface.h"
45 #include "pub_tool_xarray.h"
46 #include "pub_tool_clientstate.h"
47 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
48
49 #include "cg_arch.h"
50 #include "cg_sim.c"
51 #include "cg_branchpred.c"
52
53 /*------------------------------------------------------------*/
54 /*--- Constants ---*/
55 /*------------------------------------------------------------*/
56
57 /* Set to 1 for very verbose debugging */
58 #define DEBUG_CG 0
59
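// Smallest cache line size the simulation accepts (see check_cache()):
// with anything smaller, a single instruction could straddle three lines.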
60 #define MIN_LINE_SIZE 16
61 #define FILE_LEN VKI_PATH_MAX
62 #define FN_LEN 256
63
64 /*------------------------------------------------------------*/
65 /*--- Options ---*/
66 /*------------------------------------------------------------*/
67
68 static Bool clo_cache_sim = True; /* do cache simulation? */
69 static Bool clo_branch_sim = False; /* do branch simulation? */
70 static Char* clo_cachegrind_out_file = "cachegrind.out.%p";
71
72 /*------------------------------------------------------------*/
73 /*--- Types and Data Structures ---*/
74 /*------------------------------------------------------------*/
75
76 typedef
77 struct {
78 ULong a; /* total # memory accesses of this kind */
79 ULong m1; /* misses in the first level cache */
80 ULong mL; /* misses in the last-level (LL) cache */
81 }
82 CacheCC;
83
84 typedef
85 struct {
86 ULong b; /* total # branches of this kind */
87 ULong mp; /* number of branches mispredicted */
88 }
89 BranchCC;
90
91 //------------------------------------------------------------
92 // Primary data structure #1: CC table
93 // - Holds the per-source-line hit/miss stats, grouped by file/function/line.
94 // - an ordered set of CCs. CC indexing done by file/function/line (as
95 // determined from the instrAddr).
96 // - Traversed for dumping stats at end in file/func/line hierarchy.
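// - For example, every instruction whose debug info resolves to
//   ("foo.c", "main", 42) shares the single LineCC keyed by that triple.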
97
98 typedef struct {
99 Char* file;
100 Char* fn;
101 Int line;
102 }
103 CodeLoc;
104
105 typedef struct {
106 CodeLoc loc; /* Source location that these counts pertain to */
107 CacheCC Ir; /* Insn read counts */
108 CacheCC Dr; /* Data read counts */
109 CacheCC Dw; /* Data write/modify counts */
110 BranchCC Bc; /* Conditional branch counts */
111 BranchCC Bi; /* Indirect branch counts */
112 } LineCC;
113
114 // First compare file, then fn, then line.
115 static Word cmp_CodeLoc_LineCC(const void *vloc, const void *vcc)
116 {
117 Word res;
118 CodeLoc* a = (CodeLoc*)vloc;
119 CodeLoc* b = &(((LineCC*)vcc)->loc);
120
121 res = VG_(strcmp)(a->file, b->file);
122 if (0 != res)
123 return res;
124
125 res = VG_(strcmp)(a->fn, b->fn);
126 if (0 != res)
127 return res;
128
129 return a->line - b->line;
130 }
131
132 static OSet* CC_table;
133
134 //------------------------------------------------------------
135 // Primary data structure #2: InstrInfo table
136 // - Holds the cached info about each instr that is used for simulation.
137 // - table(SB_start_addr, list(InstrInfo))
138 // - For each SB, each InstrInfo in the list holds info about the
139 // instruction (instrLen, instrAddr, etc), plus a pointer to its line
140 // CC. This node is what's passed to the simulation function.
141 // - When SBs are discarded the relevant list(InstrInfo) is freed.
142
143 typedef struct _InstrInfo InstrInfo;
144 struct _InstrInfo {
145 Addr instr_addr;
146 UChar instr_len;
147 LineCC* parent; // parent line-CC
148 };
149
150 typedef struct _SB_info SB_info;
151 struct _SB_info {
152 Addr SB_addr; // key; MUST BE FIRST
153 Int n_instrs;
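// Variable-length: get_SB_info() allocates room for n_instrs entries here.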
154 InstrInfo instrs[0];
155 };
156
157 static OSet* instrInfoTable;
158
159 //------------------------------------------------------------
160 // Secondary data structure: string table
161 // - holds strings, avoiding dups
162 // - used for filenames and function names, each of which will be
163 // pointed to by one or more CCs.
164 // - it also allows equality checks just by pointer comparison, which
165 // is good when printing the output file at the end.
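// - For example, all LineCCs in "foo.c" point at the one interned copy of
//   that filename, so the "new file?" test in fprint_CC_table_and_calc_totals
//   is a pointer compare rather than a strcmp().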
166
167 static OSet* stringTable;
168
169 //------------------------------------------------------------
170 // Stats
171 static Int distinct_files = 0;
172 static Int distinct_fns = 0;
173 static Int distinct_lines = 0;
174 static Int distinct_instrs = 0;
175
176 static Int full_debugs = 0;
177 static Int file_line_debugs = 0;
178 static Int fn_debugs = 0;
179 static Int no_debugs = 0;
180
181 /*------------------------------------------------------------*/
182 /*--- String table operations ---*/
183 /*------------------------------------------------------------*/
184
185 static Word stringCmp( const void* key, const void* elem )
186 {
187 return VG_(strcmp)(*(Char**)key, *(Char**)elem);
188 }
189
190 // Get a permanent string; either pull it out of the string table if it's
191 // been encountered before, or dup it and put it into the string table.
192 static Char* get_perm_string(Char* s)
193 {
194 Char** s_ptr = VG_(OSetGen_Lookup)(stringTable, &s);
195 if (s_ptr) {
196 return *s_ptr;
197 } else {
198 Char** s_node = VG_(OSetGen_AllocNode)(stringTable, sizeof(Char*));
199 *s_node = VG_(strdup)("cg.main.gps.1", s);
200 VG_(OSetGen_Insert)(stringTable, s_node);
201 return *s_node;
202 }
203 }
204
205 /*------------------------------------------------------------*/
206 /*--- CC table operations ---*/
207 /*------------------------------------------------------------*/
208
209 static void get_debug_info(Addr instr_addr, Char file[FILE_LEN],
210 Char fn[FN_LEN], Int* line)
211 {
212 Char dir[FILE_LEN];
213 Bool found_dirname;
214 Bool found_file_line = VG_(get_filename_linenum)(
215 instr_addr,
216 file, FILE_LEN,
217 dir, FILE_LEN, &found_dirname,
218 line
219 );
220 Bool found_fn = VG_(get_fnname)(instr_addr, fn, FN_LEN);
221
222 if (!found_file_line) {
223 VG_(strcpy)(file, "???");
224 *line = 0;
225 }
226 if (!found_fn) {
227 VG_(strcpy)(fn, "???");
228 }
229
230 if (found_dirname) {
231 // +1 for the '/'.
232 tl_assert(VG_(strlen)(dir) + VG_(strlen)(file) + 1 < FILE_LEN);
233 VG_(strcat)(dir, "/"); // Append '/'
234 VG_(strcat)(dir, file); // Append file to dir
235 VG_(strcpy)(file, dir); // Move dir+file to file
236 }
237
238 if (found_file_line) {
239 if (found_fn) full_debugs++;
240 else file_line_debugs++;
241 } else {
242 if (found_fn) fn_debugs++;
243 else no_debugs++;
244 }
245 }
246
247 // Do a three step traversal: by file, then fn, then line.
248 // Returns a pointer to the line CC, creates a new one if necessary.
249 static LineCC* get_lineCC(Addr origAddr)
250 {
251 Char file[FILE_LEN], fn[FN_LEN];
252 Int line;
253 CodeLoc loc;
254 LineCC* lineCC;
255
256 get_debug_info(origAddr, file, fn, &line);
257
258 loc.file = file;
259 loc.fn = fn;
260 loc.line = line;
261
262 lineCC = VG_(OSetGen_Lookup)(CC_table, &loc);
263 if (!lineCC) {
264 // Allocate and zero a new node.
265 lineCC = VG_(OSetGen_AllocNode)(CC_table, sizeof(LineCC));
266 lineCC->loc.file = get_perm_string(loc.file);
267 lineCC->loc.fn = get_perm_string(loc.fn);
268 lineCC->loc.line = loc.line;
269 lineCC->Ir.a = 0;
270 lineCC->Ir.m1 = 0;
271 lineCC->Ir.mL = 0;
272 lineCC->Dr.a = 0;
273 lineCC->Dr.m1 = 0;
274 lineCC->Dr.mL = 0;
275 lineCC->Dw.a = 0;
276 lineCC->Dw.m1 = 0;
277 lineCC->Dw.mL = 0;
278 lineCC->Bc.b = 0;
279 lineCC->Bc.mp = 0;
280 lineCC->Bi.b = 0;
281 lineCC->Bi.mp = 0;
282 VG_(OSetGen_Insert)(CC_table, lineCC);
283 }
284
285 return lineCC;
286 }
287
288 /*------------------------------------------------------------*/
289 /*--- Cache simulation functions ---*/
290 /*------------------------------------------------------------*/
291
292 // Only used with --cache-sim=no.
293 static VG_REGPARM(1)
294 void log_1I(InstrInfo* n)
295 {
296 n->parent->Ir.a++;
297 }
298
299 // Only used with --cache-sim=no.
300 static VG_REGPARM(2)
301 void log_2I(InstrInfo* n, InstrInfo* n2)
302 {
303 n->parent->Ir.a++;
304 n2->parent->Ir.a++;
305 }
306
307 // Only used with --cache-sim=no.
308 static VG_REGPARM(3)
309 void log_3I(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
310 {
311 n->parent->Ir.a++;
312 n2->parent->Ir.a++;
313 n3->parent->Ir.a++;
314 }
315
316 static VG_REGPARM(1)
317 void log_1I_0D_cache_access(InstrInfo* n)
318 {
319 //VG_(printf)("1I_0D : CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n",
320 // n, n->instr_addr, n->instr_len);
321 cachesim_I1_doref(n->instr_addr, n->instr_len,
322 &n->parent->Ir.m1, &n->parent->Ir.mL);
323 n->parent->Ir.a++;
324 }
325
326 static VG_REGPARM(2)
327 void log_2I_0D_cache_access(InstrInfo* n, InstrInfo* n2)
328 {
329 //VG_(printf)("2I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
330 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n",
331 // n, n->instr_addr, n->instr_len,
332 // n2, n2->instr_addr, n2->instr_len);
333 cachesim_I1_doref(n->instr_addr, n->instr_len,
334 &n->parent->Ir.m1, &n->parent->Ir.mL);
335 n->parent->Ir.a++;
336 cachesim_I1_doref(n2->instr_addr, n2->instr_len,
337 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
338 n2->parent->Ir.a++;
339 }
340
341 static VG_REGPARM(3)
342 void log_3I_0D_cache_access(InstrInfo* n, InstrInfo* n2, InstrInfo* n3)
343 {
344 //VG_(printf)("3I_0D : CC1addr=0x%010lx, i1addr=0x%010lx, i1size=%lu\n"
345 // " CC2addr=0x%010lx, i2addr=0x%010lx, i2size=%lu\n"
346 // " CC3addr=0x%010lx, i3addr=0x%010lx, i3size=%lu\n",
347 // n, n->instr_addr, n->instr_len,
348 // n2, n2->instr_addr, n2->instr_len,
349 // n3, n3->instr_addr, n3->instr_len);
350 cachesim_I1_doref(n->instr_addr, n->instr_len,
351 &n->parent->Ir.m1, &n->parent->Ir.mL);
352 n->parent->Ir.a++;
353 cachesim_I1_doref(n2->instr_addr, n2->instr_len,
354 &n2->parent->Ir.m1, &n2->parent->Ir.mL);
355 n2->parent->Ir.a++;
356 cachesim_I1_doref(n3->instr_addr, n3->instr_len,
357 &n3->parent->Ir.m1, &n3->parent->Ir.mL);
358 n3->parent->Ir.a++;
359 }
360
361 static VG_REGPARM(3)
362 void log_1I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
363 {
364 //VG_(printf)("1I_1Dr: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
365 // " daddr=0x%010lx, dsize=%lu\n",
366 // n, n->instr_addr, n->instr_len, data_addr, data_size);
367 cachesim_I1_doref(n->instr_addr, n->instr_len,
368 &n->parent->Ir.m1, &n->parent->Ir.mL);
369 n->parent->Ir.a++;
370
371 cachesim_D1_doref(data_addr, data_size,
372 &n->parent->Dr.m1, &n->parent->Dr.mL);
373 n->parent->Dr.a++;
374 }
375
376 static VG_REGPARM(3)
377 void log_1I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
378 {
379 //VG_(printf)("1I_1Dw: CCaddr=0x%010lx, iaddr=0x%010lx, isize=%lu\n"
380 // " daddr=0x%010lx, dsize=%lu\n",
381 // n, n->instr_addr, n->instr_len, data_addr, data_size);
382 cachesim_I1_doref(n->instr_addr, n->instr_len,
383 &n->parent->Ir.m1, &n->parent->Ir.mL);
384 n->parent->Ir.a++;
385
386 cachesim_D1_doref(data_addr, data_size,
387 &n->parent->Dw.m1, &n->parent->Dw.mL);
388 n->parent->Dw.a++;
389 }
390
391 static VG_REGPARM(3)
392 void log_0I_1Dr_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
393 {
394 //VG_(printf)("0I_1Dr: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
395 // n, data_addr, data_size);
396 cachesim_D1_doref(data_addr, data_size,
397 &n->parent->Dr.m1, &n->parent->Dr.mL);
398 n->parent->Dr.a++;
399 }
400
401 static VG_REGPARM(3)
402 void log_0I_1Dw_cache_access(InstrInfo* n, Addr data_addr, Word data_size)
403 {
404 //VG_(printf)("0I_1Dw: CCaddr=0x%010lx, daddr=0x%010lx, dsize=%lu\n",
405 // n, data_addr, data_size);
406 cachesim_D1_doref(data_addr, data_size,
407 &n->parent->Dw.m1, &n->parent->Dw.mL);
408 n->parent->Dw.a++;
409 }
410
411 /* For branches, we consult two different predictors, one which
412 predicts taken/untaken for conditional branches, and the other
413 which predicts the branch target address for indirect branches
414 (jump-to-register style ones). */
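/* The helpers below treat bit 0 of the predictor's result as "mispredicted":
   it is masked with 1 and added to the relevant .mp counter. */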
415
416 static VG_REGPARM(2)
417 void log_cond_branch(InstrInfo* n, Word taken)
418 {
419 //VG_(printf)("cbrnch: CCaddr=0x%010lx, taken=0x%010lx\n",
420 // n, taken);
421 n->parent->Bc.b++;
422 n->parent->Bc.mp
423 += (1 & do_cond_branch_predict(n->instr_addr, taken));
424 }
425
426 static VG_REGPARM(2)
427 void log_ind_branch(InstrInfo* n, UWord actual_dst)
428 {
429 //VG_(printf)("ibrnch: CCaddr=0x%010lx, dst=0x%010lx\n",
430 // n, actual_dst);
431 n->parent->Bi.b++;
432 n->parent->Bi.mp
433 += (1 & do_ind_branch_predict(n->instr_addr, actual_dst));
434 }
435
436
437 /*------------------------------------------------------------*/
438 /*--- Instrumentation types and structures ---*/
439 /*------------------------------------------------------------*/
440
441 /* Maintain an ordered list of memory events which are outstanding, in
442 the sense that no IR has yet been generated to do the relevant
443 helper calls. The BB is scanned top to bottom and memory events
444 are added to the end of the list, merging with the most recent
445 notified event where possible (Dw immediately following Dr and
446 having the same size and EA can be merged).
447
448 This merging is done so that for architectures which have
449 load-op-store instructions (x86, amd64), the insn is treated as if
450 it makes just one memory reference (a modify), rather than two (a
451 read followed by a write at the same address).
452
453 At various points the list will need to be flushed, that is, IR
454 generated from it. That must happen before any possible exit from
455 the block (the end, or an IRStmt_Exit). Flushing also takes place
456 when there is no space to add a new event.
457
458 If we require the simulation statistics to be up to date with
459 respect to possible memory exceptions, then the list would have to
460 be flushed before each memory reference. That would however lose
461 performance by inhibiting event-merging during flushing.
462
463 Flushing the list consists of walking it start to end and emitting
464 instrumentation IR for each event, in the order in which they
465 appear. It may be possible to emit a single call for two adjacent
466 events in order to reduce the number of helper function calls made.
467 For example, it could well be profitable to handle two adjacent Ir
468 events with a single helper call. */
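/* For example, an x86 load-op-store such as "addl %eax,(%ebx)" produces an
   Ir event (from its IMark) followed by a 4-byte Dr and a 4-byte Dw at the
   same address; addEvent_Dw turns the Dr into a Dm, and flushEvents then
   emits a single log_1I_1Dr_cache_access call for the whole instruction. */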
469
470 typedef
471 IRExpr
472 IRAtom;
473
474 typedef
475 enum {
476 Ev_Ir, // Instruction read
477 Ev_Dr, // Data read
478 Ev_Dw, // Data write
479 Ev_Dm, // Data modify (read then write)
480 Ev_Bc, // branch conditional
481 Ev_Bi // branch indirect (to unknown destination)
482 }
483 EventTag;
484
485 typedef
486 struct {
487 EventTag tag;
488 InstrInfo* inode;
489 union {
490 struct {
491 } Ir;
492 struct {
493 IRAtom* ea;
494 Int szB;
495 } Dr;
496 struct {
497 IRAtom* ea;
498 Int szB;
499 } Dw;
500 struct {
501 IRAtom* ea;
502 Int szB;
503 } Dm;
504 struct {
505 IRAtom* taken; /* :: Ity_I1 */
506 } Bc;
507 struct {
508 IRAtom* dst;
509 } Bi;
510 } Ev;
511 }
512 Event;
513
514 static void init_Event ( Event* ev ) {
515 VG_(memset)(ev, 0, sizeof(Event));
516 }
517
518 static IRAtom* get_Event_dea ( Event* ev ) {
519 switch (ev->tag) {
520 case Ev_Dr: return ev->Ev.Dr.ea;
521 case Ev_Dw: return ev->Ev.Dw.ea;
522 case Ev_Dm: return ev->Ev.Dm.ea;
523 default: tl_assert(0);
524 }
525 }
526
527 static Int get_Event_dszB ( Event* ev ) {
528 switch (ev->tag) {
529 case Ev_Dr: return ev->Ev.Dr.szB;
530 case Ev_Dw: return ev->Ev.Dw.szB;
531 case Ev_Dm: return ev->Ev.Dm.szB;
532 default: tl_assert(0);
533 }
534 }
535
536
537 /* Up to this many unnotified events are allowed. Number is
538 arbitrary. Larger numbers allow more event merging to occur, but
539 potentially induce more spilling due to extending live ranges of
540 address temporaries. */
541 #define N_EVENTS 16
542
543
544 /* A struct which holds all the running state during instrumentation.
545 Mostly to avoid passing loads of parameters everywhere. */
546 typedef
547 struct {
548 /* The current outstanding-memory-event list. */
549 Event events[N_EVENTS];
550 Int events_used;
551
552 /* The array of InstrInfo bins for the BB. */
553 SB_info* sbInfo;
554
555 /* Number of InstrInfo bins 'used' so far. */
556 Int sbInfo_i;
557
558 /* The output SB being constructed. */
559 IRSB* sbOut;
560 }
561 CgState;
562
563
564 /*------------------------------------------------------------*/
565 /*--- Instrumentation main ---*/
566 /*------------------------------------------------------------*/
567
568 // Note that origAddr is the real origAddr, not the address of the first
569 // instruction in the block (they can be different due to redirection).
570 static
571 SB_info* get_SB_info(IRSB* sbIn, Addr origAddr)
572 {
573 Int i, n_instrs;
574 IRStmt* st;
575 SB_info* sbInfo;
576
577 // Count number of original instrs in SB
578 n_instrs = 0;
579 for (i = 0; i < sbIn->stmts_used; i++) {
580 st = sbIn->stmts[i];
581 if (Ist_IMark == st->tag) n_instrs++;
582 }
583
584 // Check that we don't have an entry for this BB in the instr-info table.
585 // If this assertion fails, there has been some screwup: some
586 // translations must have been discarded but Cachegrind hasn't discarded
587 // the corresponding entries in the instr-info table.
588 sbInfo = VG_(OSetGen_Lookup)(instrInfoTable, &origAddr);
589 tl_assert(NULL == sbInfo);
590
591 // BB never translated before (at this address, at least; could have
592 // been unloaded and then reloaded elsewhere in memory)
593 sbInfo = VG_(OSetGen_AllocNode)(instrInfoTable,
594 sizeof(SB_info) + n_instrs*sizeof(InstrInfo));
595 sbInfo->SB_addr = origAddr;
596 sbInfo->n_instrs = n_instrs;
597 VG_(OSetGen_Insert)( instrInfoTable, sbInfo );
598 distinct_instrs++;
599
600 return sbInfo;
601 }
602
603
604 static void showEvent ( Event* ev )
605 {
606 switch (ev->tag) {
607 case Ev_Ir:
608 VG_(printf)("Ir %p\n", ev->inode);
609 break;
610 case Ev_Dr:
611 VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
612 ppIRExpr(ev->Ev.Dr.ea);
613 VG_(printf)("\n");
614 break;
615 case Ev_Dw:
616 VG_(printf)("Dw %p %d EA=", ev->inode, ev->Ev.Dw.szB);
617 ppIRExpr(ev->Ev.Dw.ea);
618 VG_(printf)("\n");
619 break;
620 case Ev_Dm:
621 VG_(printf)("Dm %p %d EA=", ev->inode, ev->Ev.Dm.szB);
622 ppIRExpr(ev->Ev.Dm.ea);
623 VG_(printf)("\n");
624 break;
625 case Ev_Bc:
626 VG_(printf)("Bc %p GA=", ev->inode);
627 ppIRExpr(ev->Ev.Bc.taken);
628 VG_(printf)("\n");
629 break;
630 case Ev_Bi:
631 VG_(printf)("Bi %p DST=", ev->inode);
632 ppIRExpr(ev->Ev.Bi.dst);
633 VG_(printf)("\n");
634 break;
635 default:
636 tl_assert(0);
637 break;
638 }
639 }
640
641 // Reserve and initialise an InstrInfo for the first mention of a new insn.
642 static
643 InstrInfo* setup_InstrInfo ( CgState* cgs, Addr instr_addr, UInt instr_len )
644 {
645 InstrInfo* i_node;
646 tl_assert(cgs->sbInfo_i >= 0);
647 tl_assert(cgs->sbInfo_i < cgs->sbInfo->n_instrs);
648 i_node = &cgs->sbInfo->instrs[ cgs->sbInfo_i ];
649 i_node->instr_addr = instr_addr;
650 i_node->instr_len = instr_len;
651 i_node->parent = get_lineCC(instr_addr);
652 cgs->sbInfo_i++;
653 return i_node;
654 }
655
656
657 /* Generate code for all outstanding memory events, and mark the queue
658 empty. Code is generated into cgs->sbOut, and this activity
659 'consumes' slots in cgs->sbInfo. */
660
661 static void flushEvents ( CgState* cgs )
662 {
663 Int i, regparms;
664 Char* helperName;
665 void* helperAddr;
666 IRExpr** argv;
667 IRExpr* i_node_expr;
668 IRDirty* di;
669 Event* ev;
670 Event* ev2;
671 Event* ev3;
672
673 i = 0;
674 while (i < cgs->events_used) {
675
676 helperName = NULL;
677 helperAddr = NULL;
678 argv = NULL;
679 regparms = 0;
680
681 /* generate IR to notify event i and possibly the ones
682 immediately following it. */
683 tl_assert(i >= 0 && i < cgs->events_used);
684
685 ev = &cgs->events[i];
686 ev2 = ( i < cgs->events_used-1 ? &cgs->events[i+1] : NULL );
687 ev3 = ( i < cgs->events_used-2 ? &cgs->events[i+2] : NULL );
688
689 if (DEBUG_CG) {
690 VG_(printf)(" flush ");
691 showEvent( ev );
692 }
693
694 i_node_expr = mkIRExpr_HWord( (HWord)ev->inode );
695
696 /* Decide on helper fn to call and args to pass it, and advance
697 i appropriately. */
698 switch (ev->tag) {
699 case Ev_Ir:
700 /* Merge an Ir with a following Dr/Dm. */
701 if (ev2 && (ev2->tag == Ev_Dr || ev2->tag == Ev_Dm)) {
702 /* Why is this true? It's because we're merging an Ir
703 with a following Dr or Dm. The Ir derives from the
704 instruction's IMark and the Dr/Dm from data
705 references which follow it. In short it holds
706 because each insn starts with an IMark, hence an
707 Ev_Ir, and so these Dr/Dm must pertain to the
708 immediately preceding Ir. Same applies to analogous
709 assertions in the subsequent cases. */
710 tl_assert(ev2->inode == ev->inode);
711 helperName = "log_1I_1Dr_cache_access";
712 helperAddr = &log_1I_1Dr_cache_access;
713 argv = mkIRExprVec_3( i_node_expr,
714 get_Event_dea(ev2),
715 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
716 regparms = 3;
717 i += 2;
718 }
719 /* Merge an Ir with a following Dw. */
720 else
721 if (ev2 && ev2->tag == Ev_Dw) {
722 tl_assert(ev2->inode == ev->inode);
723 helperName = "log_1I_1Dw_cache_access";
724 helperAddr = &log_1I_1Dw_cache_access;
725 argv = mkIRExprVec_3( i_node_expr,
726 get_Event_dea(ev2),
727 mkIRExpr_HWord( get_Event_dszB(ev2) ) );
728 regparms = 3;
729 i += 2;
730 }
731 /* Merge an Ir with two following Irs. */
732 else
733 if (ev2 && ev3 && ev2->tag == Ev_Ir && ev3->tag == Ev_Ir)
734 {
735 if (clo_cache_sim) {
736 helperName = "log_3I_0D_cache_access";
737 helperAddr = &log_3I_0D_cache_access;
738 } else {
739 helperName = "log_3I";
740 helperAddr = &log_3I;
741 }
742 argv = mkIRExprVec_3( i_node_expr,
743 mkIRExpr_HWord( (HWord)ev2->inode ),
744 mkIRExpr_HWord( (HWord)ev3->inode ) );
745 regparms = 3;
746 i += 3;
747 }
748 /* Merge an Ir with one following Ir. */
749 else
750 if (ev2 && ev2->tag == Ev_Ir) {
751 if (clo_cache_sim) {
752 helperName = "log_2I_0D_cache_access";
753 helperAddr = &log_2I_0D_cache_access;
754 } else {
755 helperName = "log_2I";
756 helperAddr = &log_2I;
757 }
758 argv = mkIRExprVec_2( i_node_expr,
759 mkIRExpr_HWord( (HWord)ev2->inode ) );
760 regparms = 2;
761 i += 2;
762 }
763 /* No merging possible; emit as-is. */
764 else {
765 if (clo_cache_sim) {
766 helperName = "log_1I_0D_cache_access";
767 helperAddr = &log_1I_0D_cache_access;
768 } else {
769 helperName = "log_1I";
770 helperAddr = &log_1I;
771 }
772 argv = mkIRExprVec_1( i_node_expr );
773 regparms = 1;
774 i++;
775 }
776 break;
777 case Ev_Dr:
778 case Ev_Dm:
779 /* Data read or modify */
780 helperName = "log_0I_1Dr_cache_access";
781 helperAddr = &log_0I_1Dr_cache_access;
782 argv = mkIRExprVec_3( i_node_expr,
783 get_Event_dea(ev),
784 mkIRExpr_HWord( get_Event_dszB(ev) ) );
785 regparms = 3;
786 i++;
787 break;
788 case Ev_Dw:
789 /* Data write */
790 helperName = "log_0I_1Dw_cache_access";
791 helperAddr = &log_0I_1Dw_cache_access;
792 argv = mkIRExprVec_3( i_node_expr,
793 get_Event_dea(ev),
794 mkIRExpr_HWord( get_Event_dszB(ev) ) );
795 regparms = 3;
796 i++;
797 break;
798 case Ev_Bc:
799 /* Conditional branch */
800 helperName = "log_cond_branch";
801 helperAddr = &log_cond_branch;
802 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bc.taken );
803 regparms = 2;
804 i++;
805 break;
806 case Ev_Bi:
807 /* Branch to an unknown destination */
808 helperName = "log_ind_branch";
809 helperAddr = &log_ind_branch;
810 argv = mkIRExprVec_2( i_node_expr, ev->Ev.Bi.dst );
811 regparms = 2;
812 i++;
813 break;
814 default:
815 tl_assert(0);
816 }
817
818 /* Add the helper. */
819 tl_assert(helperName);
820 tl_assert(helperAddr);
821 tl_assert(argv);
822 di = unsafeIRDirty_0_N( regparms,
823 helperName, VG_(fnptr_to_fnentry)( helperAddr ),
824 argv );
825 addStmtToIRSB( cgs->sbOut, IRStmt_Dirty(di) );
826 }
827
828 cgs->events_used = 0;
829 }
830
831 static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
832 {
833 Event* evt;
834 if (cgs->events_used == N_EVENTS)
835 flushEvents(cgs);
836 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
837 evt = &cgs->events[cgs->events_used];
838 init_Event(evt);
839 evt->tag = Ev_Ir;
840 evt->inode = inode;
841 cgs->events_used++;
842 }
843
844 static
845 void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
846 {
847 Event* evt;
848 tl_assert(isIRAtom(ea));
849 tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
850 if (!clo_cache_sim)
851 return;
852 if (cgs->events_used == N_EVENTS)
853 flushEvents(cgs);
854 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
855 evt = &cgs->events[cgs->events_used];
856 init_Event(evt);
857 evt->tag = Ev_Dr;
858 evt->inode = inode;
859 evt->Ev.Dr.szB = datasize;
860 evt->Ev.Dr.ea = ea;
861 cgs->events_used++;
862 }
863
864 static
865 void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
866 {
867 Event* lastEvt;
868 Event* evt;
869
870 tl_assert(isIRAtom(ea));
871 tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
872
873 if (!clo_cache_sim)
874 return;
875
876 /* Is it possible to merge this write with the preceding read? */
877 lastEvt = cgs->events_used > 0 ? &cgs->events[cgs->events_used-1] : NULL;
878 if (lastEvt != NULL
879 && lastEvt->tag == Ev_Dr
880 && lastEvt->Ev.Dr.szB == datasize
881 && lastEvt->inode == inode
882 && eqIRAtom(lastEvt->Ev.Dr.ea, ea))
883 {
884 lastEvt->tag = Ev_Dm;
885 return;
886 }
887
888 /* No. Add as normal. */
889 if (cgs->events_used == N_EVENTS)
890 flushEvents(cgs);
891 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
892 evt = &cgs->events[cgs->events_used];
893 init_Event(evt);
894 evt->tag = Ev_Dw;
895 evt->inode = inode;
896 evt->Ev.Dw.szB = datasize;
897 evt->Ev.Dw.ea = ea;
898 cgs->events_used++;
899 }
900
901 static
902 void addEvent_Bc ( CgState* cgs, InstrInfo* inode, IRAtom* guard )
903 {
904 Event* evt;
905 tl_assert(isIRAtom(guard));
906 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, guard)
907 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
908 if (!clo_branch_sim)
909 return;
910 if (cgs->events_used == N_EVENTS)
911 flushEvents(cgs);
912 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
913 evt = &cgs->events[cgs->events_used];
914 init_Event(evt);
915 evt->tag = Ev_Bc;
916 evt->inode = inode;
917 evt->Ev.Bc.taken = guard;
918 cgs->events_used++;
919 }
920
921 static
922 void addEvent_Bi ( CgState* cgs, InstrInfo* inode, IRAtom* whereTo )
923 {
924 Event* evt;
925 tl_assert(isIRAtom(whereTo));
926 tl_assert(typeOfIRExpr(cgs->sbOut->tyenv, whereTo)
927 == (sizeof(HWord)==4 ? Ity_I32 : Ity_I64));
928 if (!clo_branch_sim)
929 return;
930 if (cgs->events_used == N_EVENTS)
931 flushEvents(cgs);
932 tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
933 evt = &cgs->events[cgs->events_used];
934 init_Event(evt);
935 evt->tag = Ev_Bi;
936 evt->inode = inode;
937 evt->Ev.Bi.dst = whereTo;
938 cgs->events_used++;
939 }
940
941 ////////////////////////////////////////////////////////////
942
943
944 static
945 IRSB* cg_instrument ( VgCallbackClosure* closure,
946 IRSB* sbIn,
947 VexGuestLayout* layout,
948 VexGuestExtents* vge,
949 IRType gWordTy, IRType hWordTy )
950 {
951 Int i, isize;
952 IRStmt* st;
953 Addr64 cia; /* address of current insn */
954 CgState cgs;
955 IRTypeEnv* tyenv = sbIn->tyenv;
956 InstrInfo* curr_inode = NULL;
957
958 if (gWordTy != hWordTy) {
959 /* We don't currently support this case. */
960 VG_(tool_panic)("host/guest word size mismatch");
961 }
962
963 // Set up new SB
964 cgs.sbOut = deepCopyIRSBExceptStmts(sbIn);
965
966 // Copy verbatim any IR preamble preceding the first IMark
967 i = 0;
968 while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
969 addStmtToIRSB( cgs.sbOut, sbIn->stmts[i] );
970 i++;
971 }
972
973 // Get the first statement, and initial cia from it
974 tl_assert(sbIn->stmts_used > 0);
975 tl_assert(i < sbIn->stmts_used);
976 st = sbIn->stmts[i];
977 tl_assert(Ist_IMark == st->tag);
978
979 cia = st->Ist.IMark.addr;
980 isize = st->Ist.IMark.len;
981 // If Vex fails to decode an instruction, the size will be zero.
982 // Pretend otherwise.
983 if (isize == 0) isize = VG_MIN_INSTR_SZB;
984
985 // Set up running state and get block info
986 tl_assert(closure->readdr == vge->base[0]);
987 cgs.events_used = 0;
988 cgs.sbInfo = get_SB_info(sbIn, (Addr)closure->readdr);
989 cgs.sbInfo_i = 0;
990
991 if (DEBUG_CG)
992 VG_(printf)("\n\n---------- cg_instrument ----------\n");
993
994 // Traverse the block, initialising inodes, adding events and flushing as
995 // necessary.
996 for (/*use current i*/; i < sbIn->stmts_used; i++) {
997
998 st = sbIn->stmts[i];
999 tl_assert(isFlatIRStmt(st));
1000
1001 switch (st->tag) {
1002 case Ist_NoOp:
1003 case Ist_AbiHint:
1004 case Ist_Put:
1005 case Ist_PutI:
1006 case Ist_MBE:
1007 break;
1008
1009 case Ist_IMark:
1010 cia = st->Ist.IMark.addr;
1011 isize = st->Ist.IMark.len;
1012
1013 // If Vex fails to decode an instruction, the size will be zero.
1014 // Pretend otherwise.
1015 if (isize == 0) isize = VG_MIN_INSTR_SZB;
1016
1017 // Sanity-check size.
1018 tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
1019 || VG_CLREQ_SZB == isize );
1020
1021 // Get space for and init the inode, record it as the current one.
1022 // Subsequent Dr/Dw/Dm events from the same instruction will
1023 // also use it.
1024 curr_inode = setup_InstrInfo(&cgs, cia, isize);
1025
1026 addEvent_Ir( &cgs, curr_inode );
1027 break;
1028
1029 case Ist_WrTmp: {
1030 IRExpr* data = st->Ist.WrTmp.data;
1031 if (data->tag == Iex_Load) {
1032 IRExpr* aexpr = data->Iex.Load.addr;
1033 // Note also, endianness info is ignored. I guess
1034 // that's not interesting.
1035 addEvent_Dr( &cgs, curr_inode, sizeofIRType(data->Iex.Load.ty),
1036 aexpr );
1037 }
1038 break;
1039 }
1040
1041 case Ist_Store: {
1042 IRExpr* data = st->Ist.Store.data;
1043 IRExpr* aexpr = st->Ist.Store.addr;
1044 addEvent_Dw( &cgs, curr_inode,
1045 sizeofIRType(typeOfIRExpr(tyenv, data)), aexpr );
1046 break;
1047 }
1048
1049 case Ist_Dirty: {
1050 Int dataSize;
1051 IRDirty* d = st->Ist.Dirty.details;
1052 if (d->mFx != Ifx_None) {
1053 /* This dirty helper accesses memory. Collect the details. */
1054 tl_assert(d->mAddr != NULL);
1055 tl_assert(d->mSize != 0);
1056 dataSize = d->mSize;
1057 // Large (eg. 28B, 108B, 512B on x86) data-sized
1058 // instructions will be done inaccurately, but they're
1059 // very rare and this avoids errors from hitting more
1060 // than two cache lines in the simulation.
1061 if (dataSize > MIN_LINE_SIZE)
1062 dataSize = MIN_LINE_SIZE;
1063 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
1064 addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
1065 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
1066 addEvent_Dw( &cgs, curr_inode, dataSize, d->mAddr );
1067 } else {
1068 tl_assert(d->mAddr == NULL);
1069 tl_assert(d->mSize == 0);
1070 }
1071 break;
1072 }
1073
1074 case Ist_CAS: {
1075 /* We treat it as a read and a write of the location. I
1076 think that is the same behaviour as it was before IRCAS
1077 was introduced, since prior to that point, the Vex
1078 front ends would translate a lock-prefixed instruction
1079 into a (normal) read followed by a (normal) write. */
1080 Int dataSize;
1081 IRCAS* cas = st->Ist.CAS.details;
1082 tl_assert(cas->addr != NULL);
1083 tl_assert(cas->dataLo != NULL);
1084 dataSize = sizeofIRType(typeOfIRExpr(tyenv, cas->dataLo));
1085 if (cas->dataHi != NULL)
1086 dataSize *= 2; /* since it's a doubleword-CAS */
1087 /* I don't think this can ever happen, but play safe. */
1088 if (dataSize > MIN_LINE_SIZE)
1089 dataSize = MIN_LINE_SIZE;
1090 addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
1091 addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
1092 break;
1093 }
1094
1095 case Ist_LLSC: {
1096 IRType dataTy;
1097 if (st->Ist.LLSC.storedata == NULL) {
1098 /* LL */
1099 dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
1100 addEvent_Dr( &cgs, curr_inode,
1101 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1102 } else {
1103 /* SC */
1104 dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
1105 addEvent_Dw( &cgs, curr_inode,
1106 sizeofIRType(dataTy), st->Ist.LLSC.addr );
1107 }
1108 break;
1109 }
1110
1111 case Ist_Exit: {
1112 // call branch predictor only if this is a branch in guest code
1113 if ( (st->Ist.Exit.jk == Ijk_Boring) ||
1114 (st->Ist.Exit.jk == Ijk_Call) ||
1115 (st->Ist.Exit.jk == Ijk_Ret) )
1116 {
1117 /* Stuff to widen the guard expression to a host word, so
1118 we can pass it to the branch predictor simulation
1119 functions easily. */
1120 Bool inverted;
1121 Addr64 nia, sea;
1122 IRConst* dst;
1123 IRType tyW = hWordTy;
1124 IROp widen = tyW==Ity_I32 ? Iop_1Uto32 : Iop_1Uto64;
1125 IROp opXOR = tyW==Ity_I32 ? Iop_Xor32 : Iop_Xor64;
1126 IRTemp guard1 = newIRTemp(cgs.sbOut->tyenv, Ity_I1);
1127 IRTemp guardW = newIRTemp(cgs.sbOut->tyenv, tyW);
1128 IRTemp guard = newIRTemp(cgs.sbOut->tyenv, tyW);
1129 IRExpr* one = tyW==Ity_I32 ? IRExpr_Const(IRConst_U32(1))
1130 : IRExpr_Const(IRConst_U64(1));
1131
1132 /* First we need to figure out whether the side exit got
1133 inverted by the ir optimiser. To do that, figure out
1134 the next (fallthrough) instruction's address and the
1135 side exit address and see if they are the same. */
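/* For example, a 2-byte conditional branch at 0x8000 normally falls
   through to 0x8002; if the side exit's target is also 0x8002, the
   optimiser must have inverted the branch, and the guard is XORed
   with 1 below to restore its original sense. */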
1136 nia = cia + (Addr64)isize;
1137 if (tyW == Ity_I32)
1138 nia &= 0xFFFFFFFFULL;
1139
1140 /* Side exit address */
1141 dst = st->Ist.Exit.dst;
1142 if (tyW == Ity_I32) {
1143 tl_assert(dst->tag == Ico_U32);
1144 sea = (Addr64)(UInt)dst->Ico.U32;
1145 } else {
1146 tl_assert(tyW == Ity_I64);
1147 tl_assert(dst->tag == Ico_U64);
1148 sea = dst->Ico.U64;
1149 }
1150
1151 inverted = nia == sea;
1152
1153 /* Widen the guard expression. */
1154 addStmtToIRSB( cgs.sbOut,
1155 IRStmt_WrTmp( guard1, st->Ist.Exit.guard ));
1156 addStmtToIRSB( cgs.sbOut,
1157 IRStmt_WrTmp( guardW,
1158 IRExpr_Unop(widen,
1159 IRExpr_RdTmp(guard1))) );
1160 /* If the exit is inverted, invert the sense of the guard. */
1161 addStmtToIRSB(
1162 cgs.sbOut,
1163 IRStmt_WrTmp(
1164 guard,
1165 inverted ? IRExpr_Binop(opXOR, IRExpr_RdTmp(guardW), one)
1166 : IRExpr_RdTmp(guardW)
1167 ));
1168 /* And post the event. */
1169 addEvent_Bc( &cgs, curr_inode, IRExpr_RdTmp(guard) );
1170 }
1171
1172 /* We may never reach the next statement, so need to flush
1173 all outstanding transactions now. */
1174 flushEvents( &cgs );
1175 break;
1176 }
1177
1178 default:
1179 tl_assert(0);
1180 break;
1181 }
1182
1183 /* Copy the original statement */
1184 addStmtToIRSB( cgs.sbOut, st );
1185
1186 if (DEBUG_CG) {
1187 ppIRStmt(st);
1188 VG_(printf)("\n");
1189 }
1190 }
1191
1192 /* Deal with branches to unknown destinations. Except ignore ones
1193 which are function returns as we assume the return stack
1194 predictor never mispredicts. */
1195 if ((sbIn->jumpkind == Ijk_Boring) || (sbIn->jumpkind == Ijk_Call)) {
1196 if (0) { ppIRExpr( sbIn->next ); VG_(printf)("\n"); }
1197 switch (sbIn->next->tag) {
1198 case Iex_Const:
1199 break; /* boring - branch to known address */
1200 case Iex_RdTmp:
1201 /* looks like an indirect branch (branch to unknown) */
1202 addEvent_Bi( &cgs, curr_inode, sbIn->next );
1203 break;
1204 default:
1205 /* shouldn't happen - if the incoming IR is properly
1206 flattened, should only have tmp and const cases to
1207 consider. */
1208 tl_assert(0);
1209 }
1210 }
1211
1212 /* At the end of the bb. Flush outstandings. */
1213 flushEvents( &cgs );
1214
1215 /* done. stay sane ... */
1216 tl_assert(cgs.sbInfo_i == cgs.sbInfo->n_instrs);
1217
1218 if (DEBUG_CG) {
1219 VG_(printf)( "goto {");
1220 ppIRJumpKind(sbIn->jumpkind);
1221 VG_(printf)( "} ");
1222 ppIRExpr( sbIn->next );
1223 VG_(printf)( "}\n");
1224 }
1225
1226 return cgs.sbOut;
1227 }
1228
1229 /*------------------------------------------------------------*/
1230 /*--- Cache configuration ---*/
1231 /*------------------------------------------------------------*/
1232
1233 #define UNDEFINED_CACHE { -1, -1, -1 }
1234
1235 static cache_t clo_I1_cache = UNDEFINED_CACHE;
1236 static cache_t clo_D1_cache = UNDEFINED_CACHE;
1237 static cache_t clo_LL_cache = UNDEFINED_CACHE;
1238
1239 // Checks cache config is ok. Returns NULL if ok, or a pointer to an error
1240 // string otherwise.
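// For example, "65536,2,64" (a 64KB, 2-way cache with 64-byte lines) gives
// 65536/64/2 = 512 sets, a power of two, so it passes all of these checks.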
1241 static Char* check_cache(cache_t* cache)
1242 {
1243 // Simulator requires set count to be a power of two.
1244 if ((cache->size % (cache->line_size * cache->assoc) != 0) ||
1245 (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc)))
1246 {
1247 return "Cache set count is not a power of two.\n";
1248 }
1249
1250 // Simulator requires line size to be a power of two.
1251 if (-1 == VG_(log2)(cache->line_size)) {
1252 return "Cache line size is not a power of two.\n";
1253 }
1254
1255 // Then check line size >= 16 -- any smaller and a single instruction could
1256 // straddle three cache lines, which breaks a simulation assertion and is
1257 // stupid anyway.
1258 if (cache->line_size < MIN_LINE_SIZE) {
1259 return "Cache line size is too small.\n";
1260 }
1261
1262 /* Then check cache size > line size (causes seg faults if not). */
1263 if (cache->size <= cache->line_size) {
1264 return "Cache size <= line size.\n";
1265 }
1266
1267 /* Then check assoc <= (size / line size) (seg faults otherwise). */
1268 if (cache->assoc > (cache->size / cache->line_size)) {
1269 return "Cache associativity > (size / line size).\n";
1270 }
1271
1272 return NULL;
1273 }
1274
1275 static
1276 void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* LLc)
1277 {
1278 #define DEFINED(L) (-1 != L.size || -1 != L.assoc || -1 != L.line_size)
1279
1280 Char* checkRes;
1281
1282 // Check whether all three caches were defined on the command line.
1283 Bool all_caches_clo_defined =
1284 (DEFINED(clo_I1_cache) &&
1285 DEFINED(clo_D1_cache) &&
1286 DEFINED(clo_LL_cache));
1287
1288 // Set the cache config (using auto-detection, if supported by the
1289 // architecture).
1290 VG_(configure_caches)( I1c, D1c, LLc, all_caches_clo_defined );
1291
1292 // Check the default/auto-detected values.
1293 checkRes = check_cache(I1c); tl_assert(!checkRes);
1294 checkRes = check_cache(D1c); tl_assert(!checkRes);
1295 checkRes = check_cache(LLc); tl_assert(!checkRes);
1296
1297 // Then replace with any defined on the command line. (Already checked in
1298 // parse_cache_opt().)
1299 if (DEFINED(clo_I1_cache)) { *I1c = clo_I1_cache; }
1300 if (DEFINED(clo_D1_cache)) { *D1c = clo_D1_cache; }
1301 if (DEFINED(clo_LL_cache)) { *LLc = clo_LL_cache; }
1302
1303 if (VG_(clo_verbosity) >= 2) {
1304 VG_(umsg)("Cache configuration used:\n");
1305 VG_(umsg)(" I1: %dB, %d-way, %dB lines\n",
1306 I1c->size, I1c->assoc, I1c->line_size);
1307 VG_(umsg)(" D1: %dB, %d-way, %dB lines\n",
1308 D1c->size, D1c->assoc, D1c->line_size);
1309 VG_(umsg)(" LL: %dB, %d-way, %dB lines\n",
1310 LLc->size, LLc->assoc, LLc->line_size);
1311 }
1312 #undef DEFINED
1313 }
1314
1315 /*------------------------------------------------------------*/
1316 /*--- cg_fini() and related function ---*/
1317 /*------------------------------------------------------------*/
1318
1319 // Total reads/writes/misses. Calculated during CC traversal at the end.
1320 // All auto-zeroed.
1321 static CacheCC Ir_total;
1322 static CacheCC Dr_total;
1323 static CacheCC Dw_total;
1324 static BranchCC Bc_total;
1325 static BranchCC Bi_total;
1326
1327 static void fprint_CC_table_and_calc_totals(void)
1328 {
1329 Int i, fd;
1330 SysRes sres;
1331 Char buf[512], *currFile = NULL, *currFn = NULL;
1332 LineCC* lineCC;
1333
1334 // Set up the output filename. Nb: it's important to do this now, ie. as late
1335 // as possible. If we do it at start-up and the program forks and the
1336 // output file format string contains a %p (pid) specifier, both the
1337 // parent and child will incorrectly write to the same file; this
1338 // happened in 3.3.0.
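// For example, with the default "cachegrind.out.%p", a process whose pid is
// 1234 writes cachegrind.out.1234, and a forked child expands its own pid.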
1339 Char* cachegrind_out_file =
1340 VG_(expand_file_name)("--cachegrind-out-file", clo_cachegrind_out_file);
1341
1342 sres = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
1343 VKI_S_IRUSR|VKI_S_IWUSR);
1344 if (sr_isError(sres)) {
1345 // If the file can't be opened for whatever reason (conflict
1346 // between multiple cachegrinded processes?), give up now.
1347 VG_(umsg)("error: can't open cache simulation output file '%s'\n",
1348 cachegrind_out_file );
1349 VG_(umsg)(" ... so simulation results will be missing.\n");
1350 VG_(free)(cachegrind_out_file);
1351 return;
1352 } else {
1353 fd = sr_Res(sres);
1354 VG_(free)(cachegrind_out_file);
1355 }
1356
1357 // "desc:" lines (giving I1/D1/LL cache configuration). The spaces after
1358 // the 2nd colon make cg_annotate's output look nicer.
1359 VG_(sprintf)(buf, "desc: I1 cache: %s\n"
1360 "desc: D1 cache: %s\n"
1361 "desc: LL cache: %s\n",
1362 I1.desc_line, D1.desc_line, LL.desc_line);
1363 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1364
1365 // "cmd:" line
1366 VG_(strcpy)(buf, "cmd:");
1367 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1368 if (VG_(args_the_exename)) {
1369 VG_(write)(fd, " ", 1);
1370 VG_(write)(fd, VG_(args_the_exename),
1371 VG_(strlen)( VG_(args_the_exename) ));
1372 }
1373 for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1374 HChar* arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
1375 if (arg) {
1376 VG_(write)(fd, " ", 1);
1377 VG_(write)(fd, arg, VG_(strlen)( arg ));
1378 }
1379 }
1380 // "events:" line
1381 if (clo_cache_sim && clo_branch_sim) {
1382 VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
1383 "Bc Bcm Bi Bim\n");
1384 }
1385 else if (clo_cache_sim && !clo_branch_sim) {
1386 VG_(sprintf)(buf, "\nevents: Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw "
1387 "\n");
1388 }
1389 else if (!clo_cache_sim && clo_branch_sim) {
1390 VG_(sprintf)(buf, "\nevents: Ir "
1391 "Bc Bcm Bi Bim\n");
1392 }
1393 else {
1394 VG_(sprintf)(buf, "\nevents: Ir\n");
1395 }
1396
1397 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1398
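// The body of the file is per-line count records grouped under "fl="/"fn="
// headers, e.g. (with cache simulation only, using made-up counts):
//     fl=foo.c
//     fn=main
//     42 7 1 1 2 0 0 1 0 0
// i.e. line 42 of foo.c followed by the nine counts named in "events:".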
1399 // Traverse every lineCC
1400 VG_(OSetGen_ResetIter)(CC_table);
1401 while ( (lineCC = VG_(OSetGen_Next)(CC_table)) ) {
1402 Bool just_hit_a_new_file = False;
1403 // If we've hit a new file, print a "fl=" line. Note that because
1404 // each string is stored exactly once in the string table, we can use
1405 // pointer comparison rather than strcmp() to test for equality, which
1406 // is good because most of the time the comparisons are equal and so
1407 // the whole strings would have to be checked.
1408 if ( lineCC->loc.file != currFile ) {
1409 currFile = lineCC->loc.file;
1410 VG_(sprintf)(buf, "fl=%s\n", currFile);
1411 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1412 distinct_files++;
1413 just_hit_a_new_file = True;
1414 }
1415 // If we've hit a new function, print a "fn=" line. We know to do
1416 // this when the function name changes, and also every time we hit a
1417 // new file (in which case the new function name might be the same as
1418 // in the old file, hence the just_hit_a_new_file test).
1419 if ( just_hit_a_new_file || lineCC->loc.fn != currFn ) {
1420 currFn = lineCC->loc.fn;
1421 VG_(sprintf)(buf, "fn=%s\n", currFn);
1422 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1423 distinct_fns++;
1424 }
1425
1426 // Print the LineCC
1427 if (clo_cache_sim && clo_branch_sim) {
1428 VG_(sprintf)(buf, "%u %llu %llu %llu"
1429 " %llu %llu %llu"
1430 " %llu %llu %llu"
1431 " %llu %llu %llu %llu\n",
1432 lineCC->loc.line,
1433 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1434 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1435 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL,
1436 lineCC->Bc.b, lineCC->Bc.mp,
1437 lineCC->Bi.b, lineCC->Bi.mp);
1438 }
1439 else if (clo_cache_sim && !clo_branch_sim) {
1440 VG_(sprintf)(buf, "%u %llu %llu %llu"
1441 " %llu %llu %llu"
1442 " %llu %llu %llu\n",
1443 lineCC->loc.line,
1444 lineCC->Ir.a, lineCC->Ir.m1, lineCC->Ir.mL,
1445 lineCC->Dr.a, lineCC->Dr.m1, lineCC->Dr.mL,
1446 lineCC->Dw.a, lineCC->Dw.m1, lineCC->Dw.mL);
1447 }
1448 else if (!clo_cache_sim && clo_branch_sim) {
1449 VG_(sprintf)(buf, "%u %llu"
1450 " %llu %llu %llu %llu\n",
1451 lineCC->loc.line,
1452 lineCC->Ir.a,
1453 lineCC->Bc.b, lineCC->Bc.mp,
1454 lineCC->Bi.b, lineCC->Bi.mp);
1455 }
1456 else {
1457 VG_(sprintf)(buf, "%u %llu\n",
1458 lineCC->loc.line,
1459 lineCC->Ir.a);
1460 }
1461
1462 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1463
1464 // Update summary stats
1465 Ir_total.a += lineCC->Ir.a;
1466 Ir_total.m1 += lineCC->Ir.m1;
1467 Ir_total.mL += lineCC->Ir.mL;
1468 Dr_total.a += lineCC->Dr.a;
1469 Dr_total.m1 += lineCC->Dr.m1;
1470 Dr_total.mL += lineCC->Dr.mL;
1471 Dw_total.a += lineCC->Dw.a;
1472 Dw_total.m1 += lineCC->Dw.m1;
1473 Dw_total.mL += lineCC->Dw.mL;
1474 Bc_total.b += lineCC->Bc.b;
1475 Bc_total.mp += lineCC->Bc.mp;
1476 Bi_total.b += lineCC->Bi.b;
1477 Bi_total.mp += lineCC->Bi.mp;
1478
1479 distinct_lines++;
1480 }
1481
1482 // Summary stats must come after rest of table, since we calculate them
1483 // during traversal.
1484 if (clo_cache_sim && clo_branch_sim) {
1485 VG_(sprintf)(buf, "summary:"
1486 " %llu %llu %llu"
1487 " %llu %llu %llu"
1488 " %llu %llu %llu"
1489 " %llu %llu %llu %llu\n",
1490 Ir_total.a, Ir_total.m1, Ir_total.mL,
1491 Dr_total.a, Dr_total.m1, Dr_total.mL,
1492 Dw_total.a, Dw_total.m1, Dw_total.mL,
1493 Bc_total.b, Bc_total.mp,
1494 Bi_total.b, Bi_total.mp);
1495 }
1496 else if (clo_cache_sim && !clo_branch_sim) {
1497 VG_(sprintf)(buf, "summary:"
1498 " %llu %llu %llu"
1499 " %llu %llu %llu"
1500 " %llu %llu %llu\n",
1501 Ir_total.a, Ir_total.m1, Ir_total.mL,
1502 Dr_total.a, Dr_total.m1, Dr_total.mL,
1503 Dw_total.a, Dw_total.m1, Dw_total.mL);
1504 }
1505 else if (!clo_cache_sim && clo_branch_sim) {
1506 VG_(sprintf)(buf, "summary:"
1507 " %llu"
1508 " %llu %llu %llu %llu\n",
1509 Ir_total.a,
1510 Bc_total.b, Bc_total.mp,
1511 Bi_total.b, Bi_total.mp);
1512 }
1513 else {
1514 VG_(sprintf)(buf, "summary:"
1515 " %llu\n",
1516 Ir_total.a);
1517 }
1518
1519 VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
1520 VG_(close)(fd);
1521 }
1522
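// Width (in characters) of n when printed with thousands separators, e.g.
// n = 1234567 has 7 digits, so "1,234,567" needs 7 + (7-1)/3 = 9 columns.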
1523 static UInt ULong_width(ULong n)
1524 {
1525 UInt w = 0;
1526 while (n > 0) {
1527 n = n / 10;
1528 w++;
1529 }
1530 if (w == 0) w = 1;
1531 return w + (w-1)/3; // add space for commas
1532 }
1533
1534 static void cg_fini(Int exitcode)
1535 {
1536 static Char buf1[128], buf2[128], buf3[128], buf4[128], fmt[128];
1537
1538 CacheCC D_total;
1539 BranchCC B_total;
1540 ULong LL_total_m, LL_total_mr, LL_total_mw,
1541 LL_total, LL_total_r, LL_total_w;
1542 Int l1, l2, l3;
1543
1544 fprint_CC_table_and_calc_totals();
1545
1546 if (VG_(clo_verbosity) == 0)
1547 return;
1548
1549 // Nb: this isn't called "MAX" because that overshadows a global on Darwin.
1550 #define CG_MAX(a, b) ((a) >= (b) ? (a) : (b))
1551
1552 /* I cache results. Use the I_refs value to determine the first column
1553 * width. */
1554 l1 = ULong_width(Ir_total.a);
1555 l2 = ULong_width(CG_MAX(Dr_total.a, Bc_total.b));
1556 l3 = ULong_width(CG_MAX(Dw_total.a, Bi_total.b));
1557
1558 /* Make format string, getting width right for numbers */
1559 VG_(sprintf)(fmt, "%%s %%,%dllu\n", l1);
1560
1561 /* Always print this */
1562 VG_(umsg)(fmt, "I refs: ", Ir_total.a);
1563
1564 /* If cache profiling is enabled, show D access numbers and all
1565 miss numbers */
1566 if (clo_cache_sim) {
1567 VG_(umsg)(fmt, "I1 misses: ", Ir_total.m1);
1568 VG_(umsg)(fmt, "LLi misses: ", Ir_total.mL);
1569
1570 if (0 == Ir_total.a) Ir_total.a = 1;
1571 VG_(percentify)(Ir_total.m1, Ir_total.a, 2, l1+1, buf1);
1572 VG_(umsg)("I1 miss rate: %s\n", buf1);
1573
1574 VG_(percentify)(Ir_total.mL, Ir_total.a, 2, l1+1, buf1);
1575 VG_(umsg)("LLi miss rate: %s\n", buf1);
1576 VG_(umsg)("\n");
1577
1578 /* D cache results. Use the D_refs.rd and D_refs.wr values to
1579 * determine the width of columns 2 & 3. */
1580 D_total.a = Dr_total.a + Dw_total.a;
1581 D_total.m1 = Dr_total.m1 + Dw_total.m1;
1582 D_total.mL = Dr_total.mL + Dw_total.mL;
1583
1584 /* Make format string, getting width right for numbers */
1585 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu rd + %%,%dllu wr)\n",
1586 l1, l2, l3);
1587
1588 VG_(umsg)(fmt, "D refs: ",
1589 D_total.a, Dr_total.a, Dw_total.a);
1590 VG_(umsg)(fmt, "D1 misses: ",
1591 D_total.m1, Dr_total.m1, Dw_total.m1);
1592 VG_(umsg)(fmt, "LLd misses: ",
1593 D_total.mL, Dr_total.mL, Dw_total.mL);
1594
1595 if (0 == D_total.a) D_total.a = 1;
1596 if (0 == Dr_total.a) Dr_total.a = 1;
1597 if (0 == Dw_total.a) Dw_total.a = 1;
1598 VG_(percentify)( D_total.m1, D_total.a, 1, l1+1, buf1);
1599 VG_(percentify)(Dr_total.m1, Dr_total.a, 1, l2+1, buf2);
1600 VG_(percentify)(Dw_total.m1, Dw_total.a, 1, l3+1, buf3);
1601 VG_(umsg)("D1 miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
1602
1603 VG_(percentify)( D_total.mL, D_total.a, 1, l1+1, buf1);
1604 VG_(percentify)(Dr_total.mL, Dr_total.a, 1, l2+1, buf2);
1605 VG_(percentify)(Dw_total.mL, Dw_total.a, 1, l3+1, buf3);
1606 VG_(umsg)("LLd miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
1607 VG_(umsg)("\n");
1608
1609 /* LL overall results */
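/* The LL cache only sees references that missed in I1/D1, so its access
   counts are simply the sums of the corresponding L1 miss counts. */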
1610
1611 LL_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1;
1612 LL_total_r = Dr_total.m1 + Ir_total.m1;
1613 LL_total_w = Dw_total.m1;
1614 VG_(umsg)(fmt, "LL refs: ",
1615 LL_total, LL_total_r, LL_total_w);
1616
1617 LL_total_m = Dr_total.mL + Dw_total.mL + Ir_total.mL;
1618 LL_total_mr = Dr_total.mL + Ir_total.mL;
1619 LL_total_mw = Dw_total.mL;
1620 VG_(umsg)(fmt, "LL misses: ",
1621 LL_total_m, LL_total_mr, LL_total_mw);
1622
1623 VG_(percentify)(LL_total_m, (Ir_total.a + D_total.a), 1, l1+1, buf1);
1624 VG_(percentify)(LL_total_mr, (Ir_total.a + Dr_total.a), 1, l2+1, buf2);
1625 VG_(percentify)(LL_total_mw, Dw_total.a, 1, l3+1, buf3);
1626 VG_(umsg)("LL miss rate: %s (%s + %s )\n", buf1, buf2,buf3);
1627 }
1628
1629 /* If branch profiling is enabled, show branch overall results. */
1630 if (clo_branch_sim) {
1631 /* Make format string, getting width right for numbers */
1632 VG_(sprintf)(fmt, "%%s %%,%dllu (%%,%dllu cond + %%,%dllu ind)\n",
1633 l1, l2, l3);
1634
1635 if (0 == Bc_total.b) Bc_total.b = 1;
1636 if (0 == Bi_total.b) Bi_total.b = 1;
1637 B_total.b = Bc_total.b + Bi_total.b;
1638 B_total.mp = Bc_total.mp + Bi_total.mp;
1639
1640 VG_(umsg)("\n");
1641 VG_(umsg)(fmt, "Branches: ",
1642 B_total.b, Bc_total.b, Bi_total.b);
1643
1644 VG_(umsg)(fmt, "Mispredicts: ",
1645 B_total.mp, Bc_total.mp, Bi_total.mp);
1646
1647 VG_(percentify)(B_total.mp, B_total.b, 1, l1+1, buf1);
1648 VG_(percentify)(Bc_total.mp, Bc_total.b, 1, l2+1, buf2);
1649 VG_(percentify)(Bi_total.mp, Bi_total.b, 1, l3+1, buf3);
1650
1651 VG_(umsg)("Mispred rate: %s (%s + %s )\n", buf1, buf2,buf3);
1652 }
1653
1654 // Various stats
1655 if (VG_(clo_stats)) {
1656 Int debug_lookups = full_debugs + fn_debugs +
1657 file_line_debugs + no_debugs;
1658
1659 VG_(dmsg)("\n");
1660 VG_(dmsg)("cachegrind: distinct files: %d\n", distinct_files);
1661 VG_(dmsg)("cachegrind: distinct fns: %d\n", distinct_fns);
1662 VG_(dmsg)("cachegrind: distinct lines: %d\n", distinct_lines);
1663 VG_(dmsg)("cachegrind: distinct instrs:%d\n", distinct_instrs);
1664 VG_(dmsg)("cachegrind: debug lookups : %d\n", debug_lookups);
1665
1666 VG_(percentify)(full_debugs, debug_lookups, 1, 6, buf1);
1667 VG_(percentify)(file_line_debugs, debug_lookups, 1, 6, buf2);
1668 VG_(percentify)(fn_debugs, debug_lookups, 1, 6, buf3);
1669 VG_(percentify)(no_debugs, debug_lookups, 1, 6, buf4);
1670 VG_(dmsg)("cachegrind: with full info:%s (%d)\n",
1671 buf1, full_debugs);
1672 VG_(dmsg)("cachegrind: with file/line info:%s (%d)\n",
1673 buf2, file_line_debugs);
1674 VG_(dmsg)("cachegrind: with fn name info:%s (%d)\n",
1675 buf3, fn_debugs);
1676 VG_(dmsg)("cachegrind: with zero info:%s (%d)\n",
1677 buf4, no_debugs);
1678
1679 VG_(dmsg)("cachegrind: string table size: %lu\n",
1680 VG_(OSetGen_Size)(stringTable));
1681 VG_(dmsg)("cachegrind: CC table size: %lu\n",
1682 VG_(OSetGen_Size)(CC_table));
1683 VG_(dmsg)("cachegrind: InstrInfo table size: %lu\n",
1684 VG_(OSetGen_Size)(instrInfoTable));
1685 }
1686 }
1687
1688 /*--------------------------------------------------------------------*/
1689 /*--- Discarding BB info ---*/
1690 /*--------------------------------------------------------------------*/
1691
1692 // Called when a translation is removed from the translation cache for
1693 // any reason at all: to free up space, because the guest code was
1694 // unmapped or modified, or for any arbitrary reason.
1695 static
1696 void cg_discard_superblock_info ( Addr64 orig_addr64, VexGuestExtents vge )
1697 {
1698 SB_info* sbInfo;
1699 Addr orig_addr = (Addr)vge.base[0];
1700
1701 tl_assert(vge.n_used > 0);
1702
1703 if (DEBUG_CG)
1704 VG_(printf)( "discard_superblock_info: %p, %p, %llu\n",
1705 (void*)(Addr)orig_addr,
1706 (void*)(Addr)vge.base[0], (ULong)vge.len[0]);
1707
1708 // Get BB info, remove from table, free BB info. Simple! Note that we
1709 // use orig_addr, not the first instruction address in vge.
1710 sbInfo = VG_(OSetGen_Remove)(instrInfoTable, &orig_addr);
1711 tl_assert(NULL != sbInfo);
1712 VG_(OSetGen_FreeNode)(instrInfoTable, sbInfo);
1713 }
1714
1715 /*--------------------------------------------------------------------*/
1716 /*--- Command line processing ---*/
1717 /*--------------------------------------------------------------------*/
1718
1719 static void parse_cache_opt ( cache_t* cache, Char* opt, Char* optval )
1720 {
1721 Long i1, i2, i3;
1722 Char* endptr;
1723 Char* checkRes;
1724
1725 // Option argument looks like "65536,2,64". Extract them.
1726 i1 = VG_(strtoll10)(optval, &endptr); if (*endptr != ',') goto bad;
1727 i2 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != ',') goto bad;
1728 i3 = VG_(strtoll10)(endptr+1, &endptr); if (*endptr != '\0') goto bad;
1729
1730 // Check for overflow.
1731 cache->size = (Int)i1;
1732 cache->assoc = (Int)i2;
1733 cache->line_size = (Int)i3;
1734 if (cache->size != i1) goto overflow;
1735 if (cache->assoc != i2) goto overflow;
1736 if (cache->line_size != i3) goto overflow;
1737
1738 checkRes = check_cache(cache);
1739 if (checkRes) {
1740 VG_(fmsg)("%s", checkRes);
1741 goto bad;
1742 }
1743
1744 return;
1745
1746 bad:
1747 VG_(fmsg_bad_option)(opt, "");
1748
1749 overflow:
1750 VG_(fmsg_bad_option)(opt,
1751 "One of the cache parameters was too large and overflowed.\n");
1752 }
1753
1754 static Bool cg_process_cmd_line_option(Char* arg)
1755 {
1756 Char* tmp_str;
1757
1758 // Cache config options look like "--I1=<size>,<assoc>,<line_size>".
1759 if VG_STR_CLO(arg, "--I1", tmp_str)
1760 parse_cache_opt(&clo_I1_cache, arg, tmp_str);
1761 else if VG_STR_CLO(arg, "--D1", tmp_str)
1762 parse_cache_opt(&clo_D1_cache, arg, tmp_str);
1763 else if (VG_STR_CLO(arg, "--L2", tmp_str) || // for backwards compatibility
1764 VG_STR_CLO(arg, "--LL", tmp_str))
1765 parse_cache_opt(&clo_LL_cache, arg, tmp_str);
1766
1767 else if VG_STR_CLO( arg, "--cachegrind-out-file", clo_cachegrind_out_file) {}
1768 else if VG_BOOL_CLO(arg, "--cache-sim", clo_cache_sim) {}
1769 else if VG_BOOL_CLO(arg, "--branch-sim", clo_branch_sim) {}
1770 else
1771 return False;
1772
1773 return True;
1774 }
1775
1776 static void cg_print_usage(void)
1777 {
1778 VG_(printf)(
1779 " --I1=<size>,<assoc>,<line_size> set I1 cache manually\n"
1780 " --D1=<size>,<assoc>,<line_size> set D1 cache manually\n"
1781 " --LL=<size>,<assoc>,<line_size> set LL cache manually\n"
1782 " --cache-sim=yes|no [yes] collect cache stats?\n"
1783 " --branch-sim=yes|no [no] collect branch prediction stats?\n"
1784 " --cachegrind-out-file=<file> output file name [cachegrind.out.%%p]\n"
1785 );
1786 }
1787
1788 static void cg_print_debug_usage(void)
1789 {
1790 VG_(printf)(
1791 " (none)\n"
1792 );
1793 }
1794
1795 /*--------------------------------------------------------------------*/
1796 /*--- Setup ---*/
1797 /*--------------------------------------------------------------------*/
1798
1799 static void cg_post_clo_init(void); /* just below */
1800
1801 static void cg_pre_clo_init(void)
1802 {
1803 VG_(details_name) ("Cachegrind");
1804 VG_(details_version) (NULL);
1805 VG_(details_description) ("a cache and branch-prediction profiler");
1806 VG_(details_copyright_author)(
1807 "Copyright (C) 2002-2010, and GNU GPL'd, by Nicholas Nethercote et al.");
1808 VG_(details_bug_reports_to) (VG_BUGS_TO);
1809 VG_(details_avg_translation_sizeB) ( 500 );
1810
1811 VG_(basic_tool_funcs) (cg_post_clo_init,
1812 cg_instrument,
1813 cg_fini);
1814
1815 VG_(needs_superblock_discards)(cg_discard_superblock_info);
1816 VG_(needs_command_line_options)(cg_process_cmd_line_option,
1817 cg_print_usage,
1818 cg_print_debug_usage);
1819 }
1820
1821 static void cg_post_clo_init(void)
1822 {
1823 cache_t I1c, D1c, LLc;
1824
1825 CC_table =
1826 VG_(OSetGen_Create)(offsetof(LineCC, loc),
1827 cmp_CodeLoc_LineCC,
1828 VG_(malloc), "cg.main.cpci.1",
1829 VG_(free));
1830 instrInfoTable =
1831 VG_(OSetGen_Create)(/*keyOff*/0,
1832 NULL,
1833 VG_(malloc), "cg.main.cpci.2",
1834 VG_(free));
1835 stringTable =
1836 VG_(OSetGen_Create)(/*keyOff*/0,
1837 stringCmp,
1838 VG_(malloc), "cg.main.cpci.3",
1839 VG_(free));
1840
1841 configure_caches(&I1c, &D1c, &LLc);
1842
1843 cachesim_I1_initcache(I1c);
1844 cachesim_D1_initcache(D1c);
1845 cachesim_LL_initcache(LLc);
1846 }
1847
1848 VG_DETERMINE_INTERFACE_VERSION(cg_pre_clo_init)
1849
1850 /*--------------------------------------------------------------------*/
1851 /*--- end ---*/
1852 /*--------------------------------------------------------------------*/
1853
1854