1
2 /*--------------------------------------------------------------------*/
3 /*--- Cache simulation cg_sim.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Cachegrind, a Valgrind tool for cache
8 profiling programs.
9
10 Copyright (C) 2002-2012 Nicholas Nethercote
11 njn@valgrind.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 /* Notes:
32 - simulates a write-allocate cache
33 - (block --> set) hash function uses simple bit selection
34 - handling of references straddling two cache blocks:
35 - counts as only one cache access (not two)
36 - both blocks hit --> one hit
37 - one block hits, the other misses --> one miss
38 - both blocks miss --> one miss (not two)
39 */
40
41 typedef struct {
42 Int size; /* bytes */
43 Int assoc;
44 Int line_size; /* bytes */
45 Int sets;
46 Int sets_min_1;
47 Int line_size_bits;
48 Int tag_shift;
49 Char desc_line[128];
50 UWord* tags;
51 } cache_t2;
52
53 /* By this point, the size/assoc/line_size has been checked. */
cachesim_initcache(cache_t config,cache_t2 * c)54 static void cachesim_initcache(cache_t config, cache_t2* c)
55 {
56 Int i;
57
58 c->size = config.size;
59 c->assoc = config.assoc;
60 c->line_size = config.line_size;
61
62 c->sets = (c->size / c->line_size) / c->assoc;
63 c->sets_min_1 = c->sets - 1;
64 c->line_size_bits = VG_(log2)(c->line_size);
65 c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
66
67 if (c->assoc == 1) {
68 VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
69 c->size, c->line_size);
70 } else {
71 VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
72 c->size, c->line_size, c->assoc);
73 }
74
75 c->tags = VG_(malloc)("cg.sim.ci.1",
76 sizeof(UWord) * c->sets * c->assoc);
77
78 for (i = 0; i < c->sets * c->assoc; i++)
79 c->tags[i] = 0;
80 }
81
82 /* This is done as a macro rather than by passing in the cache_t2 as an
83 * arg because it slows things down by a small amount (3-5%) due to all
84 * that extra indirection. */
85
86 #define CACHESIM(L, MISS_TREATMENT) \
87 /* The cache and associated bits and pieces. */ \
88 static cache_t2 L; \
89 \
90 static void cachesim_##L##_initcache(cache_t config) \
91 { \
92 cachesim_initcache(config, &L); \
93 } \
94 \
95 /* This attribute forces GCC to inline this function, even though it's */ \
96 /* bigger than its usual limit. Inlining gains around 5--10% speedup. */ \
97 __attribute__((always_inline)) \
98 static __inline__ \
99 void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *mL) \
100 { \
101 UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \
102 UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \
103 UWord tag = a >> L.tag_shift; \
104 UWord tag2; \
105 Int i, j; \
106 Bool is_miss = False; \
107 UWord* set; \
108 \
109 /* First case: word entirely within line. */ \
110 if (set1 == set2) { \
111 \
112 set = &(L.tags[set1 * L.assoc]); \
113 \
114 /* This loop is unrolled for just the first case, which is the most */\
115 /* common. We can't unroll any further because it would screw up */\
116 /* if we have a direct-mapped (1-way) cache. */\
117 if (tag == set[0]) { \
118 return; \
119 } \
120 /* If the tag is one other than the MRU, move it into the MRU spot */\
121 /* and shuffle the rest down. */\
122 for (i = 1; i < L.assoc; i++) { \
123 if (tag == set[i]) { \
124 for (j = i; j > 0; j--) { \
125 set[j] = set[j - 1]; \
126 } \
127 set[0] = tag; \
128 return; \
129 } \
130 } \
131 \
132 /* A miss; install this tag as MRU, shuffle rest down. */ \
133 for (j = L.assoc - 1; j > 0; j--) { \
134 set[j] = set[j - 1]; \
135 } \
136 set[0] = tag; \
137 MISS_TREATMENT; \
138 return; \
139 \
140 /* Second case: word straddles two lines. */ \
141 /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \
142 } else if (((set1 + 1) & (L.sets_min_1)) == set2) { \
143 set = &(L.tags[set1 * L.assoc]); \
144 if (tag == set[0]) { \
145 goto block2; \
146 } \
147 for (i = 1; i < L.assoc; i++) { \
148 if (tag == set[i]) { \
149 for (j = i; j > 0; j--) { \
150 set[j] = set[j - 1]; \
151 } \
152 set[0] = tag; \
153 goto block2; \
154 } \
155 } \
156 for (j = L.assoc - 1; j > 0; j--) { \
157 set[j] = set[j - 1]; \
158 } \
159 set[0] = tag; \
160 is_miss = True; \
161 block2: \
162 set = &(L.tags[set2 * L.assoc]); \
163 tag2 = (a+size-1) >> L.tag_shift; \
164 if (tag2 == set[0]) { \
165 goto miss_treatment; \
166 } \
167 for (i = 1; i < L.assoc; i++) { \
168 if (tag2 == set[i]) { \
169 for (j = i; j > 0; j--) { \
170 set[j] = set[j - 1]; \
171 } \
172 set[0] = tag2; \
173 goto miss_treatment; \
174 } \
175 } \
176 for (j = L.assoc - 1; j > 0; j--) { \
177 set[j] = set[j - 1]; \
178 } \
179 set[0] = tag2; \
180 is_miss = True; \
181 miss_treatment: \
182 if (is_miss) { MISS_TREATMENT; } \
183 \
184 } else { \
185 VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);\
186 VG_(tool_panic)("item straddles more than two cache sets"); \
187 } \
188 return; \
189 }
190
191 CACHESIM(LL, (*mL)++ );
192 CACHESIM(I1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } );
193 CACHESIM(D1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } );
194
195 /*--------------------------------------------------------------------*/
196 /*--- end cg_sim.c ---*/
197 /*--------------------------------------------------------------------*/
198
199