• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Cache simulation                                    cg_sim.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Cachegrind, a Valgrind tool for cache
8    profiling programs.
9 
10    Copyright (C) 2002-2012 Nicholas Nethercote
11       njn@valgrind.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 /* Notes:
32   - simulates a write-allocate cache
33   - (block --> set) hash function uses simple bit selection
34   - handling of references straddling two cache blocks:
35       - counts as only one cache access (not two)
36       - both blocks hit                  --> one hit
37       - one block hits, the other misses --> one miss
38       - both blocks miss                 --> one miss (not two)
39 */
40 
41 typedef struct {
42    Int          size;                   /* bytes */
43    Int          assoc;
44    Int          line_size;              /* bytes */
45    Int          sets;
46    Int          sets_min_1;
47    Int          line_size_bits;
48    Int          tag_shift;
49    Char         desc_line[128];
50    UWord*       tags;
51 } cache_t2;
52 
53 /* By this point, the size/assoc/line_size has been checked. */
cachesim_initcache(cache_t config,cache_t2 * c)54 static void cachesim_initcache(cache_t config, cache_t2* c)
55 {
56    Int i;
57 
58    c->size      = config.size;
59    c->assoc     = config.assoc;
60    c->line_size = config.line_size;
61 
62    c->sets           = (c->size / c->line_size) / c->assoc;
63    c->sets_min_1     = c->sets - 1;
64    c->line_size_bits = VG_(log2)(c->line_size);
65    c->tag_shift      = c->line_size_bits + VG_(log2)(c->sets);
66 
67    if (c->assoc == 1) {
68       VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
69                                  c->size, c->line_size);
70    } else {
71       VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
72                                  c->size, c->line_size, c->assoc);
73    }
74 
75    c->tags = VG_(malloc)("cg.sim.ci.1",
76                          sizeof(UWord) * c->sets * c->assoc);
77 
78    for (i = 0; i < c->sets * c->assoc; i++)
79       c->tags[i] = 0;
80 }
81 
82 /* This is done as a macro rather than by passing in the cache_t2 as an
83  * arg because it slows things down by a small amount (3-5%) due to all
84  * that extra indirection. */
85 
86 #define CACHESIM(L, MISS_TREATMENT)                                         \
87 /* The cache and associated bits and pieces. */                             \
88 static cache_t2 L;                                                          \
89                                                                             \
90 static void cachesim_##L##_initcache(cache_t config)                        \
91 {                                                                           \
92     cachesim_initcache(config, &L);                                         \
93 }                                                                           \
94                                                                             \
95 /* This attribute forces GCC to inline this function, even though it's */   \
96 /* bigger than its usual limit.  Inlining gains around 5--10% speedup. */   \
97 __attribute__((always_inline))                                              \
98 static __inline__                                                           \
99 void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *mL)         \
100 {                                                                           \
101    UInt  set1 = ( a         >> L.line_size_bits) & (L.sets_min_1);          \
102    UInt  set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1);          \
103    UWord tag  = a >> L.tag_shift;                                           \
104    UWord tag2;                                                              \
105    Int i, j;                                                                \
106    Bool is_miss = False;                                                    \
107    UWord* set;                                                              \
108                                                                             \
109    /* First case: word entirely within line. */                             \
110    if (set1 == set2) {                                                      \
111                                                                             \
112       set = &(L.tags[set1 * L.assoc]);                                      \
113                                                                             \
114       /* This loop is unrolled for just the first case, which is the most */\
115       /* common.  We can't unroll any further because it would screw up   */\
116       /* if we have a direct-mapped (1-way) cache.                        */\
117       if (tag == set[0]) {                                                  \
118          return;                                                            \
119       }                                                                     \
120       /* If the tag is one other than the MRU, move it into the MRU spot  */\
121       /* and shuffle the rest down.                                       */\
122       for (i = 1; i < L.assoc; i++) {                                       \
123          if (tag == set[i]) {                                               \
124             for (j = i; j > 0; j--) {                                       \
125                set[j] = set[j - 1];                                         \
126             }                                                               \
127             set[0] = tag;                                                   \
128             return;                                                         \
129          }                                                                  \
130       }                                                                     \
131                                                                             \
132       /* A miss;  install this tag as MRU, shuffle rest down. */            \
133       for (j = L.assoc - 1; j > 0; j--) {                                   \
134          set[j] = set[j - 1];                                               \
135       }                                                                     \
136       set[0] = tag;                                                         \
137       MISS_TREATMENT;                                                       \
138       return;                                                               \
139                                                                             \
140    /* Second case: word straddles two lines. */                             \
141    /* Nb: this is a fast way of doing ((set1+1) % L.sets) */                \
142    } else if (((set1 + 1) & (L.sets_min_1)) == set2) {                      \
143       set = &(L.tags[set1 * L.assoc]);                                      \
144       if (tag == set[0]) {                                                  \
145          goto block2;                                                       \
146       }                                                                     \
147       for (i = 1; i < L.assoc; i++) {                                       \
148          if (tag == set[i]) {                                               \
149             for (j = i; j > 0; j--) {                                       \
150                set[j] = set[j - 1];                                         \
151             }                                                               \
152             set[0] = tag;                                                   \
153             goto block2;                                                    \
154          }                                                                  \
155       }                                                                     \
156       for (j = L.assoc - 1; j > 0; j--) {                                   \
157          set[j] = set[j - 1];                                               \
158       }                                                                     \
159       set[0] = tag;                                                         \
160       is_miss = True;                                                       \
161 block2:                                                                     \
162       set = &(L.tags[set2 * L.assoc]);                                      \
163       tag2 = (a+size-1) >> L.tag_shift;                                     \
164       if (tag2 == set[0]) {                                                 \
165          goto miss_treatment;                                               \
166       }                                                                     \
167       for (i = 1; i < L.assoc; i++) {                                       \
168          if (tag2 == set[i]) {                                              \
169             for (j = i; j > 0; j--) {                                       \
170                set[j] = set[j - 1];                                         \
171             }                                                               \
172             set[0] = tag2;                                                  \
173             goto miss_treatment;                                            \
174          }                                                                  \
175       }                                                                     \
176       for (j = L.assoc - 1; j > 0; j--) {                                   \
177          set[j] = set[j - 1];                                               \
178       }                                                                     \
179       set[0] = tag2;                                                        \
180       is_miss = True;                                                       \
181 miss_treatment:                                                             \
182       if (is_miss) { MISS_TREATMENT; }                                      \
183                                                                             \
184    } else {                                                                 \
185        VG_(printf)("addr: %lx  size: %u  sets: %d %d", a, size, set1, set2);\
186        VG_(tool_panic)("item straddles more than two cache sets");          \
187    }                                                                        \
188    return;                                                                  \
189 }
190 
191 CACHESIM(LL, (*mL)++ );
192 CACHESIM(I1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } );
193 CACHESIM(D1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } );
194 
195 /*--------------------------------------------------------------------*/
196 /*--- end                                                 cg_sim.c ---*/
197 /*--------------------------------------------------------------------*/
198 
199