• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*--------------------------------------------------------------------*/
2 /*--- Cachegrind: cache configuration.                   cg-arch.c ---*/
3 /*--------------------------------------------------------------------*/
4 
5 /*
6    This file is part of Cachegrind, a Valgrind tool for cache
7    profiling programs.
8 
9    Copyright (C) 2011-2013 Nicholas Nethercote
10       njn@valgrind.org
11 
12    This program is free software; you can redistribute it and/or
13    modify it under the terms of the GNU General Public License as
14    published by the Free Software Foundation; either version 2 of the
15    License, or (at your option) any later version.
16 
17    This program is distributed in the hope that it will be useful, but
18    WITHOUT ANY WARRANTY; without even the implied warranty of
19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20    General Public License for more details.
21 
22    You should have received a copy of the GNU General Public License
23    along with this program; if not, write to the Free Software
24    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25    02111-1307, USA.
26 
27    The GNU General Public License is contained in the file COPYING.
28 */
29 
30 #include "pub_tool_basics.h"
31 #include "pub_tool_libcassert.h"
32 #include "pub_tool_libcbase.h"
33 #include "pub_tool_libcprint.h"
34 #include "pub_tool_options.h"
35 #include "pub_tool_machine.h"
36 
37 #include "cg_arch.h"
38 
// Forward declaration: configure_caches() is defined further down in this
// file but is called earlier, from VG_(post_clo_init_configure_caches)().
static void configure_caches(cache_t* I1c, cache_t* D1c, cache_t* LLc,
                             Bool all_caches_clo_defined);
41 
42 // Checks cache config is ok.  Returns NULL if ok, or a pointer to an error
43 // string otherwise.
check_cache(cache_t * cache)44 static const HChar* check_cache(cache_t* cache)
45 {
46    // Simulator requires set count to be a power of two.
47    if ((cache->size % (cache->line_size * cache->assoc) != 0) ||
48        (-1 == VG_(log2)(cache->size/cache->line_size/cache->assoc)))
49    {
50       return "Cache set count is not a power of two.\n";
51    }
52 
53    // Simulator requires line size to be a power of two.
54    if (-1 == VG_(log2)(cache->line_size)) {
55       return "Cache line size is not a power of two.\n";
56    }
57 
58    // Then check line size >= 16 -- any smaller and a single instruction could
59    // straddle three cache lines, which breaks a simulation assertion and is
60    // stupid anyway.
61    if (cache->line_size < MIN_LINE_SIZE) {
62       return "Cache line size is too small.\n";
63    }
64 
65    /* Then check cache size > line size (causes seg faults if not). */
66    if (cache->size <= cache->line_size) {
67       return "Cache size <= line size.\n";
68    }
69 
70    /* Then check assoc <= (size / line size) (seg faults otherwise). */
71    if (cache->assoc > (cache->size / cache->line_size)) {
72       return "Cache associativity > (size / line size).\n";
73    }
74 
75    return NULL;
76 }
77 
78 
// Parses one cache option value of the form "<size>,<assoc>,<line_size>"
// (for example "65536,2,64") into *cache and validates the result with
// check_cache().
//
//   cache  - receives the three parsed values
//   opt    - the option text passed on to VG_(fmsg_bad_option) on failure
//   optval - the text to parse
//
// Any problem is reported through VG_(fmsg_bad_option).
static void parse_cache_opt ( cache_t* cache, const HChar* opt,
                              const HChar* optval )
{
   Long size_val, assoc_val, line_val;
   HChar* rest;
   const HChar* problem;

   // Read the three decimal fields.  The first two must each be followed
   // by a comma and the last one by the end of the string.
   size_val = VG_(strtoll10)(optval, &rest);
   if (*rest != ',') {
      goto bad;
   }
   assoc_val = VG_(strtoll10)(rest + 1, &rest);
   if (*rest != ',') {
      goto bad;
   }
   line_val = VG_(strtoll10)(rest + 1, &rest);
   if (*rest != '\0') {
      goto bad;
   }

   // Store the narrowed values, then make sure none of them changed when
   // squeezed into an Int.
   cache->size      = (Int)size_val;
   cache->assoc     = (Int)assoc_val;
   cache->line_size = (Int)line_val;
   if (cache->size != size_val) {
      goto overflow;
   }
   if (cache->assoc != assoc_val) {
      goto overflow;
   }
   if (cache->line_size != line_val) {
      goto overflow;
   }

   // Finally make sure the simulator can handle this geometry.
   problem = check_cache(cache);
   if (problem == NULL) {
      return;
   }
   VG_(fmsg)("%s", problem);
   // An invalid geometry is reported as a bad option as well.

  bad:
   VG_(fmsg_bad_option)(opt, "");
   // NOTE(review): control continues into 'overflow' if the call above
   // returns; presumably VG_(fmsg_bad_option) never returns -- confirm.

  overflow:
   VG_(fmsg_bad_option)(opt,
      "One of the cache parameters was too large and overflowed.\n");
}
114 
115 
VG_(str_clo_cache_opt)116 Bool VG_(str_clo_cache_opt)(const HChar *arg,
117                             cache_t* clo_I1c,
118                             cache_t* clo_D1c,
119                             cache_t* clo_LLc)
120 {
121    const HChar* tmp_str;
122 
123    if      VG_STR_CLO(arg, "--I1", tmp_str) {
124       parse_cache_opt(clo_I1c, arg, tmp_str);
125       return True;
126    } else if VG_STR_CLO(arg, "--D1", tmp_str) {
127       parse_cache_opt(clo_D1c, arg, tmp_str);
128       return True;
129    } else if (VG_STR_CLO(arg, "--L2", tmp_str) || // for backwards compatibility
130               VG_STR_CLO(arg, "--LL", tmp_str)) {
131       parse_cache_opt(clo_LLc, arg, tmp_str);
132       return True;
133    } else
134       return False;
135 }
136 
// Emits a one-line, human-readable description of cache 'c' via VG_(umsg):
// its size in bytes, its associativity and its line size in bytes.  'desc'
// is the label printed in front (callers in this file pass "I1", "D1" or
// "LL").
static void umsg_cache_img(const HChar* desc, cache_t* c)
{
   VG_(umsg)(
      "  %s: %'d B, %d-way, %d B lines\n",
      desc,
      c->size,
      c->assoc,
      c->line_size
   );
}
142 
143 // Verifies if c is a valid cache.
144 // An invalid value causes an assert, unless clo_redefined is True.
check_cache_or_override(const HChar * desc,cache_t * c,Bool clo_redefined)145 static void check_cache_or_override(const HChar* desc, cache_t* c, Bool clo_redefined)
146 {
147    const HChar* checkRes;
148 
149    checkRes = check_cache(c);
150    if (checkRes) {
151       VG_(umsg)("Auto-detected %s cache configuration not supported: %s",
152                 desc, checkRes);
153       umsg_cache_img(desc, c);
154       if (!clo_redefined) {
155          VG_(umsg)("As it probably should be supported, please report a bug!\n");
156          VG_(umsg)("Bypass this message by using option --%s=...\n", desc);
157          tl_assert(0);
158       }
159    }
160 }
161 
162 
163 /* If the LL cache config isn't something the simulation functions
164    can handle, try to adjust it so it is.  Caches are characterised
165    by (total size T, line size L, associativity A), and then we
166    have
167 
168      number of sets S = T / (L * A)
169 
170    The required constraints are:
171 
172    * L must be a power of 2, but it always is in practice, so
173      no problem there
174 
175    * A can be any value >= 1
176 
177    * T can be any value, but ..
178 
179    * S must be a power of 2.
180 
181    That sometimes gives a problem.  For example, some Core iX based
182    Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288
183    sets.  The "fix" in this case is to increase the associativity
184    by 50% to 24, which reduces the number of sets to 8192, making
185    it a power of 2.  That's what the following code does (handing
186    the "3/2 rescaling case".)  We might need to deal with other
187    ratios later (5/4 ?).
188 
189    The "fix" is "justified" (cough, cough) by alleging that
190    increases of associativity above about 4 have very little effect
191    on the actual miss rate.  It would be far more inaccurate to
192    fudge this by changing the size of the simulated cache --
193    changing the associativity is a much better option.
194 */
195 
196 static void
maybe_tweak_LLc(cache_t * LLc)197 maybe_tweak_LLc(cache_t *LLc)
198 {
199   if (LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0) {
200       Long nSets = (Long)LLc->size / (Long)(LLc->line_size * LLc->assoc);
201       if (/* stay sane */
202           nSets >= 4
203           /* nSets is not a power of 2 */
204           && VG_(log2_64)( (ULong)nSets ) == -1
205           /* nSets is 50% above a power of 2 */
206           && VG_(log2_64)( (ULong)((2 * nSets) / (Long)3) ) != -1
207           /* associativity can be increased by exactly 50% */
208           && (LLc->assoc % 2) == 0
209          ) {
210          /* # sets is 1.5 * a power of two, but the associativity is
211             even, so we can increase that up by 50% and implicitly
212             scale the # sets down accordingly. */
213          Int new_assoc = LLc->assoc + (LLc->assoc / 2);
214          VG_(dmsg)("warning: pretending that LL cache has associativity"
215                    " %d instead of actual %d\n", new_assoc, LLc->assoc);
216          LLc->assoc = new_assoc;
217       }
218    }
219 }
220 
VG_(post_clo_init_configure_caches)221 void VG_(post_clo_init_configure_caches)(cache_t* I1c,
222                                          cache_t* D1c,
223                                          cache_t* LLc,
224                                          cache_t* clo_I1c,
225                                          cache_t* clo_D1c,
226                                          cache_t* clo_LLc)
227 {
228 #define DEFINED(L)   (-1 != L->size  || -1 != L->assoc || -1 != L->line_size)
229 
230    // Count how many were defined on the command line.
231    Bool all_caches_clo_defined =
232       (DEFINED(clo_I1c) &&
233        DEFINED(clo_D1c) &&
234        DEFINED(clo_LLc));
235 
236    // Set the cache config (using auto-detection, if supported by the
237    // architecture).
238    configure_caches( I1c, D1c, LLc, all_caches_clo_defined );
239 
240    maybe_tweak_LLc( LLc );
241 
242    // Check the default/auto-detected values.
243    // Allow the user to override invalid auto-detected caches
244    // with command line.
245    check_cache_or_override ("I1", I1c, DEFINED(clo_I1c));
246    check_cache_or_override ("D1", D1c, DEFINED(clo_D1c));
247    check_cache_or_override ("LL", LLc, DEFINED(clo_LLc));
248 
249    // Then replace with any defined on the command line.  (Already checked in
250    // VG(parse_clo_cache_opt)().)
251    if (DEFINED(clo_I1c)) { *I1c = *clo_I1c; }
252    if (DEFINED(clo_D1c)) { *D1c = *clo_D1c; }
253    if (DEFINED(clo_LLc)) { *LLc = *clo_LLc; }
254 
255    if (VG_(clo_verbosity) >= 2) {
256       VG_(umsg)("Cache configuration used:\n");
257       umsg_cache_img ("I1", I1c);
258       umsg_cache_img ("D1", D1c);
259       umsg_cache_img ("LL", LLc);
260    }
261 #undef DEFINED
262 }
263 
VG_(print_cache_clo_opts)264 void VG_(print_cache_clo_opts)()
265 {
266    VG_(printf)(
267 "    --I1=<size>,<assoc>,<line_size>  set I1 cache manually\n"
268 "    --D1=<size>,<assoc>,<line_size>  set D1 cache manually\n"
269 "    --LL=<size>,<assoc>,<line_size>  set LL cache manually\n"
270                );
271 }
272 
273 
274 // Traverse the cache info and return a cache of the given kind and level.
275 // Return NULL if no such cache exists.
276 static const VexCache *
locate_cache(const VexCacheInfo * ci,VexCacheKind kind,UInt level)277 locate_cache(const VexCacheInfo *ci, VexCacheKind kind, UInt level)
278 {
279    const VexCache *c;
280 
281    for (c = ci->caches; c != ci->caches + ci->num_caches; ++c) {
282       if (c->level == level && c->kind == kind) {
283          return c;
284       }
285    }
286    return NULL;  // not found
287 }
288 
289 
290 // Gives the auto-detected configuration of I1, D1 and LL caches.  They get
291 // overridden by any cache configurations specified on the command line.
292 static void
configure_caches(cache_t * I1c,cache_t * D1c,cache_t * LLc,Bool all_caches_clo_defined)293 configure_caches(cache_t *I1c, cache_t *D1c, cache_t *LLc,
294                  Bool all_caches_clo_defined)
295 {
296    VexArchInfo vai;
297    const VexCacheInfo *ci;
298    const VexCache *i1, *d1, *ll;
299 
300    VG_(machine_get_VexArchInfo)(NULL, &vai);
301    ci = &vai.hwcache_info;
302 
303    // Extract what we need
304    i1 = locate_cache(ci, INSN_CACHE, 1);
305    d1 = locate_cache(ci, DATA_CACHE, 1);
306    ll = locate_cache(ci, UNIFIED_CACHE, ci->num_levels);
307 
308    if (ci->num_caches > 0 && ll == NULL) {
309       VG_(dmsg)("warning: L2 cache not installed, ignore LL results.\n");
310    }
311 
312    if (ll && ci->num_levels > 2) {
313       VG_(dmsg)("warning: L%u cache found, using its data for the "
314                 "LL simulation.\n", ci->num_levels);
315    }
316 
317    if (i1 && d1 && ll) {
318       if (i1->is_trace_cache) {
319          /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
320           * conversion to byte size is a total guess;  treat the 12K and 16K
321           * cases the same since the cache byte size must be a power of two for
322           * everything to work!.  Also guessing 32 bytes for the line size...
323           */
324          UInt adjusted_size, guessed_line_size = 32;
325 
326          if (i1->sizeB == 12 * 1024 || i1->sizeB == 16 * 1024) {
327             adjusted_size = 16 * 1024;
328          } else {
329             adjusted_size = 32 * 1024;
330          }
331          VG_(dmsg)("warning: Pentium 4 with %u KB micro-op instruction trace cache\n",
332                    i1->sizeB / 1024);
333          VG_(dmsg)("         Simulating a %d KB I-cache with %d B lines\n",
334                    adjusted_size / 1024, guessed_line_size);
335 
336          *I1c = (cache_t) { adjusted_size, i1->assoc, guessed_line_size };
337       } else {
338          *I1c = (cache_t) { i1->sizeB, i1->assoc, i1->line_sizeB };
339       }
340       *D1c = (cache_t) { d1->sizeB, d1->assoc, d1->line_sizeB };
341       *LLc = (cache_t) { ll->sizeB, ll->assoc, ll->line_sizeB };
342 
343       return;
344    }
345 
346    // Cache information could not be queried; choose some default
347    // architecture specific default setting.
348 
349 #if defined(VGA_ppc32)
350 
351    // Default cache configuration
352    *I1c = (cache_t) {  65536, 2, 64 };
353    *D1c = (cache_t) {  65536, 2, 64 };
354    *LLc = (cache_t) { 262144, 8, 64 };
355 
356 #elif defined(VGA_ppc64)
357 
358    // Default cache configuration
359    *I1c = (cache_t) {  65536, 2, 64 };
360    *D1c = (cache_t) {  65536, 2, 64 };
361    *LLc = (cache_t) { 262144, 8, 64 };
362 
363 #elif defined(VGA_arm)
364 
365    // Set caches to default (for Cortex-A8 ?)
366    *I1c = (cache_t) {  16384, 4, 64 };
367    *D1c = (cache_t) {  16384, 4, 64 };
368    *LLc = (cache_t) { 262144, 8, 64 };
369 
370 #elif defined(VGA_arm64)
371 
372    // Copy the 32-bit ARM version until such time as we have
373    // some real hardware to run on
374    *I1c = (cache_t) {  16384, 4, 64 };
375    *D1c = (cache_t) {  16384, 4, 64 };
376    *LLc = (cache_t) { 262144, 8, 64 };
377 
378 #elif defined(VGA_s390x)
379    //
380    // Here is the cache data from older machine models:
381    //
382    //           I1            D1      I/D L2
383    // z900  256k/256/4    256k/256/4   16MB
384    // z800  256k/256/4    256k/256/4    8MB
385    // z990  256k/256/4    256k/256/4   32MB
386    // z890  256k/256/4    256k/256/4   32MB
387    // z9    256k/256/4    256k/256/4   40MB
388    //
389    // Sources:
390    // (1) IBM System z9 109 Technical Introduction
391    //     www.redbooks.ibm.com/redbooks/pdfs/sg246669.pdf
392    // (2) The microarchitecture of the IBM eServer z900 processor
393    //     IBM Journal of Research and Development
394    //     Volume 46, Number 4/5, pp 381-395, July/September 2002
395    // (3) The IBM eServer z990 microprocessor
396    //     IBM Journal of Research and Development
397    //     Volume 48, Number 3/4, pp 295-309, May/July 2004
398    // (4) Charles Webb, IBM
399    //
400    // L2 data is unfortunately incomplete. Otherwise, we could support
401    // machines without the ECAG insn by looking at VEX_S390X_MODEL(hwcaps).
402 
403    // Default cache configuration is z10-EC  (Source: ECAG insn)
404    *I1c = (cache_t) {    65536,  4, 256 };
405    *D1c = (cache_t) {   131072,  8, 256 };
406    *LLc = (cache_t) { 50331648, 24, 256 };
407 
408 #elif defined(VGA_mips32)
409 
410    // Set caches to default (for MIPS32-r2(mips 74kc))
411    *I1c = (cache_t) {  32768, 4, 32 };
412    *D1c = (cache_t) {  32768, 4, 32 };
413    *LLc = (cache_t) { 524288, 8, 32 };
414 
415 #elif defined(VGA_mips64)
416 
417    // Set caches to default (for MIPS64 - 5kc)
418    *I1c = (cache_t) {  32768, 4, 32 };
419    *D1c = (cache_t) {  32768, 4, 32 };
420    *LLc = (cache_t) { 524288, 8, 32 };
421 
422 #elif defined(VGA_x86) || defined(VGA_amd64)
423 
424    *I1c = (cache_t) {  65536, 2, 64 };
425    *D1c = (cache_t) {  65536, 2, 64 };
426    *LLc = (cache_t) { 262144, 8, 64 };
427 
428 #else
429 
430 #error "Unknown arch"
431 
432 #endif
433 
434    if (!all_caches_clo_defined) {
435       const HChar warning[] =
436         "Warning: Cannot auto-detect cache config, using defaults.\n"
437         "         Run with -v to see.\n";
438       VG_(dmsg)("%s", warning);
439    }
440 }
441 
442 /*--------------------------------------------------------------------*/
443 /*--- end                                                          ---*/
444 /*--------------------------------------------------------------------*/
445