1
2 /*--------------------------------------------------------------------*/
3 /*--- Management of the translation table and cache. ---*/
4 /*--- m_transtab.c ---*/
5 /*--------------------------------------------------------------------*/
6
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2000-2011 Julian Seward
12 jseward@acm.org
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30 */
31
32 #include "pub_core_basics.h"
33 #include "pub_core_debuglog.h"
34 #include "pub_core_machine.h" // For VG(machine_get_VexArchInfo)
35 #include "pub_core_libcbase.h"
36 #include "pub_core_libcassert.h"
37 #include "pub_core_libcprint.h"
38 #include "pub_core_options.h"
39 #include "pub_core_tooliface.h" // For VG_(details).avg_translation_sizeB
40 #include "pub_core_transtab.h"
41 #include "pub_core_aspacemgr.h"
42 #include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
43
44 // JRS FIXME get rid of this somehow
45 #if defined(VGP_arm_linux)
46 # include "pub_core_vkiscnums.h" // __ARM_NR_cacheflush
47 # include "pub_core_syscall.h" // VG_(do_syscallN)
48 #endif
49
50
51 /* #define DEBUG_TRANSTAB */
52
53
54 /*-------------------------------------------------------------*/
55 /*--- Management of the FIFO-based translation table+cache. ---*/
56 /*-------------------------------------------------------------*/
57
58 /*------------------ CONSTANTS ------------------*/
59
60 /* Number of sectors the TC is divided into. If you need a larger
61 overall translation cache, increase this value. */
62 #define N_SECTORS 8
63
64 /* Number of TC entries in each sector. This needs to be a prime
65 number to work properly, it must be <= 65535 (so that a TT index
66 fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote
67 'deleted') and it is strongly recommended not to change this.
68 65521 is the largest prime <= 65535. */
69 #define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521
70
71 /* Because each sector contains a hash table of TTEntries, we need to
72 specify the maximum allowable loading, after which the sector is
73 deemed full. */
74 #define SECTOR_TT_LIMIT_PERCENT 65
75
76 /* The sector is deemed full when this many entries are in it. */
77 #define N_TTES_PER_SECTOR_USABLE \
78 ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
79
80 /* Equivalence classes for fast address range deletion. There are 1 +
81 2^ECLASS_WIDTH bins. The highest one, ECLASS_MISC, describes an
82 address range which does not fall cleanly within any specific bin.
83 Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
84 #define ECLASS_SHIFT 11
85 #define ECLASS_WIDTH 8
86 #define ECLASS_MISC (1 << ECLASS_WIDTH)
87 #define ECLASS_N (1 + ECLASS_MISC)
88
89 #define EC2TTE_DELETED 0xFFFF /* 16-bit special value */
90
91
92 /*------------------ TYPES ------------------*/
93
94 /* A translation-table entry. This indicates precisely which areas of
95 guest code are included in the translation, and contains all other
96 auxiliary info too. */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      UInt count;
      UShort weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.
   }
   TTEntry;
148
149
150 /* Finally, a sector itself. Each sector contains an array of
151 TCEntries, which hold code, and an array of TTEntries, containing
152 all required administrative info. Profiling is supported using the
153 TTEntry .count and .weight fields, if required. Each sector is
154 independent in that no cross-sector references are allowed.
155
156 If the sector is not in use, all three pointers are NULL and
157 tt_n_inuse is zero.
158 */
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here.  Slots may also hold EC2TTE_DELETED to mark a
         lazily-deleted reference. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];
   }
   Sector;
186
187
188 /*------------------ DECLS ------------------*/
189
190 /* The root data structure is an array of sectors. The index of the
191 youngest sector is recorded, and new translations are put into that
192 sector. When it fills up, we move along to the next sector and
193 start to fill that up, wrapping around at the end of the array.
   That way, once all N_SECTORS have been brought into use for the
195 first time, and are full, we then re-use the oldest sector,
196 endlessly.
197
   When running, youngest_sector should be >= 0 and <
   N_SECTORS.  The initial -1 value indicates the TT/TC system is
200 not yet initialised.
201 */
/* All sectors, and the index of the one currently being filled.
   youngest_sector == -1 until init_tt_tc-style initialisation runs. */
static Sector sectors[N_SECTORS];
static Int    youngest_sector = -1;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int tc_sector_szQ;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  -1 denotes "no entry". */
static Int sector_search_order[N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
/*
#define TRANSTAB_BOGUS_GUEST_ADDR ((Addr)1)
*/

/* For profiling, we have a parallel array of pointers to .count
   fields in TT entries.  Again, these pointers must be invalidated
   when translations disappear.  A NULL pointer suffices to indicate
   an unused slot.

   When not profiling (the normal case, VG_(clo_profile_flags) == 0),
   all tt_fastN entries are set to NULL at startup and never read nor
   written after that.

   When profiling (VG_(clo_profile_flags) > 0), tt_fast and tt_fastN
   change together: if tt_fast[i].guest is TRANSTAB_BOGUS_GUEST_ADDR
   then the corresponding tt_fastN[i] must be null.  If
   tt_fast[i].guest is any other value, then tt_fastN[i] *must* point
   to the .count field of the corresponding TT entry.

   tt_fast and tt_fastN are referred to from assembly code
   (dispatch.S).
*/
/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];


/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
ULong n_fast_flushes = 0;
ULong n_fast_updates = 0;

/* Number of full lookups done. */
ULong n_full_lookups = 0;
ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
ULong n_in_count    = 0;
ULong n_in_osize    = 0;
ULong n_in_tsize    = 0;
ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
ULong n_dump_count = 0;
ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
ULong n_disc_count = 0;
ULong n_disc_osize = 0;
294
295
296 /*-------------------------------------------------------------*/
297 /*--- Address-range equivalence class stuff ---*/
298 /*-------------------------------------------------------------*/
299
300 /* Return equivalence class number for a range. */
301
range_to_eclass(Addr64 start,UInt len)302 static Int range_to_eclass ( Addr64 start, UInt len )
303 {
304 UInt mask = (1 << ECLASS_WIDTH) - 1;
305 UInt lo = (UInt)start;
306 UInt hi = lo + len - 1;
307 UInt loBits = (lo >> ECLASS_SHIFT) & mask;
308 UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
309 if (loBits == hiBits) {
310 vg_assert(loBits < ECLASS_N-1);
311 return loBits;
312 } else {
313 return ECLASS_MISC;
314 }
315 }
316
317
318 /* Calculates the equivalence class numbers for any VexGuestExtent.
319 These are written in *eclasses, which must be big enough to hold 3
320 Ints. The number written, between 1 and 3, is returned. The
321 eclasses are presented in order, and any duplicates are removed.
322 */
323
324 static
vexGuestExtents_to_eclasses(Int * eclasses,VexGuestExtents * vge)325 Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
326 VexGuestExtents* vge )
327 {
328 # define SWAP(_lv1,_lv2) \
329 do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
330
331 Int i, j, n_ec, r;
332
333 vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
334
335 n_ec = 0;
336 for (i = 0; i < vge->n_used; i++) {
337 r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
338 if (r == ECLASS_MISC)
339 goto bad;
340 /* only add if we haven't already seen it */
341 for (j = 0; j < n_ec; j++)
342 if (eclasses[j] == r)
343 break;
344 if (j == n_ec)
345 eclasses[n_ec++] = r;
346 }
347
348 if (n_ec == 1)
349 return 1;
350
351 if (n_ec == 2) {
352 /* sort */
353 if (eclasses[0] > eclasses[1])
354 SWAP(eclasses[0], eclasses[1]);
355 return 2;
356 }
357
358 if (n_ec == 3) {
359 /* sort */
360 if (eclasses[0] > eclasses[2])
361 SWAP(eclasses[0], eclasses[2]);
362 if (eclasses[0] > eclasses[1])
363 SWAP(eclasses[0], eclasses[1]);
364 if (eclasses[1] > eclasses[2])
365 SWAP(eclasses[1], eclasses[2]);
366 return 3;
367 }
368
369 /* NOTREACHED */
370 vg_assert(0);
371
372 bad:
373 eclasses[0] = ECLASS_MISC;
374 return 1;
375
376 # undef SWAP
377 }
378
379
380 /* Add tteno to the set of entries listed for equivalence class ec in
381 this sector. Returns used location in eclass array. */
382
383 static
addEClassNo(Sector * sec,Int ec,UShort tteno)384 UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
385 {
386 Int old_sz, new_sz, i, r;
387 UShort *old_ar, *new_ar;
388
389 vg_assert(ec >= 0 && ec < ECLASS_N);
390 vg_assert(tteno < N_TTES_PER_SECTOR);
391
392 if (0) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno);
393
394 if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
395
396 vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
397
398 old_sz = sec->ec2tte_size[ec];
399 old_ar = sec->ec2tte[ec];
400 new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
401 new_ar = VG_(arena_malloc)(VG_AR_TTAUX, "transtab.aECN.1",
402 new_sz * sizeof(UShort));
403 for (i = 0; i < old_sz; i++)
404 new_ar[i] = old_ar[i];
405 if (old_ar)
406 VG_(arena_free)(VG_AR_TTAUX, old_ar);
407 sec->ec2tte_size[ec] = new_sz;
408 sec->ec2tte[ec] = new_ar;
409
410 if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
411 }
412
413 /* Common case */
414 r = sec->ec2tte_used[ec]++;
415 vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
416 sec->ec2tte[ec][r] = tteno;
417 return (UInt)r;
418 }
419
420
421 /* 'vge' is being added to 'sec' at TT entry 'tteno'. Add appropriate
422 eclass entries to 'sec'. */
423
424 static
upd_eclasses_after_add(Sector * sec,Int tteno)425 void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
426 {
427 Int i, r, eclasses[3];
428 TTEntry* tte;
429 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
430
431 tte = &sec->tt[tteno];
432 r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
433
434 vg_assert(r >= 1 && r <= 3);
435 tte->n_tte2ec = r;
436
437 for (i = 0; i < r; i++) {
438 tte->tte2ec_ec[i] = eclasses[i];
439 tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
440 }
441 }
442
443
444 /* Check the eclass info in 'sec' to ensure it is consistent. Returns
445 True if OK, False if something's not right. Expensive. */
446
static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   HChar* whassup = NULL;
   Int i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort tteno;
   ULong* tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      /* size and array pointer must be consistent: allocated iff
         size is nonzero */
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;   /* lazily-deleted slot; nothing to check */
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            /* forward entries must be strictly ascending */
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         /* unused entries must carry no eclass links */
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

   /* Extra diagnostics, normally compiled out. */
#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse: VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty: VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}
572
573
574 /* Sanity check absolutely everything. True == check passed. */
575
576 /* forwards */
577 static Bool sanity_check_redir_tt_tc ( void );
578 static Bool sanity_check_fastcache ( void );
579
sanity_check_sector_search_order(void)580 static Bool sanity_check_sector_search_order ( void )
581 {
582 Int i, j, nListed;
583 /* assert the array is the right size */
584 vg_assert(N_SECTORS == (sizeof(sector_search_order)
585 / sizeof(sector_search_order[0])));
586 /* Check it's of the form valid_sector_numbers ++ [-1, -1, ..] */
587 for (i = 0; i < N_SECTORS; i++) {
588 if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS)
589 break;
590 }
591 nListed = i;
592 for (/* */; i < N_SECTORS; i++) {
593 if (sector_search_order[i] != -1)
594 break;
595 }
596 if (i != N_SECTORS)
597 return False;
598 /* Check each sector number only appears once */
599 for (i = 0; i < N_SECTORS; i++) {
600 if (sector_search_order[i] == -1)
601 continue;
602 for (j = i+1; j < N_SECTORS; j++) {
603 if (sector_search_order[j] == sector_search_order[i])
604 return False;
605 }
606 }
607 /* Check that the number of listed sectors equals the number
608 in use, by counting nListed back down. */
609 for (i = 0; i < N_SECTORS; i++) {
610 if (sectors[i].tc != NULL)
611 nListed--;
612 }
613 if (nListed != 0)
614 return False;
615 return True;
616 }
617
sanity_check_all_sectors(void)618 static Bool sanity_check_all_sectors ( void )
619 {
620 Int sno;
621 Bool sane;
622 Sector* sec;
623 for (sno = 0; sno < N_SECTORS; sno++) {
624 sec = §ors[sno];
625 if (sec->tc == NULL)
626 continue;
627 sane = sanity_check_eclasses_in_sector( sec );
628 if (!sane)
629 return False;
630 }
631 if ( !sanity_check_redir_tt_tc() )
632 return False;
633 if ( !sanity_check_fastcache() )
634 return False;
635 if ( !sanity_check_sector_search_order() )
636 return False;
637 return True;
638 }
639
640
641
642 /*-------------------------------------------------------------*/
643 /*--- Add/find translations ---*/
644 /*-------------------------------------------------------------*/
645
vge_osize(VexGuestExtents * vge)646 static UInt vge_osize ( VexGuestExtents* vge )
647 {
648 UInt i, n = 0;
649 for (i = 0; i < vge->n_used; i++)
650 n += (UInt)vge->len[i];
651 return n;
652 }
653
isValidSector(Int sector)654 static Bool isValidSector ( Int sector )
655 {
656 if (sector < 0 || sector >= N_SECTORS)
657 return False;
658 return True;
659 }
660
HASH_TT(Addr64 key)661 static inline UInt HASH_TT ( Addr64 key )
662 {
663 UInt kHi = (UInt)(key >> 32);
664 UInt kLo = (UInt)key;
665 UInt k32 = kHi ^ kLo;
666 UInt ror = 7;
667 if (ror > 0)
668 k32 = (k32 >> ror) | (k32 << (32-ror));
669 return k32 % N_TTES_PER_SECTOR;
670 }
671
/* Install (key -> tcptr) in the direct-mapped fast cache; when
   profiling, also record the translation's count pointer. */
static void setFastCacheEntry ( Addr64 key, ULong* tcptr, UInt* count )
{
   UInt slot = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[slot].guest = (Addr)key;
   VG_(tt_fast)[slot].host  = (Addr)tcptr;
   if (VG_(clo_profile_flags) > 0)
      VG_(tt_fastN)[slot] = count;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[slot].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}
685
686 /* Invalidate the fast cache's counter array, VG_(tt_fastN). */
invalidateFastNCache(void)687 static void invalidateFastNCache ( void )
688 {
689 UInt j;
690 vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
691 for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
692 VG_(tt_fastN)[j+0] = NULL;
693 VG_(tt_fastN)[j+1] = NULL;
694 VG_(tt_fastN)[j+2] = NULL;
695 VG_(tt_fastN)[j+3] = NULL;
696 }
697 vg_assert(j == VG_TT_FAST_SIZE);
698 }
699
700 /* Invalidate the fast cache VG_(tt_fast). If profiling, also
701 invalidate the fast cache's counter array VG_(tt_fastN), otherwise
702 don't touch it. */
invalidateFastCache(void)703 static void invalidateFastCache ( void )
704 {
705 UInt j;
706 /* This loop is popular enough to make it worth unrolling a
707 bit, at least on ppc32. */
708 vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
709 for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
710 VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
711 VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
712 VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
713 VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
714 }
715
716 if (VG_(clo_profile_flags) > 0)
717 invalidateFastNCache();
718
719 vg_assert(j == VG_TT_FAST_SIZE);
720 n_fast_flushes++;
721 }
722
sanity_check_fastcache(void)723 static Bool sanity_check_fastcache ( void )
724 {
725 UInt j;
726 if (0) VG_(printf)("sanity check fastcache\n");
727 if (VG_(clo_profile_flags) > 0) {
728 /* profiling */
729 for (j = 0; j < VG_TT_FAST_SIZE; j++) {
730 if (VG_(tt_fastN)[j] == NULL
731 && VG_(tt_fast)[j].guest != TRANSTAB_BOGUS_GUEST_ADDR)
732 return False;
733 if (VG_(tt_fastN)[j] != NULL
734 && VG_(tt_fast)[j].guest == TRANSTAB_BOGUS_GUEST_ADDR)
735 return False;
736 }
737 } else {
738 /* not profiling */
739 for (j = 0; j < VG_TT_FAST_SIZE; j++) {
740 if (VG_(tt_fastN)[j] != NULL)
741 return False;
742 }
743 }
744 return True;
745 }
746
/* Make sector 'sno' ready to receive translations: allocate its
   tt/tc on first use, or dump all existing translations when the
   sector is being recycled.  Either way the fast cache is flushed,
   since it may hold pointers into the old contents. */
static void initialiseSector ( Int sno )
{
   Int i;
   SysRes sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);

      /* tc holds tc_sector_szQ ULongs, hence 8 bytes each. */
      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
	 /*NOTREACHED*/
      }
      sec->tc = (ULong*)(AddrH)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
	 /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Add an entry in the sector_search_order */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      /* Visit each just-about-to-be-abandoned translation. */
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   /* tt_fast may hold pointers into the discarded contents. */
   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}
869
/* Ensure the instruction cache is coherent with the just-written
   host code at [ptr, ptr+nbytes).  On x86/amd64/s390x the hardware
   guarantees coherence; on ppc we issue the flush sequence by hand;
   on ARM the flush is privileged, so we ask the kernel. */
static void invalidate_icache ( void *ptr, Int nbytes )
{
#  if defined(VGA_ppc32) || defined(VGA_ppc64)
   Addr startaddr = (Addr) ptr;
   Addr endaddr = startaddr + nbytes;
   Addr cls;
   Addr addr;
   VexArchInfo vai;

   if (nbytes == 0) return;
   vg_assert(nbytes > 0);

   VG_(machine_get_VexArchInfo)( NULL, &vai );
   cls = vai.ppc_cache_line_szB;

   /* Stay sane .. */
   vg_assert(cls == 32 || cls == 64 || cls == 128);

   /* Round the start down to a cache-line boundary, then walk the
      range a line at a time: dcbst pushes dirty data-cache lines to
      memory, then icbi invalidates the stale instruction-cache
      lines, with sync/isync ordering the two phases. */
   startaddr &= ~(cls - 1);
   for (addr = startaddr; addr < endaddr; addr += cls) {
      __asm__ __volatile__("dcbst 0,%0" : : "r" (addr));
   }
   __asm__ __volatile__("sync");
   for (addr = startaddr; addr < endaddr; addr += cls) {
      __asm__ __volatile__("icbi 0,%0" : : "r" (addr));
   }
   __asm__ __volatile__("sync; isync");

#  elif defined(VGA_x86)
   /* no need to do anything, hardware provides coherence */

#  elif defined(VGA_amd64)
   /* no need to do anything, hardware provides coherence */

#  elif defined(VGA_s390x)
   /* no need to do anything, hardware provides coherence */

#  elif defined(VGP_arm_linux)
   /* ARM cache flushes are privileged, so we must defer to the kernel. */
   Addr startaddr = (Addr) ptr;
   Addr endaddr = startaddr + nbytes;
   VG_(do_syscall2)(__NR_ARM_cacheflush, startaddr, endaddr);

#  else
#    error "Unknown ARCH"
#  endif
}
917
918
919 /* Add a translation of vge to TT/TC. The translation is temporarily
920 in code[0 .. code_len-1].
921
922 pre: youngest_sector points to a valid (although possibly full)
923 sector.
924 */
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   if (0)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
                  entry, code_len);

   /* Update the entry-count/size stats. */
   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector.  Space needed in
      the tc, in 8-byte (ULong) units, rounded up. */
   reqdQ = (code_len + 7) >> 3;

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      VG_(debugLog)(1,"transtab",
                      "declare sector %d full "
                      "(TT loading %2d%%, TC loading %2d%%)\n",
                      y,
                      (100 * sectors[y].tt_n_inuse)
                         / N_TTES_PER_SECTOR,
                      (100 * (tc_sector_szQ - tcAvailQ))
                         / tc_sector_szQ);
      youngest_sector++;
      if (youngest_sector >= N_SECTORS)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   for (i = 0; i < code_len; i++)
      dstP[i] = srcP[i];
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* The copy is new host code: make the icache agree. */
   invalidate_icache( dstP, code_len );

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full.  Linear-probe
      from the hash of the entry address. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr = tcptr;
   sectors[y].tt[i].count = 0;
   sectors[y].tt[i].weight = 1;
   sectors[y].tt[i].vge = *vge;
   sectors[y].tt[i].entry = entry;

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr, &sectors[y].tt[i].count );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}
1043
1044
/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.

   result      (OUT) receives the host-code address on success; may be
               NULL if the caller only wants a yes/no answer.
   guest_addr  guest address whose translation is sought.
   upd_cache   if True, a hit also refreshes the fast cache entry.

   Returns True iff a translation was found in some sector.
*/
Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
                            Addr64        guest_addr,
                            Bool          upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   k      = -1;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < N_SECTORS; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      /* Linear-probe this sector's TT starting at the hash point.  An
         Empty slot terminates the probe chain (Deleted slots do not,
         since a later insert may have probed past them). */
      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tcptr,
                  &sectors[sno].tt[k].count );
            if (result)
               *result = (AddrH)sectors[sno].tt[k].tcptr;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}
1108
1109
1110 /*-------------------------------------------------------------*/
1111 /*--- Delete translations. ---*/
1112 /*-------------------------------------------------------------*/
1113
1114 /* forward */
1115 static void unredir_discard_translations( Addr64, ULong );
1116
1117 /* Stuff for deleting translations which intersect with a given
1118 address range. Unfortunately, to make this run at a reasonable
1119 speed, it is complex. */
1120
1121 static inline
overlap1(Addr64 s1,ULong r1,Addr64 s2,ULong r2)1122 Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
1123 {
1124 Addr64 e1 = s1 + r1 - 1ULL;
1125 Addr64 e2 = s2 + r2 - 1ULL;
1126 if (e1 < s2 || e2 < s1)
1127 return False;
1128 return True;
1129 }
1130
1131 static inline
overlaps(Addr64 start,ULong range,VexGuestExtents * vge)1132 Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
1133 {
1134 if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
1135 return True;
1136 if (vge->n_used < 2)
1137 return False;
1138 if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
1139 return True;
1140 if (vge->n_used < 3)
1141 return False;
1142 if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
1143 return True;
1144 return False;
1145 }
1146
1147
/* Delete a tt entry, and update all the eclass data accordingly.
   The slot is marked Deleted (not Empty) so that probe chains through
   it remain intact; its back-pointers from the eclass lists are
   tombstoned with EC2TTE_DELETED.  Also updates discard stats and
   notifies the tool if it asked for superblock-discard events. */
static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}
1189
1190
1191 /* Delete translations from sec which intersect specified range, but
1192 only consider translations in the specified eclass. */
1193
1194 static
delete_translations_in_sector_eclass(Sector * sec,Addr64 guest_start,ULong range,Int ec)1195 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
1196 Addr64 guest_start, ULong range,
1197 Int ec )
1198 {
1199 Int i;
1200 UShort tteno;
1201 Bool anyDeld = False;
1202 TTEntry* tte;
1203
1204 vg_assert(ec >= 0 && ec < ECLASS_N);
1205
1206 for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1207
1208 tteno = sec->ec2tte[ec][i];
1209 if (tteno == EC2TTE_DELETED) {
1210 /* already deleted */
1211 continue;
1212 }
1213
1214 vg_assert(tteno < N_TTES_PER_SECTOR);
1215
1216 tte = &sec->tt[tteno];
1217 vg_assert(tte->status == InUse);
1218
1219 if (overlaps( guest_start, range, &tte->vge )) {
1220 anyDeld = True;
1221 delete_tte( sec, (Int)tteno );
1222 }
1223
1224 }
1225
1226 return anyDeld;
1227 }
1228
1229
1230 /* Delete translations from sec which intersect specified range, the
1231 slow way, by inspecting all translations in sec. */
1232
1233 static
delete_translations_in_sector(Sector * sec,Addr64 guest_start,ULong range)1234 Bool delete_translations_in_sector ( /*MOD*/Sector* sec,
1235 Addr64 guest_start, ULong range )
1236 {
1237 Int i;
1238 Bool anyDeld = False;
1239
1240 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1241 if (sec->tt[i].status == InUse
1242 && overlaps( guest_start, range, &sec->tt[i].vge )) {
1243 anyDeld = True;
1244 delete_tte( sec, i );
1245 }
1246 }
1247
1248 return anyDeld;
1249 }
1250
1251
VG_(discard_translations)1252 void VG_(discard_translations) ( Addr64 guest_start, ULong range,
1253 HChar* who )
1254 {
1255 Sector* sec;
1256 Int sno, ec;
1257 Bool anyDeleted = False;
1258
1259 vg_assert(init_done);
1260
1261 VG_(debugLog)(2, "transtab",
1262 "discard_translations(0x%llx, %lld) req by %s\n",
1263 guest_start, range, who );
1264
1265 /* Pre-deletion sanity check */
1266 if (VG_(clo_sanity_level >= 4)) {
1267 Bool sane = sanity_check_all_sectors();
1268 vg_assert(sane);
1269 }
1270
1271 if (range == 0)
1272 return;
1273
1274 /* There are two different ways to do this.
1275
1276 If the range fits within a single address-range equivalence
1277 class, as will be the case for a cache line sized invalidation,
1278 then we only have to inspect the set of translations listed in
1279 that equivalence class, and also in the "sin-bin" equivalence
1280 class ECLASS_MISC.
1281
1282 Otherwise, the invalidation is of a larger range and probably
1283 results from munmap. In this case it's (probably!) faster just
1284 to inspect all translations, dump those we don't want, and
1285 regenerate the equivalence class information (since modifying it
1286 in-situ is even more expensive).
1287 */
1288
1289 /* First off, figure out if the range falls within a single class,
1290 and if so which one. */
1291
1292 ec = ECLASS_MISC;
1293 if (range < (1ULL << ECLASS_SHIFT))
1294 ec = range_to_eclass( guest_start, (UInt)range );
1295
1296 /* if ec is ECLASS_MISC then we aren't looking at just a single
1297 class, so use the slow scheme. Else use the fast scheme,
1298 examining 'ec' and ECLASS_MISC. */
1299
1300 if (ec != ECLASS_MISC) {
1301
1302 VG_(debugLog)(2, "transtab",
1303 " FAST, ec = %d\n", ec);
1304
1305 /* Fast scheme */
1306 vg_assert(ec >= 0 && ec < ECLASS_MISC);
1307
1308 for (sno = 0; sno < N_SECTORS; sno++) {
1309 sec = §ors[sno];
1310 if (sec->tc == NULL)
1311 continue;
1312 anyDeleted |= delete_translations_in_sector_eclass(
1313 sec, guest_start, range, ec );
1314 anyDeleted |= delete_translations_in_sector_eclass(
1315 sec, guest_start, range, ECLASS_MISC );
1316 }
1317
1318 } else {
1319
1320 /* slow scheme */
1321
1322 VG_(debugLog)(2, "transtab",
1323 " SLOW, ec = %d\n", ec);
1324
1325 for (sno = 0; sno < N_SECTORS; sno++) {
1326 sec = §ors[sno];
1327 if (sec->tc == NULL)
1328 continue;
1329 anyDeleted |= delete_translations_in_sector(
1330 sec, guest_start, range );
1331 }
1332
1333 }
1334
1335 if (anyDeleted)
1336 invalidateFastCache();
1337
1338 /* don't forget the no-redir cache */
1339 unredir_discard_translations( guest_start, range );
1340
1341 /* Post-deletion sanity check */
1342 if (VG_(clo_sanity_level >= 4)) {
1343 Int i;
1344 TTEntry* tte;
1345 Bool sane = sanity_check_all_sectors();
1346 vg_assert(sane);
1347 /* But now, also check the requested address range isn't
1348 present anywhere. */
1349 for (sno = 0; sno < N_SECTORS; sno++) {
1350 sec = §ors[sno];
1351 if (sec->tc == NULL)
1352 continue;
1353 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1354 tte = &sec->tt[i];
1355 if (tte->status != InUse)
1356 continue;
1357 vg_assert(!overlaps( guest_start, range, &tte->vge ));
1358 }
1359 }
1360 }
1361 }
1362
1363
1364 /*------------------------------------------------------------*/
1365 /*--- AUXILIARY: the unredirected TT/TC ---*/
1366 /*------------------------------------------------------------*/
1367
1368 /* A very simple translation cache which holds a small number of
1369 unredirected translations. This is completely independent of the
1370 main tt/tc structures. When unredir_tc or unredir_tt becomes full,
1371 both structures are simply dumped and we start over.
1372
1373 Since these translations are unredirected, the search key is (by
1374 definition) the first address entry in the .vge field. */
1375
/* Sized to hold 500 translations of average size 1000 bytes. */

/* Assumed average bytes of host code per unredirected translation. */
#define UNREDIR_SZB   1000

/* Number of slots in the unredirected translation table. */
#define N_UNREDIR_TT  500
/* Capacity of unredir_tc, measured in ULong (8-byte) units. */
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))

/* One entry in the unredirected translation table. */
typedef
   struct {
      VexGuestExtents vge;    /* guest extents; base[0] is the search key */
      Addr            hcode;  /* start of host code within unredir_tc */
      Bool            inUse;  /* slot occupied? */
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
/* Initialised to "full" so nothing fits before init_unredir_tt_tc()
   runs, which maps the cache and resets this to 0. */
static ULong *unredir_tc;
static Int    unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;
1402
1403
init_unredir_tt_tc(void)1404 static void init_unredir_tt_tc ( void )
1405 {
1406 Int i;
1407 if (unredir_tc == NULL) {
1408 SysRes sres = VG_(am_mmap_anon_float_valgrind)
1409 ( N_UNREDIR_TT * UNREDIR_SZB );
1410 if (sr_isError(sres)) {
1411 VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
1412 N_UNREDIR_TT * UNREDIR_SZB);
1413 /*NOTREACHED*/
1414 }
1415 unredir_tc = (ULong *)(AddrH)sr_Res(sres);
1416 }
1417 unredir_tc_used = 0;
1418 for (i = 0; i < N_UNREDIR_TT; i++)
1419 unredir_tt[i].inUse = False;
1420 unredir_tt_highwater = -1;
1421 }
1422
1423 /* Do a sanity check; return False on failure. */
sanity_check_redir_tt_tc(void)1424 static Bool sanity_check_redir_tt_tc ( void )
1425 {
1426 Int i;
1427 if (unredir_tt_highwater < -1) return False;
1428 if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
1429
1430 for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
1431 if (unredir_tt[i].inUse)
1432 return False;
1433
1434 if (unredir_tc_used < 0) return False;
1435 if (unredir_tc_used > N_UNREDIR_TCQ) return False;
1436
1437 return True;
1438 }
1439
1440
1441 /* Add an UNREDIRECTED translation of vge to TT/TC. The translation
1442 is temporarily in code[0 .. code_len-1].
1443 */
VG_(add_to_unredir_transtab)1444 void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
1445 Addr64 entry,
1446 AddrH code,
1447 UInt code_len )
1448 {
1449 Int i, j, code_szQ;
1450 HChar *srcP, *dstP;
1451
1452 vg_assert(sanity_check_redir_tt_tc());
1453
1454 /* This is the whole point: it's not redirected! */
1455 vg_assert(entry == vge->base[0]);
1456
1457 /* How many unredir_tt slots are needed */
1458 code_szQ = (code_len + 7) / 8;
1459
1460 /* Look for an empty unredir_tc slot */
1461 for (i = 0; i < N_UNREDIR_TT; i++)
1462 if (!unredir_tt[i].inUse)
1463 break;
1464
1465 if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
1466 /* It's full; dump everything we currently have */
1467 init_unredir_tt_tc();
1468 i = 0;
1469 }
1470
1471 vg_assert(unredir_tc_used >= 0);
1472 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
1473 vg_assert(code_szQ > 0);
1474 vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
1475 vg_assert(i >= 0 && i < N_UNREDIR_TT);
1476 vg_assert(unredir_tt[i].inUse == False);
1477
1478 if (i > unredir_tt_highwater)
1479 unredir_tt_highwater = i;
1480
1481 dstP = (HChar*)&unredir_tc[unredir_tc_used];
1482 srcP = (HChar*)code;
1483 for (j = 0; j < code_len; j++)
1484 dstP[j] = srcP[j];
1485
1486 invalidate_icache( dstP, code_len );
1487
1488 unredir_tt[i].inUse = True;
1489 unredir_tt[i].vge = *vge;
1490 unredir_tt[i].hcode = (Addr)dstP;
1491
1492 unredir_tc_used += code_szQ;
1493 vg_assert(unredir_tc_used >= 0);
1494 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
1495
1496 vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
1497 }
1498
VG_(search_unredir_transtab)1499 Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
1500 Addr64 guest_addr )
1501 {
1502 Int i;
1503 for (i = 0; i < N_UNREDIR_TT; i++) {
1504 if (!unredir_tt[i].inUse)
1505 continue;
1506 if (unredir_tt[i].vge.base[0] == guest_addr) {
1507 *result = (AddrH)unredir_tt[i].hcode;
1508 return True;
1509 }
1510 }
1511 return False;
1512 }
1513
/* Invalidate any unredirected translations overlapping the given
   guest address range, simply by marking their slots free. */
static void unredir_discard_translations( Addr64 guest_start, ULong range )
{
   Int slot;

   vg_assert(sanity_check_redir_tt_tc());

   /* Only slots up to the highwater mark can be in use. */
   for (slot = 0; slot <= unredir_tt_highwater; slot++) {
      if (!unredir_tt[slot].inUse)
         continue;
      if (overlaps( guest_start, range, &unredir_tt[slot].vge ))
         unredir_tt[slot].inUse = False;
   }
}
1526
1527
1528 /*------------------------------------------------------------*/
1529 /*--- Initialisation. ---*/
1530 /*------------------------------------------------------------*/
1531
/* One-time initialisation of the whole TT/TC subsystem: verifies
   build-time size assumptions, sizes the per-sector code caches from
   the tool's declared average translation size, clears all sectors
   and search hints, and resets the fast and unredirected caches. */
void VG_(init_tt_tc) ( void )
{
   Int i, j, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be.  The "+ 7" rounds the
      average translation size up to whole ULongs; the "1 +" leaves a
      unit of slack per entry. */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);

   /* Initialise the sectors.  tc == NULL marks a sector as never
      used; real allocation happens lazily in initialiseSector(). */
   youngest_sector = 0;
   for (i = 0; i < N_SECTORS; i++) {
      sectors[i].tc = NULL;
      sectors[i].tt = NULL;
      sectors[i].tc_next = NULL;
      sectors[i].tt_n_inuse = 0;
      for (j = 0; j < ECLASS_N; j++) {
         sectors[i].ec2tte_size[j] = 0;
         sectors[i].ec2tte_used[j] = 0;
         sectors[i].ec2tte[j] = NULL;
      }
   }

   /* Initialise the sector_search_order hint table; -1 means "no
      sector here yet". */
   for (i = 0; i < N_SECTORS; i++)
      sector_search_order[i] = -1;

   /* Initialise the fast caches.  If not profiling (the usual case),
      we have to explicitly invalidate the fastN cache as
      invalidateFastCache() won't do that for us. */
   invalidateFastCache();
   if (VG_(clo_profile_flags) == 0)
      invalidateFastNCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
          N_SECTORS, 8 * tc_sector_szQ,
          N_SECTORS * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d total entries, max occupancy %d (%d%%)\n",
         N_SECTORS * N_TTES_PER_SECTOR,
         N_SECTORS * N_TTES_PER_SECTOR_USABLE,
         SECTOR_TT_LIMIT_PERCENT );
   }

   VG_(debugLog)(2, "transtab",
      "cache: %d sectors of %d bytes each = %d total\n",
       N_SECTORS, 8 * tc_sector_szQ,
       N_SECTORS * 8 * tc_sector_szQ );
   VG_(debugLog)(2, "transtab",
      "table: %d total entries, max occupancy %d (%d%%)\n",
      N_SECTORS * N_TTES_PER_SECTOR,
      N_SECTORS * N_TTES_PER_SECTOR_USABLE,
      SECTOR_TT_LIMIT_PERCENT );
}
1614
1615
1616 /*------------------------------------------------------------*/
1617 /*--- Printing out statistics. ---*/
1618 /*------------------------------------------------------------*/
1619
/* Unsigned division that yields 0 instead of trapping when the
   divisor is zero. */
static ULong safe_idiv( ULong a, ULong b )
{
   if (b == 0)
      return 0;
   return a / b;
}
1624
/* Accessor: number of new translations made so far (the counter
   incremented on each add_to_transtab).
   NOTE(review): n_in_count's declaration is outside this view; if it
   is wider than UInt the value truncates here — confirm. */
UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}
1629
/* Print cumulative TT/TC statistics: lookup/probe counts, fast-cache
   activity, and translation in/dump/discard totals with original ->
   translated size ratios. */
void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
      n_fast_updates, n_fast_flushes );

   /* NOTE(review): "%'lld" prints n_in_count as signed; if the
      counter is ULong the specifier should be %'llu — confirm its
      declaration (outside this view). */
   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'lld "
                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(10*n_in_tsize, n_in_osize),
                n_in_sc_count);
   /* The "?" "?" splice avoids forming a trigraph in the literal. */
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
                n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );

   /* Dead debug code: dump sector 0's eclass occupancy table. */
   if (0) {
      Int i;
      VG_(printf)("\n");
      for (i = 0; i < ECLASS_N; i++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
         if (i % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}
1663
1664 /*------------------------------------------------------------*/
1665 /*--- Printing out of profiling results. ---*/
1666 /*------------------------------------------------------------*/
1667
score(TTEntry * tte)1668 static ULong score ( TTEntry* tte )
1669 {
1670 return ((ULong)tte->weight) * ((ULong)tte->count);
1671 }
1672
/* Fill tops[0 .. n_tops-1] with the highest-scoring in-use
   translations (descending by score, via insertion into a sorted
   array) and return the total score over all translations.
   NOTE(review): i and r are Int while n_tops is UInt, so the
   comparisons are signed/unsigned mixed — benign for realistic
   n_tops, but worth confirming. */
ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
{
   Int   sno, i, r, s;
   ULong score_total;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (i = 0; i < n_tops; i++) {
      tops[i].addr  = 0;   /* addr == 0 marks an unfilled slot */
      tops[i].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < N_SECTORS; sno++) {
      if (sectors[sno].tc == NULL)
         continue;   /* sector never used */
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i]: walk upward from the
            bottom past empty slots and lower-scoring entries. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
             if (tops[r].addr == 0) {
               r--;
               continue;
             }
             if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
                r--;
                continue;
             }
             break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   return score_total;
}
1725
1726 /*--------------------------------------------------------------------*/
1727 /*--- end ---*/
1728 /*--------------------------------------------------------------------*/
1729