• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Management of the translation table and cache.               ---*/
4 /*---                                                 m_transtab.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of Valgrind, a dynamic binary instrumentation
9    framework.
10 
11    Copyright (C) 2000-2010 Julian Seward
12       jseward@acm.org
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27    02111-1307, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 */
31 
32 #include "pub_core_basics.h"
33 #include "pub_core_debuglog.h"
34 #include "pub_core_machine.h"    // For VG(machine_get_VexArchInfo)
35 #include "pub_core_libcbase.h"
36 #include "pub_core_libcassert.h"
37 #include "pub_core_libcprint.h"
38 #include "pub_core_options.h"
39 #include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
40 #include "pub_core_transtab.h"
41 #include "pub_core_aspacemgr.h"
42 #include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
43 
44 // JRS FIXME get rid of this somehow
45 #if defined(VGP_arm_linux)
46 # include "pub_core_vkiscnums.h" // __ARM_NR_cacheflush
47 # include "pub_core_syscall.h"   // VG_(do_syscallN)
48 #endif
49 
50 
51 /* #define DEBUG_TRANSTAB */
52 
53 
54 /*-------------------------------------------------------------*/
55 /*--- Management of the FIFO-based translation table+cache. ---*/
56 /*-------------------------------------------------------------*/
57 
58 /*------------------ CONSTANTS ------------------*/
59 
60 /* Number of sectors the TC is divided into.  If you need a larger
61    overall translation cache, increase this value. */
62 #define N_SECTORS 8
63 
64 /* Number of TC entries in each sector.  This needs to be a prime
65    number to work properly, it must be <= 65535 (so that a TT index
66    fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote
67    'deleted') and it is strongly recommended not to change this.
68    65521 is the largest prime <= 65535. */
69 #define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521
70 
71 /* Because each sector contains a hash table of TTEntries, we need to
72    specify the maximum allowable loading, after which the sector is
73    deemed full. */
74 #define SECTOR_TT_LIMIT_PERCENT 65
75 
76 /* The sector is deemed full when this many entries are in it. */
77 #define N_TTES_PER_SECTOR_USABLE \
78            ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
79 
80 /* Equivalence classes for fast address range deletion.  There are 1 +
81    2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
82    address range which does not fall cleanly within any specific bin.
83    Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
84 #define ECLASS_SHIFT 11
85 #define ECLASS_WIDTH 8
86 #define ECLASS_MISC  (1 << ECLASS_WIDTH)
87 #define ECLASS_N     (1 + ECLASS_MISC)
88 
89 #define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
90 
91 
92 /*------------------ TYPES ------------------*/
93 
94 /* A translation-table entry.  This indicates precisely which areas of
95    guest code are included in the translation, and contains all other
96    auxiliary info too.  */
97 typedef
98    struct {
99       /* Profiling only: the count and weight (arbitrary meaning) for
100          this translation.  Weight is a property of the translation
101          itself and computed once when the translation is created.
102          Count is an entry count for the translation and is
103          incremented by 1 every time the translation is used, if we
104          are profiling. */
105       UInt     count;
106       UShort   weight;
107 
108       /* Status of the slot.  Note, we need to be able to do lazy
109          deletion, hence the Deleted state. */
110       enum { InUse, Deleted, Empty } status;
111 
112       /* 64-bit aligned pointer to one or more 64-bit words containing
113          the corresponding host code (must be in the same sector!)
114          This is a pointer into the sector's tc (code) area. */
115       ULong* tcptr;
116 
117       /* This is the original guest address that purportedly is the
118          entry point of the translation.  You might think that .entry
119          should be the same as .vge->base[0], and most of the time it
120          is.  However, when doing redirections, that is not the case.
121          .vge must always correctly describe the guest code sections
122          from which this translation was made.  However, .entry may or
123          may not be a lie, depending on whether or not we're doing
124          redirection. */
125       Addr64 entry;
126 
127       /* This structure describes precisely what ranges of guest code
128          the translation covers, so we can decide whether or not to
129          delete it when translations of a given address range are
130          invalidated. */
131       VexGuestExtents vge;
132 
133       /* Address range summary info: these are pointers back to
134          eclass[] entries in the containing Sector.  Those entries in
135          turn point back here -- the two structures are mutually
136          redundant but both necessary to make fast deletions work.
137          The eclass info is similar to, and derived from, this entry's
138          'vge' field, but it is not the same */
139       UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
140       UShort tte2ec_ec[3];  // for each, the eclass #
141       UInt   tte2ec_ix[3];  // and the index within the eclass.
142       // for i in 0 .. n_tte2ec-1
143       //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
144       // should be the index
145       // of this TTEntry in the containing Sector's tt array.
146    }
147    TTEntry;
148 
149 
150 /* Finally, a sector itself.  Each sector contains an array of
151    TCEntries, which hold code, and an array of TTEntries, containing
152    all required administrative info.  Profiling is supported using the
153    TTEntry .count and .weight fields, if required.  Each sector is
154    independent in that no cross-sector references are allowed.
155 
156    If the sector is not in use, all three pointers are NULL and
157    tt_n_inuse is zero.
158 */
159 typedef
160    struct {
161       /* The TCEntry area.  Size of this depends on the average
162          translation size.  We try and size it so it becomes full
163          precisely when this sector's translation table (tt) reaches
164          its load limit (SECTOR_TT_LIMIT_PERCENT). */
165       ULong* tc;
166 
167       /* The TTEntry array.  This is a fixed size, always containing
168          exactly N_TTES_PER_SECTOR entries. */
169       TTEntry* tt;
170 
171       /* This points to the current allocation point in tc. */
172       ULong* tc_next;
173 
174       /* The count of tt entries with state InUse. */
175       Int tt_n_inuse;
176 
177       /* Expandable arrays of tt indices for each of the ECLASS_N
178          address range equivalence classes.  These hold indices into
179          the containing sector's tt array, which in turn should point
180          back here. */
181       Int     ec2tte_size[ECLASS_N];
182       Int     ec2tte_used[ECLASS_N];
183       UShort* ec2tte[ECLASS_N];
184    }
185    Sector;
186 
187 
188 /*------------------ DECLS ------------------*/
189 
190 /* The root data structure is an array of sectors.  The index of the
191    youngest sector is recorded, and new translations are put into that
192    sector.  When it fills up, we move along to the next sector and
193    start to fill that up, wrapping around at the end of the array.
194    That way, once all N_TC_SECTORS have been bought into use for the
195    first time, and are full, we then re-use the oldest sector,
196    endlessly.
197 
198    When running, youngest sector should be between >= 0 and <
199    N_TC_SECTORS.  The initial -1 value indicates the TT/TC system is
200    not yet initialised.
201 */
202 static Sector sectors[N_SECTORS];
203 static Int    youngest_sector = -1;
204 
205 /* The number of ULongs in each TCEntry area.  This is computed once
206    at startup and does not change. */
207 static Int    tc_sector_szQ;
208 
209 
210 /* A list of sector numbers, in the order which they should be
211    searched to find translations.  This is an optimisation to be used
212    when searching for translations and should not affect
213    correctness.  -1 denotes "no entry". */
214 static Int sector_search_order[N_SECTORS];
215 
216 
217 /* Fast helper for the TC.  A direct-mapped cache which holds a set of
218    recently used (guest address, host address) pairs.  This array is
219    referred to directly from m_dispatch/dispatch-<platform>.S.
220 
221    Entries in tt_fast may refer to any valid TC entry, regardless of
222    which sector it's in.  Consequently we must be very careful to
223    invalidate this cache when TC entries are changed or disappear.
224 
225    A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
226    pointed at to cause that cache entry to miss.  This relies on the
227    assumption that no guest code actually has that address, hence a
228    value 0x1 seems good.  m_translate gives the client a synthetic
229    segfault if it tries to execute at this address.
230 */
231 /*
232 typedef
233    struct {
234       Addr guest;
235       Addr host;
236    }
237    FastCacheEntry;
238 */
239 /*global*/ __attribute__((aligned(16)))
240            FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
241 /*
242 #define TRANSTAB_BOGUS_GUEST_ADDR ((Addr)1)
243 */
244 
245 /* For profiling, we have a parallel array of pointers to .count
246    fields in TT entries.  Again, these pointers must be invalidated
247    when translations disappear.  A NULL pointer suffices to indicate
248    an unused slot.
249 
250    When not profiling (the normal case, VG_(clo_profile_flags) == 0),
251    all tt_fastN entries are set to NULL at startup and never read nor
252    written after that.
253 
254    When profiling (VG_(clo_profile_flags) > 0), tt_fast and tt_fastN
255    change together: if tt_fast[i].guest is TRANSTAB_BOGUS_GUEST_ADDR
256    then the corresponding tt_fastN[i] must be null.  If
257    tt_fast[i].guest is any other value, then tt_fastN[i] *must* point
258    to the .count field of the corresponding TT entry.
259 
260    tt_fast and tt_fastN are referred to from assembly code
261    (dispatch.S).
262 */
263 /*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];
264 
265 
266 /* Make sure we're not used before initialisation. */
267 static Bool init_done = False;
268 
269 
270 /*------------------ STATS DECLS ------------------*/
271 
272 /* Number of fast-cache updates and flushes done. */
273 ULong n_fast_flushes = 0;
274 ULong n_fast_updates = 0;
275 
276 /* Number of full lookups done. */
277 ULong n_full_lookups = 0;
278 ULong n_lookup_probes = 0;
279 
280 /* Number/osize/tsize of translations entered; also the number of
281    those for which self-checking was requested. */
282 ULong n_in_count    = 0;
283 ULong n_in_osize    = 0;
284 ULong n_in_tsize    = 0;
285 ULong n_in_sc_count = 0;
286 
287 /* Number/osize of translations discarded due to lack of space. */
288 ULong n_dump_count = 0;
289 ULong n_dump_osize = 0;
290 
291 /* Number/osize of translations discarded due to requests to do so. */
292 ULong n_disc_count = 0;
293 ULong n_disc_osize = 0;
294 
295 
296 /*-------------------------------------------------------------*/
297 /*--- Address-range equivalence class stuff                 ---*/
298 /*-------------------------------------------------------------*/
299 
300 /* Return equivalence class number for a range. */
301 
range_to_eclass(Addr64 start,UInt len)302 static Int range_to_eclass ( Addr64 start, UInt len )
303 {
304    UInt mask   = (1 << ECLASS_WIDTH) - 1;
305    UInt lo     = (UInt)start;
306    UInt hi     = lo + len - 1;
307    UInt loBits = (lo >> ECLASS_SHIFT) & mask;
308    UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
309    if (loBits == hiBits) {
310       vg_assert(loBits < ECLASS_N-1);
311       return loBits;
312    } else {
313       return ECLASS_MISC;
314    }
315 }
316 
317 
318 /* Calculates the equivalence class numbers for any VexGuestExtent.
319    These are written in *eclasses, which must be big enough to hold 3
320    Ints.  The number written, between 1 and 3, is returned.  The
321    eclasses are presented in order, and any duplicates are removed.
322 */
323 
324 static
vexGuestExtents_to_eclasses(Int * eclasses,VexGuestExtents * vge)325 Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
326                                   VexGuestExtents* vge )
327 {
328 #  define SWAP(_lv1,_lv2) \
329       do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
330 
331    Int i, j, n_ec, r;
332 
333    vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
334 
335    n_ec = 0;
336    for (i = 0; i < vge->n_used; i++) {
337       r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
338       if (r == ECLASS_MISC)
339          goto bad;
340       /* only add if we haven't already seen it */
341       for (j = 0; j < n_ec; j++)
342          if (eclasses[j] == r)
343             break;
344       if (j == n_ec)
345          eclasses[n_ec++] = r;
346    }
347 
348    if (n_ec == 1)
349       return 1;
350 
351    if (n_ec == 2) {
352       /* sort */
353       if (eclasses[0] > eclasses[1])
354          SWAP(eclasses[0], eclasses[1]);
355       return 2;
356    }
357 
358    if (n_ec == 3) {
359       /* sort */
360       if (eclasses[0] > eclasses[2])
361          SWAP(eclasses[0], eclasses[2]);
362       if (eclasses[0] > eclasses[1])
363          SWAP(eclasses[0], eclasses[1]);
364       if (eclasses[1] > eclasses[2])
365          SWAP(eclasses[1], eclasses[2]);
366       return 3;
367    }
368 
369    /* NOTREACHED */
370    vg_assert(0);
371 
372   bad:
373    eclasses[0] = ECLASS_MISC;
374    return 1;
375 
376 #  undef SWAP
377 }
378 
379 
380 /* Add tteno to the set of entries listed for equivalence class ec in
381    this sector.  Returns used location in eclass array. */
382 
383 static
addEClassNo(Sector * sec,Int ec,UShort tteno)384 UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
385 {
386    Int    old_sz, new_sz, i, r;
387    UShort *old_ar, *new_ar;
388 
389    vg_assert(ec >= 0 && ec < ECLASS_N);
390    vg_assert(tteno < N_TTES_PER_SECTOR);
391 
392    if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
393 
394    if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
395 
396       vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
397 
398       old_sz = sec->ec2tte_size[ec];
399       old_ar = sec->ec2tte[ec];
400       new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
401       new_ar = VG_(arena_malloc)(VG_AR_TTAUX, "transtab.aECN.1",
402                                  new_sz * sizeof(UShort));
403       for (i = 0; i < old_sz; i++)
404          new_ar[i] = old_ar[i];
405       if (old_ar)
406          VG_(arena_free)(VG_AR_TTAUX, old_ar);
407       sec->ec2tte_size[ec] = new_sz;
408       sec->ec2tte[ec] = new_ar;
409 
410       if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
411    }
412 
413    /* Common case */
414    r = sec->ec2tte_used[ec]++;
415    vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
416    sec->ec2tte[ec][r] = tteno;
417    return (UInt)r;
418 }
419 
420 
421 /* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
422    eclass entries to 'sec'. */
423 
424 static
upd_eclasses_after_add(Sector * sec,Int tteno)425 void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
426 {
427    Int i, r, eclasses[3];
428    TTEntry* tte;
429    vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
430 
431    tte = &sec->tt[tteno];
432    r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
433 
434    vg_assert(r >= 1 && r <= 3);
435    tte->n_tte2ec = r;
436 
437    for (i = 0; i < r; i++) {
438       tte->tte2ec_ec[i] = eclasses[i];
439       tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
440    }
441 }
442 
443 
444 /* Check the eclass info in 'sec' to ensure it is consistent.  Returns
445    True if OK, False if something's not right.  Expensive. */
446 
/* Check the eclass info in 'sec' to ensure it is consistent: every
   forward pointer (TTEntry -> ec2tte) has exactly one matching
   backward pointer (ec2tte -> TTEntry) and vice versa.  Returns True
   if OK, False if something's not right.  Expensive. */
static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   HChar*   whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   /* tc_next is the allocation point; it must lie within (or exactly
      at the end of) the sector's code area. */
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      /* size and array pointer must agree about whether the eclass
         has ever been allocated. */
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         /* Lazily-deleted slots are legitimate; skip them. */
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            /* eclass numbers within a TTEntry must be strictly
               ascending (sorted, no duplicates). */
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            /* Count only the link that points back at exactly this
               slot j. */
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntryies they point at point
      back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         /* Unused slots must carry no eclass links at all. */
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}
572 
573 
574 /* Sanity check absolutely everything.  True == check passed. */
575 
576 /* forwards */
577 static Bool sanity_check_redir_tt_tc ( void );
578 static Bool sanity_check_fastcache ( void );
579 
sanity_check_sector_search_order(void)580 static Bool sanity_check_sector_search_order ( void )
581 {
582    Int i, j, nListed;
583    /* assert the array is the right size */
584    vg_assert(N_SECTORS == (sizeof(sector_search_order)
585                            / sizeof(sector_search_order[0])));
586    /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
587    for (i = 0; i < N_SECTORS; i++) {
588       if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS)
589          break;
590    }
591    nListed = i;
592    for (/* */; i < N_SECTORS; i++) {
593       if (sector_search_order[i] != -1)
594          break;
595    }
596    if (i != N_SECTORS)
597       return False;
598    /* Check each sector number only appears once */
599    for (i = 0; i < N_SECTORS; i++) {
600       if (sector_search_order[i] == -1)
601          continue;
602       for (j = i+1; j < N_SECTORS; j++) {
603          if (sector_search_order[j] == sector_search_order[i])
604             return False;
605       }
606    }
607    /* Check that the number of listed sectors equals the number
608       in use, by counting nListed back down. */
609    for (i = 0; i < N_SECTORS; i++) {
610       if (sectors[i].tc != NULL)
611          nListed--;
612    }
613    if (nListed != 0)
614       return False;
615    return True;
616 }
617 
sanity_check_all_sectors(void)618 static Bool sanity_check_all_sectors ( void )
619 {
620    Int     sno;
621    Bool    sane;
622    Sector* sec;
623    for (sno = 0; sno < N_SECTORS; sno++) {
624       sec = &sectors[sno];
625       if (sec->tc == NULL)
626          continue;
627       sane = sanity_check_eclasses_in_sector( sec );
628       if (!sane)
629          return False;
630    }
631    if ( !sanity_check_redir_tt_tc() )
632       return False;
633    if ( !sanity_check_fastcache() )
634       return False;
635    if ( !sanity_check_sector_search_order() )
636       return False;
637    return True;
638 }
639 
640 
641 
642 /*-------------------------------------------------------------*/
643 /*--- Add/find translations                                 ---*/
644 /*-------------------------------------------------------------*/
645 
vge_osize(VexGuestExtents * vge)646 static UInt vge_osize ( VexGuestExtents* vge )
647 {
648    UInt i, n = 0;
649    for (i = 0; i < vge->n_used; i++)
650       n += (UInt)vge->len[i];
651    return n;
652 }
653 
isValidSector(Int sector)654 static Bool isValidSector ( Int sector )
655 {
656    if (sector < 0 || sector >= N_SECTORS)
657       return False;
658    return True;
659 }
660 
HASH_TT(Addr64 key)661 static inline UInt HASH_TT ( Addr64 key )
662 {
663    UInt kHi = (UInt)(key >> 32);
664    UInt kLo = (UInt)key;
665    UInt k32 = kHi ^ kLo;
666    UInt ror = 7;
667    if (ror > 0)
668       k32 = (k32 >> ror) | (k32 << (32-ror));
669    return k32 % N_TTES_PER_SECTOR;
670 }
671 
/* Install the (guest address -> host code) pair in the fast
   direct-mapped cache.  When profiling, also record the pointer to
   the translation's .count field in the parallel tt_fastN array. */
static void setFastCacheEntry ( Addr64 key, ULong* tcptr, UInt* count )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   FastCacheEntry* fce = &VG_(tt_fast)[cno];
   fce->guest = (Addr)key;
   fce->host  = (Addr)tcptr;
   if (VG_(clo_profile_flags) > 0)
      VG_(tt_fastN)[cno] = count;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(fce->guest != TRANSTAB_BOGUS_GUEST_ADDR);
}
685 
686 /* Invalidate the fast cache's counter array, VG_(tt_fastN). */
invalidateFastNCache(void)687 static void invalidateFastNCache ( void )
688 {
689    UInt j;
690    vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
691    for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
692       VG_(tt_fastN)[j+0] = NULL;
693       VG_(tt_fastN)[j+1] = NULL;
694       VG_(tt_fastN)[j+2] = NULL;
695       VG_(tt_fastN)[j+3] = NULL;
696    }
697    vg_assert(j == VG_TT_FAST_SIZE);
698 }
699 
700 /* Invalidate the fast cache VG_(tt_fast).  If profiling, also
701    invalidate the fast cache's counter array VG_(tt_fastN), otherwise
702    don't touch it. */
invalidateFastCache(void)703 static void invalidateFastCache ( void )
704 {
705    UInt j;
706    /* This loop is popular enough to make it worth unrolling a
707       bit, at least on ppc32. */
708    vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
709    for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
710       VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
711       VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
712       VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
713       VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
714    }
715 
716    if (VG_(clo_profile_flags) > 0)
717       invalidateFastNCache();
718 
719    vg_assert(j == VG_TT_FAST_SIZE);
720    n_fast_flushes++;
721 }
722 
sanity_check_fastcache(void)723 static Bool sanity_check_fastcache ( void )
724 {
725    UInt j;
726    if (0) VG_(printf)("sanity check fastcache\n");
727    if (VG_(clo_profile_flags) > 0) {
728       /* profiling */
729       for (j = 0; j < VG_TT_FAST_SIZE; j++) {
730          if (VG_(tt_fastN)[j] == NULL
731              && VG_(tt_fast)[j].guest != TRANSTAB_BOGUS_GUEST_ADDR)
732             return False;
733          if (VG_(tt_fastN)[j] != NULL
734              && VG_(tt_fast)[j].guest == TRANSTAB_BOGUS_GUEST_ADDR)
735             return False;
736       }
737    } else {
738       /* not profiling */
739       for (j = 0; j < VG_TT_FAST_SIZE; j++) {
740          if (VG_(tt_fastN)[j] != NULL)
741             return False;
742       }
743    }
744    return True;
745 }
746 
/* Prepare sector 'sno' for (re)use.  First use: mmap the tc (code)
   and tt (table) areas and register the sector in
   sector_search_order[].  Reuse: discard every existing translation
   (notifying the tool if it asked for discard events) and free the
   eclass arrays.  Either way the sector ends up empty and the fast
   cache is flushed, since tt_fast may hold pointers into this
   sector. */
static void initialiseSector ( Int sno )
{
   Int    i;
   SysRes sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   /* Paranoia: search order must be well-formed on entry (and is
      re-checked on exit below). */
   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);

      /* tc_sector_szQ counts ULongs, hence 8x for the byte size. */
      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(AddrH)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);

      /* Mark every slot empty; only .status and .n_tte2ec need
         initialising, the rest is written when a slot is filled. */
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Add an entry in the sector_search_order */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      /* Visit each just-about-to-be-abandoned translation. */
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
         } else {
            /* Empty/Deleted slots must carry no eclass links. */
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   /* Either way, the sector is now empty: reset the allocation
      point and usage count. */
   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   /* tt_fast may have referred to translations in this sector, so it
      must be flushed unconditionally. */
   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}
869 
/* Make host memory holding freshly-written translated code visible to
   the host's instruction fetch.  A no-op on x86/amd64 (hardware keeps
   I/D caches coherent); explicit cache maintenance elsewhere. */
static void invalidate_icache ( void *ptr, Int nbytes )
{
#  if defined(VGA_ppc32) || defined(VGA_ppc64)
   /* ppc: store each affected data-cache line to memory (dcbst), then
      invalidate the corresponding i-cache lines (icbi), with the
      required sync / "sync; isync" barriers around the two passes. */
   Addr startaddr = (Addr) ptr;
   Addr endaddr   = startaddr + nbytes;
   Addr cls;                   /* cache line size in bytes */
   Addr addr;
   VexArchInfo vai;

   if (nbytes == 0) return;
   vg_assert(nbytes > 0);

   /* Ask m_machine for this CPU's cache line size. */
   VG_(machine_get_VexArchInfo)( NULL, &vai );
   cls = vai.ppc_cache_line_szB;

   /* Stay sane .. */
   vg_assert(cls == 32 || cls == 64 || cls == 128);

   /* Round the start down to a line boundary; the loops then step one
      line at a time over the affected range. */
   startaddr &= ~(cls - 1);
   for (addr = startaddr; addr < endaddr; addr += cls) {
      __asm__ __volatile__("dcbst 0,%0" : : "r" (addr));
   }
   __asm__ __volatile__("sync");
   for (addr = startaddr; addr < endaddr; addr += cls) {
      __asm__ __volatile__("icbi 0,%0" : : "r" (addr));
   }
   __asm__ __volatile__("sync; isync");

#  elif defined(VGA_x86)
   /* no need to do anything, hardware provides coherence */

#  elif defined(VGA_amd64)
   /* no need to do anything, hardware provides coherence */

#  elif defined(VGP_arm_linux)
   /* ARM cache flushes are privileged, so we must defer to the kernel. */
   Addr startaddr = (Addr) ptr;
   Addr endaddr   = startaddr + nbytes;
   VG_(do_syscall2)(__NR_ARM_cacheflush, startaddr, endaddr);

#  else
#    error "Unknown ARCH"
#  endif
}
914 
915 
916 /* Add a translation of vge to TT/TC.  The translation is temporarily
917    in code[0 .. code_len-1].
918 
919    pre: youngest_sector points to a valid (although possibly full)
920    sector.
921 */
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking )
{
   Int    tcAvailQ, reqdQ, y, i;   /* sizes in 8-byte (ULong) quanta */
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   if (0)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
                  entry, code_len);

   /* Stats: count and sizes of incoming translations. */
   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   /* Lazily allocate the sector's tt/tc on first use. */
   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;   /* round up to whole ULongs */

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      VG_(debugLog)(1,"transtab",
                      "declare sector %d full "
                      "(TT loading %2d%%, TC loading %2d%%)\n",
                      y,
                      (100 * sectors[y].tt_n_inuse)
                         / N_TTES_PER_SECTOR,
                      (100 * (tc_sector_szQ - tcAvailQ))
                         / tc_sector_szQ);
      /* Advance round-robin to the next sector. */
      youngest_sector++;
      if (youngest_sector >= N_SECTORS)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... the fresh/recycled sector must now have room. */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   for (i = 0; i < code_len; i++)
      dstP[i] = srcP[i];
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* The host may now fetch instructions from dstP. */
   invalidate_icache( dstP, code_len );

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full.  Linear probe
      from the hash point, wrapping at the end. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr  = tcptr;
   sectors[y].tt[i].count  = 0;
   sectors[y].tt[i].weight = 1;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr, &sectors[y].tt[i].count );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}
1040 
1041 
1042 /* Search for the translation of the given guest address.  If
1043    requested, a successful search can also cause the fast-caches to be
1044    updated.
1045 */
/* Look up guest_addr in every sector's translation table.  On success
   optionally refresh the fast cache and write the host-code address
   to *result. */
Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
                            Addr64        guest_addr,
                            Bool          upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   k      = -1;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < N_SECTORS; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      /* Linear probe from kstart within this sector. */
      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tcptr,
                              &sectors[sno].tt[k].count );
            if (result)
               *result = (AddrH)sectors[sno].tt[k].tcptr;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         /* An Empty slot terminates the probe chain. */
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}
1105 
1106 
1107 /*-------------------------------------------------------------*/
1108 /*--- Delete translations.                                  ---*/
1109 /*-------------------------------------------------------------*/
1110 
1111 /* forward */
1112 static void unredir_discard_translations( Addr64, ULong );
1113 
1114 /* Stuff for deleting translations which intersect with a given
1115    address range.  Unfortunately, to make this run at a reasonable
1116    speed, it is complex. */
1117 
1118 static inline
overlap1(Addr64 s1,ULong r1,Addr64 s2,ULong r2)1119 Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
1120 {
1121    Addr64 e1 = s1 + r1 - 1ULL;
1122    Addr64 e2 = s2 + r2 - 1ULL;
1123    if (e1 < s2 || e2 < s1)
1124       return False;
1125    return True;
1126 }
1127 
1128 static inline
overlaps(Addr64 start,ULong range,VexGuestExtents * vge)1129 Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
1130 {
1131    if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
1132       return True;
1133    if (vge->n_used < 2)
1134       return False;
1135    if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
1136       return True;
1137    if (vge->n_used < 3)
1138       return False;
1139    if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
1140       return True;
1141    return False;
1142 }
1143 
1144 
1145 /* Delete a tt entry, and update all the eclass data accordingly. */
1146 
static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Deal with the ec-to-tte links first: for each eclass this entry
      is listed in, mark its back-pointer slot as deleted. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry.  The tc space itself is not reclaimed;
      the slot is merely marked Deleted. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}
1186 
1187 
1188 /* Delete translations from sec which intersect specified range, but
1189    only consider translations in the specified eclass. */
1190 
1191 static
delete_translations_in_sector_eclass(Sector * sec,Addr64 guest_start,ULong range,Int ec)1192 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
1193                                             Addr64 guest_start, ULong range,
1194                                             Int ec )
1195 {
1196    Int      i;
1197    UShort   tteno;
1198    Bool     anyDeld = False;
1199    TTEntry* tte;
1200 
1201    vg_assert(ec >= 0 && ec < ECLASS_N);
1202 
1203    for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1204 
1205       tteno = sec->ec2tte[ec][i];
1206       if (tteno == EC2TTE_DELETED) {
1207          /* already deleted */
1208          continue;
1209       }
1210 
1211       vg_assert(tteno < N_TTES_PER_SECTOR);
1212 
1213       tte = &sec->tt[tteno];
1214       vg_assert(tte->status == InUse);
1215 
1216       if (overlaps( guest_start, range, &tte->vge )) {
1217          anyDeld = True;
1218          delete_tte( sec, (Int)tteno );
1219       }
1220 
1221    }
1222 
1223    return anyDeld;
1224 }
1225 
1226 
1227 /* Delete translations from sec which intersect specified range, the
1228    slow way, by inspecting all translations in sec. */
1229 
1230 static
delete_translations_in_sector(Sector * sec,Addr64 guest_start,ULong range)1231 Bool delete_translations_in_sector ( /*MOD*/Sector* sec,
1232                                      Addr64 guest_start, ULong range )
1233 {
1234    Int  i;
1235    Bool anyDeld = False;
1236 
1237    for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1238       if (sec->tt[i].status == InUse
1239           && overlaps( guest_start, range, &sec->tt[i].vge )) {
1240          anyDeld = True;
1241          delete_tte( sec, i );
1242       }
1243    }
1244 
1245    return anyDeld;
1246 }
1247 
1248 
VG_(discard_translations)1249 void VG_(discard_translations) ( Addr64 guest_start, ULong range,
1250                                  HChar* who )
1251 {
1252    Sector* sec;
1253    Int     sno, ec;
1254    Bool    anyDeleted = False;
1255 
1256    vg_assert(init_done);
1257 
1258    VG_(debugLog)(2, "transtab",
1259                     "discard_translations(0x%llx, %lld) req by %s\n",
1260                     guest_start, range, who );
1261 
1262    /* Pre-deletion sanity check */
1263    if (VG_(clo_sanity_level >= 4)) {
1264       Bool sane = sanity_check_all_sectors();
1265       vg_assert(sane);
1266    }
1267 
1268    if (range == 0)
1269       return;
1270 
1271    /* There are two different ways to do this.
1272 
1273       If the range fits within a single address-range equivalence
1274       class, as will be the case for a cache line sized invalidation,
1275       then we only have to inspect the set of translations listed in
1276       that equivalence class, and also in the "sin-bin" equivalence
1277       class ECLASS_MISC.
1278 
1279       Otherwise, the invalidation is of a larger range and probably
1280       results from munmap.  In this case it's (probably!) faster just
1281       to inspect all translations, dump those we don't want, and
1282       regenerate the equivalence class information (since modifying it
1283       in-situ is even more expensive).
1284    */
1285 
1286    /* First off, figure out if the range falls within a single class,
1287       and if so which one. */
1288 
1289    ec = ECLASS_MISC;
1290    if (range < (1ULL << ECLASS_SHIFT))
1291       ec = range_to_eclass( guest_start, (UInt)range );
1292 
1293    /* if ec is ECLASS_MISC then we aren't looking at just a single
1294       class, so use the slow scheme.  Else use the fast scheme,
1295       examining 'ec' and ECLASS_MISC. */
1296 
1297    if (ec != ECLASS_MISC) {
1298 
1299       VG_(debugLog)(2, "transtab",
1300                        "                    FAST, ec = %d\n", ec);
1301 
1302       /* Fast scheme */
1303       vg_assert(ec >= 0 && ec < ECLASS_MISC);
1304 
1305       for (sno = 0; sno < N_SECTORS; sno++) {
1306          sec = &sectors[sno];
1307          if (sec->tc == NULL)
1308             continue;
1309          anyDeleted |= delete_translations_in_sector_eclass(
1310                          sec, guest_start, range, ec );
1311          anyDeleted |= delete_translations_in_sector_eclass(
1312                          sec, guest_start, range, ECLASS_MISC );
1313       }
1314 
1315    } else {
1316 
1317       /* slow scheme */
1318 
1319       VG_(debugLog)(2, "transtab",
1320                        "                    SLOW, ec = %d\n", ec);
1321 
1322       for (sno = 0; sno < N_SECTORS; sno++) {
1323          sec = &sectors[sno];
1324          if (sec->tc == NULL)
1325             continue;
1326          anyDeleted |= delete_translations_in_sector(
1327                          sec, guest_start, range );
1328       }
1329 
1330    }
1331 
1332    if (anyDeleted)
1333       invalidateFastCache();
1334 
1335    /* don't forget the no-redir cache */
1336    unredir_discard_translations( guest_start, range );
1337 
1338    /* Post-deletion sanity check */
1339    if (VG_(clo_sanity_level >= 4)) {
1340       Int      i;
1341       TTEntry* tte;
1342       Bool     sane = sanity_check_all_sectors();
1343       vg_assert(sane);
1344       /* But now, also check the requested address range isn't
1345          present anywhere. */
1346       for (sno = 0; sno < N_SECTORS; sno++) {
1347          sec = &sectors[sno];
1348          if (sec->tc == NULL)
1349             continue;
1350          for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1351             tte = &sec->tt[i];
1352             if (tte->status != InUse)
1353                continue;
1354             vg_assert(!overlaps( guest_start, range, &tte->vge ));
1355          }
1356       }
1357    }
1358 }
1359 
1360 
1361 /*------------------------------------------------------------*/
1362 /*--- AUXILIARY: the unredirected TT/TC                    ---*/
1363 /*------------------------------------------------------------*/
1364 
1365 /* A very simple translation cache which holds a small number of
1366    unredirected translations.  This is completely independent of the
1367    main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
1368    both structures are simply dumped and we start over.
1369 
1370    Since these translations are unredirected, the search key is (by
1371    definition) the first address entry in the .vge field. */
1372 
/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))

/* One entry in the unredirected translation table. */
typedef
   struct {
      VexGuestExtents vge;    /* guest extents; base[0] is the lookup key */
      Addr            hcode;  /* host address of the code in unredir_tc */
      Bool            inUse;  /* slot currently occupied? */
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong    *unredir_tc;
static Int      unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;
1399 
1400 
init_unredir_tt_tc(void)1401 static void init_unredir_tt_tc ( void )
1402 {
1403    Int i;
1404    if (unredir_tc == NULL) {
1405       SysRes sres = VG_(am_mmap_anon_float_valgrind)
1406                        ( N_UNREDIR_TT * UNREDIR_SZB );
1407       if (sr_isError(sres)) {
1408          VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
1409                                      N_UNREDIR_TT * UNREDIR_SZB);
1410          /*NOTREACHED*/
1411       }
1412       unredir_tc = (ULong *)(AddrH)sr_Res(sres);
1413    }
1414    unredir_tc_used = 0;
1415    for (i = 0; i < N_UNREDIR_TT; i++)
1416       unredir_tt[i].inUse = False;
1417    unredir_tt_highwater = -1;
1418 }
1419 
1420 /* Do a sanity check; return False on failure. */
sanity_check_redir_tt_tc(void)1421 static Bool sanity_check_redir_tt_tc ( void )
1422 {
1423    Int i;
1424    if (unredir_tt_highwater < -1) return False;
1425    if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
1426 
1427    for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
1428       if (unredir_tt[i].inUse)
1429          return False;
1430 
1431    if (unredir_tc_used < 0) return False;
1432    if (unredir_tc_used > N_UNREDIR_TCQ) return False;
1433 
1434    return True;
1435 }
1436 
1437 
1438 /* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
1439    is temporarily in code[0 .. code_len-1].
1440 */
void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
                                   Addr64           entry,
                                   AddrH            code,
                                   UInt             code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many unredir_tc quanta (ULongs) are needed, rounding up. */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tt slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   /* After the possible reset, the new translation must fit. */
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   /* Copy the code to the current allocation point in unredir_tc. */
   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   invalidate_icache( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}
1495 
VG_(search_unredir_transtab)1496 Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
1497                                     Addr64        guest_addr )
1498 {
1499    Int i;
1500    for (i = 0; i < N_UNREDIR_TT; i++) {
1501       if (!unredir_tt[i].inUse)
1502          continue;
1503       if (unredir_tt[i].vge.base[0] == guest_addr) {
1504          *result = (AddrH)unredir_tt[i].hcode;
1505          return True;
1506       }
1507    }
1508    return False;
1509 }
1510 
/* Invalidate any unredirected translations intersecting the given
   guest range.  Only slots up to the highwater mark can be in use. */
static void unredir_discard_translations( Addr64 guest_start, ULong range )
{
   Int j;

   vg_assert(sanity_check_redir_tt_tc());

   for (j = 0; j <= unredir_tt_highwater; j++) {
      if (!unredir_tt[j].inUse)
         continue;
      if (overlaps( guest_start, range, &unredir_tt[j].vge ))
         unredir_tt[j].inUse = False;
   }
}
1523 
1524 
1525 /*------------------------------------------------------------*/
1526 /*--- Initialisation.                                      ---*/
1527 /*------------------------------------------------------------*/
1528 
/* One-time startup of the translation table/cache machinery. */
void VG_(init_tt_tc) ( void )
{
   Int i, j, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be.  Based on the tool's
      declared average translation size, in 8-byte quanta. */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 80 * N_TTES_PER_SECTOR_USABLE);

   /* Initialise the sectors.  tt/tc allocation is deferred until a
      sector is first used. */
   youngest_sector = 0;
   for (i = 0; i < N_SECTORS; i++) {
      sectors[i].tc = NULL;
      sectors[i].tt = NULL;
      sectors[i].tc_next = NULL;
      sectors[i].tt_n_inuse = 0;
      for (j = 0; j < ECLASS_N; j++) {
         sectors[i].ec2tte_size[j] = 0;
         sectors[i].ec2tte_used[j] = 0;
         sectors[i].ec2tte[j] = NULL;
      }
   }

   /* Initialise the sector_search_order hint table.  -1 == unused. */
   for (i = 0; i < N_SECTORS; i++)
      sector_search_order[i] = -1;

   /* Initialise the fast caches.  If not profiling (the usual case),
      we have to explicitly invalidate the fastN cache as
      invalidateFastCache() won't do that for us. */
   invalidateFastCache();
   if (VG_(clo_profile_flags) == 0)
      invalidateFastNCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
          N_SECTORS, 8 * tc_sector_szQ,
          N_SECTORS * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d total entries, max occupancy %d (%d%%)\n",
         N_SECTORS * N_TTES_PER_SECTOR,
         N_SECTORS * N_TTES_PER_SECTOR_USABLE,
         SECTOR_TT_LIMIT_PERCENT );
   }

   VG_(debugLog)(2, "transtab",
      "cache: %d sectors of %d bytes each = %d total\n",
       N_SECTORS, 8 * tc_sector_szQ,
       N_SECTORS * 8 * tc_sector_szQ );
   VG_(debugLog)(2, "transtab",
      "table: %d total entries, max occupancy %d (%d%%)\n",
      N_SECTORS * N_TTES_PER_SECTOR,
      N_SECTORS * N_TTES_PER_SECTOR_USABLE,
      SECTOR_TT_LIMIT_PERCENT );
}
1611 
1612 
1613 /*------------------------------------------------------------*/
1614 /*--- Printing out statistics.                             ---*/
1615 /*------------------------------------------------------------*/
1616 
/* Integer division that yields 0 instead of trapping when the
   divisor is zero. */
static ULong safe_idiv( ULong a, ULong b )
{
   if (b == 0)
      return 0;
   return a / b;
}
1621 
/* Number of translations made since startup (counts additions only;
   dumped/discarded translations are not subtracted). */
UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}
1626 
/* Emit the accumulated tt/tc statistics via the debug-message
   channel. */
void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'lld "
                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(10*n_in_tsize, n_in_osize),
                n_in_sc_count);
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
                n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );

   /* Disabled debug dump of sector 0's eclass occupancy counts. */
   if (0) {
      Int i;
      VG_(printf)("\n");
      for (i = 0; i < ECLASS_N; i++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
         if (i % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}
1660 
1661 /*------------------------------------------------------------*/
1662 /*--- Printing out of profiling results.                   ---*/
1663 /*------------------------------------------------------------*/
1664 
score(TTEntry * tte)1665 static ULong score ( TTEntry* tte )
1666 {
1667    return ((ULong)tte->weight) * ((ULong)tte->count);
1668 }
1669 
/* Fill tops[0 .. n_tops-1] with the highest-scoring in-use
   translations, best first, and return the total score over all
   in-use translations. */
ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
{
   Int   sno, i, r, s;
   ULong score_total;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (i = 0; i < n_tops; i++) {
      tops[i].addr  = 0;       /* addr == 0 marks an empty rank slot */
      tops[i].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < N_SECTORS; sno++) {
      if (sectors[sno].tc == NULL)
         continue;    /* sector never initialised */
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i]: walk upwards from
            the bottom past empty slots and lower scorers. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;           /* beats every current entry */
            if (tops[r].addr == 0) {
               r--;
               continue;
            }
            if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
               r--;
               continue;
            }
            break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   return score_total;
}
1722 
1723 /*--------------------------------------------------------------------*/
1724 /*--- end                                                          ---*/
1725 /*--------------------------------------------------------------------*/
1726