/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2012 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libcproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


/* #define DEBUG_TRANSTAB */


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/*------------------ CONSTANTS ------------------*/

/* Number of sectors the TC is divided into.  If you need a larger
   overall translation cache, increase this value. */
#define N_SECTORS 8

/* Number of TC entries in each sector.  This needs to be a prime
   number to work properly; it must be <= 65535 (so that a TT index
   fits in a UShort, leaving 0xFFFF (EC2TTE_DELETED) to denote
   'deleted'), and it is strongly recommended not to change it.
   65521 is the largest prime <= 65535. */
#define N_TTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR_USABLE \
           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
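
/* eg with N_TTES_PER_SECTOR = 65521 and SECTOR_TT_LIMIT_PERCENT = 65,
   N_TTES_PER_SECTOR_USABLE works out as (65521 * 65) / 100 = 42588,
   so a sector is declared full once 42588 of its 65521 tt slots are
   occupied. */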

/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)

#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
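
/* Worked example of the binning these constants imply, as used by
   range_to_eclass() below: bits [18:11] of an address select one of
   256 2KB-sized bins, with the pattern repeating every 512KB.  A
   range that stays inside one bin, eg [0x1000, 0x17FF], lands in bin
   (0x1000 >> 11) & 0xFF = 2; a range that straddles a 2KB boundary,
   eg [0x17F0, 0x1810], gets ECLASS_MISC. */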


/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      UInt from_sNo;   /* sector number */
      UInt from_tteNo; /* TTE number in given sector */
      UInt from_offs;  /* code offset from TCEntry::tcptr where the patch is */
      Bool to_fastEP;  /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      UInt to_sNo;    /* sector number */
      UInt to_tteNo;  /* TTE number in given sector */
      UInt from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;


#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      UInt     n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR */
      InEdge   fixed[N_FIXED_IN_EDGE_ARR];
      XArray*  var; /* XArray* of InEdge */
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      UInt    n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      OutEdge fixed[N_FIXED_OUT_EDGE_ARR];
      XArray* var; /* XArray* of OutEdge */
   }
   OutEdgeArr;


/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      ULong    count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!).
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
          1. erased (as part of the oldest sector cleanup) when the
             youngest sector is full.
          2. discarded due to calls to VG_(discard_translations).
             VG_(discard_translations) sets the status of the
             translation to 'Deleted'.
             Among others, the gdbserver discards one or more
             translations when a breakpoint is inserted or removed
             at an Addr, when single stepping mode is enabled or
             disabled, or when a translation is instrumented for
             gdbserver (all the target jumps of this translation
             are invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         If a translation has been erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and
         find_TTEntry_from_hcode therefore check that the 'from'
         translation still exists before doing the patching.

         Is it safe to erase or discard the current translation E
         being executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being made.  A new translation is made only
         if execution has reached a not-yet-patched jump to another
         translation.  In that case, the guest address of N is
         assigned to the PC in the VEX state and control is returned
         to the scheduler.  N will then be translated, which can erase
         the translation E (if a sector becomes full).
         VG_(tt_tc_do_chaining) will not chain to a translation E
         which can no longer be found, and execution will continue
         at the current guest PC (i.e. at the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be
         discarded (e.g. by gdbserver).  VG_(discard_translations)
         will mark this translation E as Deleted, but the translation
         itself is not erased.  In particular, its host code can only
         be overwritten or erased when a new translation is made.  A
         new translation will only be made when a not-yet-translated
         jump is to be executed.  Execution of the Deleted translation
         E will continue until a non-patched jump is encountered.
         This situation is then similar to the 'erasing' case above:
         the current translation E can be erased or overwritten, as
         execution will continue at the new translation N.

      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if  A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntry;


/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      UInt   tteNo;
   }
   HostExtent;

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.

   If the sector is not in use, the tc, tt, tc_next and host_extents
   pointers are NULL and tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;

/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest_sector should satisfy
   0 <= youngest_sector < N_SECTORS.  The initial -1 value indicates
   the TT/TC system is not yet initialised.
*/
static Sector sectors[N_SECTORS];
static Int    youngest_sector = -1;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int    tc_sector_szQ;


/* A list of sector numbers, in the order in which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  -1 denotes "no entry". */
static Int sector_search_order[N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address, TRANSTAB_BOGUS_GUEST_ADDR, must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value of 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
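
/* In C terms, the fast-path probe done by the dispatcher assembly
   amounts to the following sketch (the real lookup lives in
   m_dispatch/dispatch-<platform>.S; this is only an illustration):

      UWord cno = (UWord)VG_TT_FAST_HASH(guest_addr);
      if (VG_(tt_fast)[cno].guest == guest_addr)
         --> enter the translation at VG_(tt_fast)[cno].host
      else
         --> fall back to the full VG_(search_transtab) lookup
*/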

/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}


/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntry* index_tte ( UInt sNo, UInt tteNo )
{
   vg_assert(sNo < N_SECTORS);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->tt);
   TTEntry* tte = &s->tt[tteNo];
   vg_assert(tte->status == InUse);
   return tte;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = -1; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = -1; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntry__init ( TTEntry* tte )
{
   VG_(memset)(tte, 0, sizeof(*tte));
}

static UWord InEdgeArr__size ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->var);
      iea->var = NULL;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->fixed[i] = iea->fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                               ttaux_free,
                               sizeof(InEdge));
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(iea->var, &iea->fixed[i]);
         }
         VG_(addToXA)(iea->var, ie);
         iea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         iea->fixed[iea->n_fixed++] = *ie;
      }
   }
}

static UWord OutEdgeArr__size ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->var);
      oea->var = NULL;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->fixed[i] = oea->fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                               ttaux_free,
                               sizeof(OutEdge));
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(oea->var, &oea->fixed[i]);
         }
         VG_(addToXA)(oea->var, oe);
         oea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         oea->fixed[oea->n_fixed++] = *oe;
      }
   }
}

static
Int HostExtent__cmpOrd ( void* v1, void* v2 )
{
   HostExtent* hx1 = (HostExtent*)v1;
   HostExtent* hx2 = (HostExtent*)v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}

static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/UInt* from_sNo,
                              /*OUT*/UInt* from_tteNo,
                              void* hcode )
{
   Int i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < N_SECTORS; i++) {
      Int sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      Sector* sec = &sectors[sno];
      XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       (Int(*)(void*,void*))HostExtent__cmpOrd
                    );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         UInt tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);
         /* Entry might have been invalidated.  Consider this
            as not found. */
         if (sec->tt[tteNo].status == Deleted)
            return False;
         vg_assert(sec->tt[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = (UInt)tteNo;
         return True;
      }
   }
   return False;
}


/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( void* hcode )
{
   Int i, sno;
   for (i = 0; i < N_SECTORS; i++) {
      sno = sector_search_order[i];
      if (sno == -1)
         break; /* run out of sectors to search */
      if ((UChar*)hcode >= (UChar*)sectors[sno].tc
          && (UChar*)hcode <= (UChar*)sectors[sno].tc_next
                              + sizeof(ULong) - 1)
         return True;
   }
   return False;
}


/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              UInt  to_sNo,
                              UInt  to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch vex_arch = VexArch_INVALID;
   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

   // host_code is where we're patching to.  So it needs to
   // take into account whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte    = index_tte(to_sNo, to_tteNo);
   void*    host_code = ((UChar*)to_tte->tcptr)
                        + (to_fastEP ? LibVEX_evCheckSzB(vex_arch) : 0);

   // stay sane -- the jump destination (host_code) is in this
   // sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   UInt from_sNo   = (UInt)-1;
   UInt from_tteNo = (UInt)-1;
   Bool from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           vex_arch,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}


/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch vex_arch,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntry* tte
      = index_tte(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tte->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
       = LibVEX_UnChain( vex_arch, place_to_patch,
                         place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}


/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch vex_arch,
                                           UInt here_sNo, UInt here_tteNo )
{
   if (0)
      VG_(printf)("QQQ unchain_in_prep %u.%u\n", here_sNo, here_tteNo);
   UWord    i, j, n, m;
   Int      evCheckSzB = LibVEX_evCheckSzB(vex_arch);
   TTEntry* here_tte   = index_tte(here_sNo, here_tteNo);
   vg_assert(here_tte->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tte->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(vex_arch, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tte->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
           break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tte->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tte->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
           break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tte->in_edges);
   OutEdgeArr__makeEmpty(&here_tte->out_edges);
}


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return the equivalence class number for a range. */

static Int range_to_eclass ( Addr64 start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}


/* Calculates the equivalence class numbers for any VexGuestExtents.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/
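
/* eg a vge with n_used == 2, base[] = { 0x5000, 0x5800 } and
   len[] = { 0x100, 0x80 } maps to eclasses[] = { 10, 11 } and
   returns 2, since 0x5000 >> ECLASS_SHIFT == 10 and
   0x5800 >> ECLASS_SHIFT == 11 and neither range straddles a bin
   boundary. */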

static
Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
                                  VexGuestExtents* vge )
{
#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   Int i, j, n_ec, r;

   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   n_ec = 0;
   for (i = 0; i < vge->n_used; i++) {
      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}


/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
{
   Int    old_sz, new_sz, i, r;
   UShort *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
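      /* ie the array grows 8, 16, 32, 64, 96, 144, 216, ...:
         doubling while small, then by 1.5x once at or above 64 */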
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(UShort));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}


/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
{
   Int i, r, eclasses[3];
   TTEntry* tte;
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   tte = &sec->tt[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );

   vg_assert(r >= 1 && r <= 3);
   tte->n_tte2ec = r;

   for (i = 0; i < r; i++) {
      tte->tte2ec_ec[i] = eclasses[i];
      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
   }
}


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   HChar*   whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntries
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check that the TTEntries they point at
      point back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_tte2ec nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_tte2ec out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->tte2ec_ec[..] out of range(2)");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->tte2ec_ix[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   Int i, j, nListed;
   /* assert the array is the right size */
   vg_assert(N_SECTORS == (sizeof(sector_search_order)
                           / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
   for (i = 0; i < N_SECTORS; i++) {
      if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS)
         break;
   }
   nListed = i;
   for (/* */; i < N_SECTORS; i++) {
      if (sector_search_order[i] != -1)
         break;
   }
   if (i != N_SECTORS)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < N_SECTORS; i++) {
      if (sector_search_order[i] == -1)
         continue;
      for (j = i+1; j < N_SECTORS; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < N_SECTORS; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}

static Bool sanity_check_all_sectors ( void )
{
   Int     sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < N_SECTORS; sno++) {
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
   }
   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}


/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static Bool isValidSector ( Int sector )
{
   if (sector < 0 || sector >= N_SECTORS)
      return False;
   return True;
}

static inline UInt HASH_TT ( Addr64 key )
{
   UInt kHi = (UInt)(key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return k32 % N_TTES_PER_SECTOR;
}
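
/* eg HASH_TT(0x401000): kHi = 0, kLo = 0x401000, so k32 = 0x401000;
   rotating right by 7 gives 0x8020, and 0x8020 % 65521 = 32800. */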

static void setFastCacheEntry ( Addr64 key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = (Addr)key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate,
      which should reject any attempt to make a translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}

/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}

static void initialiseSector ( Int sno )
{
   Int     i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(AddrH)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch vex_arch = VexArch_INVALID;
      VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

      /* Visit each just-about-to-be-abandoned translation. */
      if (0) VG_(printf)("QQQ unlink-entire-sector: %d START\n", sno);
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
            unchain_in_preparation_for_deletion(vex_arch, sno, i);
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }
      if (0) VG_(printf)("QQQ unlink-entire-sector: %d END\n", sno);

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            ttaux_free(sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}

1436 
1437 /* Add a translation of vge to TT/TC.  The translation is temporarily
1438    in code[0 .. code_len-1].
1439 
1440    pre: youngest_sector points to a valid (although possibly full)
1441    sector.
1442 */
VG_(add_to_transtab)1443 void VG_(add_to_transtab)( VexGuestExtents* vge,
1444                            Addr64           entry,
1445                            AddrH            code,
1446                            UInt             code_len,
1447                            Bool             is_self_checking,
1448                            Int              offs_profInc,
1449                            UInt             n_guest_instrs,
1450                            VexArch          arch_host )
1451 {
1452    Int    tcAvailQ, reqdQ, y, i;
1453    ULong  *tcptr, *tcptr2;
1454    UChar* srcP;
1455    UChar* dstP;
1456 
1457    vg_assert(init_done);
1458    vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
1459 
1460    /* 60000: should agree with N_TMPBUF in m_translate.c. */
1461    vg_assert(code_len > 0 && code_len < 60000);
1462 
1463    /* Generally stay sane */
1464    vg_assert(n_guest_instrs < 200); /* it can be zero, though */
1465 
1466    if (0)
1467       VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
1468                   entry, code_len);
1469 
1470    n_in_count++;
1471    n_in_tsize += code_len;
1472    n_in_osize += vge_osize(vge);
1473    if (is_self_checking)
1474       n_in_sc_count++;
1475 
1476    y = youngest_sector;
1477    vg_assert(isValidSector(y));
1478 
1479    if (sectors[y].tc == NULL)
1480       initialiseSector(y);
1481 
1482    /* Try putting the translation in this sector. */
1483    reqdQ = (code_len + 7) >> 3;
1484 
1485    /* Will it fit in tc? */
1486    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
1487               - ((ULong*)(sectors[y].tc_next));
1488    vg_assert(tcAvailQ >= 0);
1489    vg_assert(tcAvailQ <= tc_sector_szQ);
1490 
1491    if (tcAvailQ < reqdQ
1492        || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
1493       /* No.  So move on to the next sector.  Either it's never been
1494          used before, in which case it will get its tt/tc allocated
1495          now, or it has been used before, in which case it is set to be
1496          empty, hence throwing out the oldest sector. */
1497       vg_assert(tc_sector_szQ > 0);
1498       VG_(debugLog)(1,"transtab",
1499                       "declare sector %d full "
1500                       "(TT loading %2d%%, TC loading %2d%%)\n",
1501                       y,
1502                       (100 * sectors[y].tt_n_inuse)
1503                          / N_TTES_PER_SECTOR,
1504                       (100 * (tc_sector_szQ - tcAvailQ))
1505                          / tc_sector_szQ);
1506       youngest_sector++;
1507       if (youngest_sector >= N_SECTORS)
1508          youngest_sector = 0;
1509       y = youngest_sector;
1510       initialiseSector(y);
1511    }
1512 
1513    /* Be sure ... */
1514    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
1515               - ((ULong*)(sectors[y].tc_next));
1516    vg_assert(tcAvailQ >= 0);
1517    vg_assert(tcAvailQ <= tc_sector_szQ);
1518    vg_assert(tcAvailQ >= reqdQ);
1519    vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
1520    vg_assert(sectors[y].tt_n_inuse >= 0);
1521 
1522    /* Copy into tc. */
1523    tcptr = sectors[y].tc_next;
1524    vg_assert(tcptr >= &sectors[y].tc[0]);
1525    vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);
1526 
1527    dstP = (UChar*)tcptr;
1528    srcP = (UChar*)code;
1529    VG_(memcpy)(dstP, srcP, code_len);
1530    sectors[y].tc_next += reqdQ;
1531    sectors[y].tt_n_inuse++;
1532 
1533    /* more paranoia */
1534    tcptr2 = sectors[y].tc_next;
1535    vg_assert(tcptr2 >= &sectors[y].tc[0]);
1536    vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);
1537 
1538    /* Find an empty tt slot, and use it.  There must be such a slot
1539       since tt is never allowed to get completely full. */
1540    i = HASH_TT(entry);
1541    vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
1542    while (True) {
1543       if (sectors[y].tt[i].status == Empty
1544           || sectors[y].tt[i].status == Deleted)
1545          break;
1546       i++;
1547       if (i >= N_TTES_PER_SECTOR)
1548          i = 0;
1549    }
1550 
1551    TTEntry__init(&sectors[y].tt[i]);
1552    sectors[y].tt[i].status = InUse;
1553    sectors[y].tt[i].tcptr  = tcptr;
1554    sectors[y].tt[i].count  = 0;
1555    sectors[y].tt[i].weight = n_guest_instrs == 0 ? 1 : n_guest_instrs;
1556    sectors[y].tt[i].vge    = *vge;
1557    sectors[y].tt[i].entry  = entry;
1558 
1559    /* Patch in the profile counter location, if necessary. */
1560    if (offs_profInc != -1) {
1561       vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
1562       VexInvalRange vir
1563          = LibVEX_PatchProfInc( arch_host,
1564                                 dstP + offs_profInc,
1565                                 &sectors[y].tt[i].count );
1566       VG_(invalidate_icache)( (void*)vir.start, vir.len );
1567    }
1568 
1569    VG_(invalidate_icache)( dstP, code_len );
1570 
1571    /* Add this entry to the host_extents map, checking that we're
1572       adding in order. */
1573    { HostExtent hx;
1574      hx.start = (UChar*)tcptr;
1575      hx.len   = code_len;
1576      hx.tteNo = i;
1577      vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
1578      XArray* hx_array = sectors[y].host_extents;
1579      vg_assert(hx_array);
1580      Word n = VG_(sizeXA)(hx_array);
1581      if (n > 0) {
1582         HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
1583         vg_assert(hx_prev->start + hx_prev->len <= hx.start);
1584      }
1585      VG_(addToXA)(hx_array, &hx);
1586    }
1587 
1588    /* Update the fast-cache. */
1589    setFastCacheEntry( entry, tcptr );
1590 
1591    /* Note the eclass numbers for this translation. */
1592    upd_eclasses_after_add( &sectors[y], i );
1593 }
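
/* For illustration: the TC stores code in 8-byte quadwords, so the
   "(code_len + 7) >> 3" above rounds a byte length up to whole
   quadwords: 1..8 bytes need 1, 9..16 need 2, and so on.  A
   compiled-out sketch with worked cases (ad hoc name):
*/
#if 0
static UInt bytes_to_quadwords ( UInt code_len )
{
   return (code_len + 7) >> 3;   /* same as (code_len + 7) / 8 */
}
/* bytes_to_quadwords(1) == 1, (8) == 1, (9) == 2, (59999) == 7500 */
#endif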
1594 
1595 
1596 /* Search for the translation of the given guest address.  If
1597    requested, a successful search can also cause the fast-caches to be
1598    updated.
1599 */
1600 Bool VG_(search_transtab) ( /*OUT*/AddrH* res_hcode,
1601                             /*OUT*/UInt*  res_sNo,
1602                             /*OUT*/UInt*  res_tteNo,
1603                             Addr64        guest_addr,
1604                             Bool          upd_cache )
1605 {
1606    Int i, j, k, kstart, sno;
1607 
1608    vg_assert(init_done);
1609    /* Find the initial probe point just once.  It will be the same in
1610       all sectors and avoids multiple expensive % operations. */
1611    n_full_lookups++;
1612    k      = -1;
1613    kstart = HASH_TT(guest_addr);
1614    vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);
1615 
1616    /* Search in all the sectors, using sector_search_order[] as a
1617       heuristic guide as to what order to visit the sectors. */
1618    for (i = 0; i < N_SECTORS; i++) {
1619 
1620       sno = sector_search_order[i];
1621       if (UNLIKELY(sno == -1))
1622          return False; /* run out of sectors to search */
1623 
1624       k = kstart;
1625       for (j = 0; j < N_TTES_PER_SECTOR; j++) {
1626          n_lookup_probes++;
1627          if (sectors[sno].tt[k].status == InUse
1628              && sectors[sno].tt[k].entry == guest_addr) {
1629             /* found it */
1630             if (upd_cache)
1631                setFastCacheEntry(
1632                   guest_addr, sectors[sno].tt[k].tcptr );
1633             if (res_hcode)
1634                *res_hcode = (AddrH)sectors[sno].tt[k].tcptr;
1635             if (res_sNo)
1636                *res_sNo = sno;
1637             if (res_tteNo)
1638                *res_tteNo = k;
1639             /* pull this one one step closer to the front.  For large
1640                apps this more or less halves the number of required
1641                probes. */
1642             if (i > 0) {
1643                Int tmp = sector_search_order[i-1];
1644                sector_search_order[i-1] = sector_search_order[i];
1645                sector_search_order[i] = tmp;
1646             }
1647             return True;
1648          }
1649          if (sectors[sno].tt[k].status == Empty)
1650             break; /* not found in this sector */
1651          k++;
1652          if (k == N_TTES_PER_SECTOR)
1653             k = 0;
1654       }
1655 
1656       /* If we fall off the end, all entries are InUse and not
1657          matching, or Deleted.  In any case we did not find it in this
1658          sector. */
1659    }
1660 
1661    /* Not found in any sector. */
1662    return False;
1663 }
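
/* For illustration: the loop above is classic open addressing with
   linear probing.  Starting at HASH_TT(guest_addr) (defined earlier in
   this file: a hash of the guest address reduced mod
   N_TTES_PER_SECTOR), we step one slot at a time, wrapping at the end
   of the table, until we either find a matching InUse entry or hit an
   Empty slot, which terminates the probe chain.  A compiled-out
   standalone sketch over a simplified table of raw keys, with 0
   standing for Empty (ad hoc names; the real loop must also step over
   Deleted entries):
*/
#if 0
static Int probe_linear ( ULong* table, Int n_slots, Int kstart, ULong key )
{
   Int j, k = kstart;
   for (j = 0; j < n_slots; j++) {
      if (table[k] == key)
         return k;      /* found */
      if (table[k] == 0)
         return -1;     /* Empty ends the chain: key is absent */
      k++;
      if (k == n_slots)
         k = 0;         /* wrap around */
   }
   return -1;           /* scanned every slot without finding it */
}
#endif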
1664 
1665 
1666 /*-------------------------------------------------------------*/
1667 /*--- Delete translations.                                  ---*/
1668 /*-------------------------------------------------------------*/
1669 
1670 /* forward */
1671 static void unredir_discard_translations( Addr64, ULong );
1672 
1673 /* Stuff for deleting translations which intersect with a given
1674    address range.  Unfortunately, to make this run at a reasonable
1675    speed, it is complex. */
1676 
1677 static inline
1678 Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
1679 {
1680    Addr64 e1 = s1 + r1 - 1ULL;
1681    Addr64 e2 = s2 + r2 - 1ULL;
1682    if (e1 < s2 || e2 < s1)
1683       return False;
1684    return True;
1685 }
1686 
1687 static inline
1688 Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
1689 {
1690    if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
1691       return True;
1692    if (vge->n_used < 2)
1693       return False;
1694    if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
1695       return True;
1696    if (vge->n_used < 3)
1697       return False;
1698    if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
1699       return True;
1700    return False;
1701 }
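
/* For illustration: overlap1 works on closed intervals [s, s+r-1] and
   reports overlap unless one interval ends strictly before the other
   begins.  Two worked cases, as a compiled-out self-check:
*/
#if 0
static void overlap1_examples ( void )
{
   /* [0x1000,0x10FF] and [0x10FF,0x10FF] share their last byte. */
   vg_assert( overlap1(0x1000, 0x100, 0x10FF, 1) );
   /* [0x1000,0x10FF] and [0x1100,0x1100] are adjacent but disjoint. */
   vg_assert( ! overlap1(0x1000, 0x100, 0x1100, 1) );
}
#endif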
1702 
1703 
1704 /* Delete a tt entry, and update all the eclass data accordingly. */
1705 
1706 static void delete_tte ( /*MOD*/Sector* sec, UInt secNo, Int tteno,
1707                          VexArch vex_arch )
1708 {
1709    Int      i, ec_num, ec_idx;
1710    TTEntry* tte;
1711 
1712    /* sec and secNo are mutually redundant; cross-check. */
1713    vg_assert(sec == &sectors[secNo]);
1714 
1715    vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1716    tte = &sec->tt[tteno];
1717    vg_assert(tte->status == InUse);
1718    vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);
1719 
1720    /* Unchain .. */
1721    unchain_in_preparation_for_deletion(vex_arch, secNo, tteno);
1722 
1723    /* Deal with the ec-to-tte links first. */
1724    for (i = 0; i < tte->n_tte2ec; i++) {
1725       ec_num = (Int)tte->tte2ec_ec[i];
1726       ec_idx = tte->tte2ec_ix[i];
1727       vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
1728       vg_assert(ec_idx >= 0);
1729       vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
1730       /* Assert that the two links point at each other. */
1731       vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
1732       /* "delete" the pointer back to here. */
1733       sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
1734    }
1735 
1736    /* Now fix up this TTEntry. */
1737    tte->status   = Deleted;
1738    tte->n_tte2ec = 0;
1739 
1740    /* Stats .. */
1741    sec->tt_n_inuse--;
1742    n_disc_count++;
1743    n_disc_osize += vge_osize(&tte->vge);
1744 
1745    /* Tell the tool too. */
1746    if (VG_(needs).superblock_discards) {
1747       VG_TDICT_CALL( tool_discard_superblock_info,
1748                      tte->entry,
1749                      tte->vge );
1750    }
1751 }
1752 
1753 
1754 /* Delete translations from sec which intersect specified range, but
1755    only consider translations in the specified eclass. */
1756 
1757 static
1758 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, UInt secNo,
1759                                             Addr64 guest_start, ULong range,
1760                                             Int ec,
1761                                             VexArch vex_arch )
1762 {
1763    Int      i;
1764    UShort   tteno;
1765    Bool     anyDeld = False;
1766    TTEntry* tte;
1767 
1768    vg_assert(ec >= 0 && ec < ECLASS_N);
1769 
1770    for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1771 
1772       tteno = sec->ec2tte[ec][i];
1773       if (tteno == EC2TTE_DELETED) {
1774          /* already deleted */
1775          continue;
1776       }
1777 
1778       vg_assert(tteno < N_TTES_PER_SECTOR);
1779 
1780       tte = &sec->tt[tteno];
1781       vg_assert(tte->status == InUse);
1782 
1783       if (overlaps( guest_start, range, &tte->vge )) {
1784          anyDeld = True;
1785          delete_tte( sec, secNo, (Int)tteno, vex_arch );
1786       }
1787 
1788    }
1789 
1790    return anyDeld;
1791 }
1792 
1793 
1794 /* Delete translations from sec which intersect specified range, the
1795    slow way, by inspecting all translations in sec. */
1796 
1797 static
1798 Bool delete_translations_in_sector ( /*MOD*/Sector* sec, UInt secNo,
1799                                      Addr64 guest_start, ULong range,
1800                                      VexArch vex_arch )
1801 {
1802    Int  i;
1803    Bool anyDeld = False;
1804 
1805    for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1806       if (sec->tt[i].status == InUse
1807           && overlaps( guest_start, range, &sec->tt[i].vge )) {
1808          anyDeld = True;
1809          delete_tte( sec, secNo, i, vex_arch );
1810       }
1811    }
1812 
1813    return anyDeld;
1814 }
1815 
1816 
1817 void VG_(discard_translations) ( Addr64 guest_start, ULong range,
1818                                  HChar* who )
1819 {
1820    Sector* sec;
1821    Int     sno, ec;
1822    Bool    anyDeleted = False;
1823 
1824    vg_assert(init_done);
1825 
1826    VG_(debugLog)(2, "transtab",
1827                     "discard_translations(0x%llx, %lld) req by %s\n",
1828                     guest_start, range, who );
1829 
1830    /* Pre-deletion sanity check */
1831    if (VG_(clo_sanity_level) >= 4) {
1832       Bool sane = sanity_check_all_sectors();
1833       vg_assert(sane);
1834    }
1835 
1836    if (range == 0)
1837       return;
1838 
1839    VexArch vex_arch = VexArch_INVALID;
1840    VG_(machine_get_VexArchInfo)( &vex_arch, NULL );
1841 
1842    /* There are two different ways to do this.
1843 
1844       If the range fits within a single address-range equivalence
1845       class, as will be the case for a cache line sized invalidation,
1846       then we only have to inspect the set of translations listed in
1847       that equivalence class, and also in the "sin-bin" equivalence
1848       class ECLASS_MISC.
1849 
1850       Otherwise, the invalidation is of a larger range and probably
1851       results from munmap.  In this case it's (probably!) faster just
1852       to inspect all translations, dump those we don't want, and
1853       regenerate the equivalence class information (since modifying it
1854       in-situ is even more expensive).
1855    */
1856 
1857    /* First off, figure out if the range falls within a single class,
1858       and if so which one. */
1859 
1860    ec = ECLASS_MISC;
1861    if (range < (1ULL << ECLASS_SHIFT))
1862       ec = range_to_eclass( guest_start, (UInt)range );
1863 
1864    /* if ec is ECLASS_MISC then we aren't looking at just a single
1865       class, so use the slow scheme.  Else use the fast scheme,
1866       examining 'ec' and ECLASS_MISC. */
1867 
1868    if (ec != ECLASS_MISC) {
1869 
1870       VG_(debugLog)(2, "transtab",
1871                        "                    FAST, ec = %d\n", ec);
1872 
1873       /* Fast scheme */
1874       vg_assert(ec >= 0 && ec < ECLASS_MISC);
1875 
1876       for (sno = 0; sno < N_SECTORS; sno++) {
1877          sec = &sectors[sno];
1878          if (sec->tc == NULL)
1879             continue;
1880          anyDeleted |= delete_translations_in_sector_eclass(
1881                           sec, sno, guest_start, range, ec,
1882                           vex_arch
1883                        );
1884          anyDeleted |= delete_translations_in_sector_eclass(
1885                           sec, sno, guest_start, range, ECLASS_MISC,
1886                           vex_arch
1887                        );
1888       }
1889 
1890    } else {
1891 
1892       /* slow scheme */
1893 
1894       VG_(debugLog)(2, "transtab",
1895                        "                    SLOW, ec = %d\n", ec);
1896 
1897       for (sno = 0; sno < N_SECTORS; sno++) {
1898          sec = &sectors[sno];
1899          if (sec->tc == NULL)
1900             continue;
1901          anyDeleted |= delete_translations_in_sector(
1902                           sec, sno, guest_start, range, vex_arch );
1903       }
1904 
1905    }
1906 
1907    if (anyDeleted)
1908       invalidateFastCache();
1909 
1910    /* don't forget the no-redir cache */
1911    unredir_discard_translations( guest_start, range );
1912 
1913    /* Post-deletion sanity check */
1914    if (VG_(clo_sanity_level) >= 4) {
1915       Int      i;
1916       TTEntry* tte;
1917       Bool     sane = sanity_check_all_sectors();
1918       vg_assert(sane);
1919       /* But now, also check the requested address range isn't
1920          present anywhere. */
1921       for (sno = 0; sno < N_SECTORS; sno++) {
1922          sec = &sectors[sno];
1923          if (sec->tc == NULL)
1924             continue;
1925          for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1926             tte = &sec->tt[i];
1927             if (tte->status != InUse)
1928                continue;
1929             vg_assert(!overlaps( guest_start, range, &tte->vge ));
1930          }
1931       }
1932    }
1933 }
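
/* For illustration: the fast scheme hinges on range_to_eclass (defined
   earlier in this file) mapping a small range to a single bin when the
   range sits cleanly inside one bin, and to ECLASS_MISC when it
   straddles a bin boundary.  A compiled-out sketch of that idea --
   a simplification for exposition, not a copy of range_to_eclass:
*/
#if 0
static Int sketch_range_to_eclass ( Addr64 start, UInt len )
{
   Addr64 binFirst = start >> ECLASS_SHIFT;
   Addr64 binLast  = (start + len - 1) >> ECLASS_SHIFT;
   if (binFirst != binLast)
      return ECLASS_MISC;              /* straddles a bin boundary */
   /* the low ECLASS_WIDTH bits of the bin number select the class */
   return (Int)(binFirst & (ECLASS_MISC - 1));
}
#endif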
1934 
1935 
1936 /*------------------------------------------------------------*/
1937 /*--- AUXILIARY: the unredirected TT/TC                    ---*/
1938 /*------------------------------------------------------------*/
1939 
1940 /* A very simple translation cache which holds a small number of
1941    unredirected translations.  This is completely independent of the
1942    main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
1943    both structures are simply dumped and we start over.
1944 
1945    Since these translations are unredirected, the search key is (by
1946    definition) the first address entry in the .vge field. */
1947 
1948 /* Sized to hold 500 translations of average size 1000 bytes. */
1949 
1950 #define UNREDIR_SZB   1000
1951 
1952 #define N_UNREDIR_TT  500
1953 #define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
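
/* Worked sizing: N_UNREDIR_TT * UNREDIR_SZB = 500 * 1000 = 500000
   bytes of cache, so N_UNREDIR_TCQ = 500000 / sizeof(ULong) = 62500
   quadwords. */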
1954 
1955 typedef
1956    struct {
1957       VexGuestExtents vge;
1958       Addr            hcode;
1959       Bool            inUse;
1960    }
1961    UTCEntry;
1962 
1963 /* We just allocate forwards in _tc, never deleting. */
1964 static ULong    *unredir_tc;
1965 static Int      unredir_tc_used = N_UNREDIR_TCQ;
1966 
1967 /* Slots in _tt can come into use and out again (.inUse).
1968    Nevertheless _tt_highwater is maintained so that invalidations
1969    don't have to scan all the slots when only a few are in use.
1970    _tt_highwater holds the index of the highest ever allocated
1971    slot. */
1972 static UTCEntry unredir_tt[N_UNREDIR_TT];
1973 static Int      unredir_tt_highwater;
1974 
1975 
1976 static void init_unredir_tt_tc ( void )
1977 {
1978    Int i;
1979    if (unredir_tc == NULL) {
1980       SysRes sres = VG_(am_mmap_anon_float_valgrind)
1981                        ( N_UNREDIR_TT * UNREDIR_SZB );
1982       if (sr_isError(sres)) {
1983          VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
1984                                      N_UNREDIR_TT * UNREDIR_SZB);
1985          /*NOTREACHED*/
1986       }
1987       unredir_tc = (ULong *)(AddrH)sr_Res(sres);
1988    }
1989    unredir_tc_used = 0;
1990    for (i = 0; i < N_UNREDIR_TT; i++)
1991       unredir_tt[i].inUse = False;
1992    unredir_tt_highwater = -1;
1993 }
1994 
1995 /* Do a sanity check; return False on failure. */
1996 static Bool sanity_check_redir_tt_tc ( void )
1997 {
1998    Int i;
1999    if (unredir_tt_highwater < -1) return False;
2000    if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
2001 
2002    for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
2003       if (unredir_tt[i].inUse)
2004          return False;
2005 
2006    if (unredir_tc_used < 0) return False;
2007    if (unredir_tc_used > N_UNREDIR_TCQ) return False;
2008 
2009    return True;
2010 }
2011 
2012 
2013 /* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
2014    is temporarily in code[0 .. code_len-1].
2015 */
2016 void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
2017                                    Addr64           entry,
2018                                    AddrH            code,
2019                                    UInt             code_len )
2020 {
2021    Int   i, j, code_szQ;
2022    HChar *srcP, *dstP;
2023 
2024    vg_assert(sanity_check_redir_tt_tc());
2025 
2026    /* This is the whole point: it's not redirected! */
2027    vg_assert(entry == vge->base[0]);
2028 
2029    /* How many unredir_tc quadword slots are needed */
2030    code_szQ = (code_len + 7) / 8;
2031 
2032    /* Look for an empty unredir_tc slot */
2033    for (i = 0; i < N_UNREDIR_TT; i++)
2034       if (!unredir_tt[i].inUse)
2035          break;
2036 
2037    if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
2038       /* It's full; dump everything we currently have */
2039       init_unredir_tt_tc();
2040       i = 0;
2041    }
2042 
2043    vg_assert(unredir_tc_used >= 0);
2044    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2045    vg_assert(code_szQ > 0);
2046    vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
2047    vg_assert(i >= 0 && i < N_UNREDIR_TT);
2048    vg_assert(unredir_tt[i].inUse == False);
2049 
2050    if (i > unredir_tt_highwater)
2051       unredir_tt_highwater = i;
2052 
2053    dstP = (HChar*)&unredir_tc[unredir_tc_used];
2054    srcP = (HChar*)code;
2055    for (j = 0; j < code_len; j++)
2056       dstP[j] = srcP[j];
2057 
2058    VG_(invalidate_icache)( dstP, code_len );
2059 
2060    unredir_tt[i].inUse = True;
2061    unredir_tt[i].vge   = *vge;
2062    unredir_tt[i].hcode = (Addr)dstP;
2063 
2064    unredir_tc_used += code_szQ;
2065    vg_assert(unredir_tc_used >= 0);
2066    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2067 
2068    vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
2069 }
2070 
2071 Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
2072                                     Addr64        guest_addr )
2073 {
2074    Int i;
2075    for (i = 0; i < N_UNREDIR_TT; i++) {
2076       if (!unredir_tt[i].inUse)
2077          continue;
2078       if (unredir_tt[i].vge.base[0] == guest_addr) {
2079          *result = (AddrH)unredir_tt[i].hcode;
2080          return True;
2081       }
2082    }
2083    return False;
2084 }
2085 
2086 static void unredir_discard_translations( Addr64 guest_start, ULong range )
2087 {
2088    Int i;
2089 
2090    vg_assert(sanity_check_redir_tt_tc());
2091 
2092    for (i = 0; i <= unredir_tt_highwater; i++) {
2093       if (unredir_tt[i].inUse
2094           && overlaps( guest_start, range, &unredir_tt[i].vge))
2095          unredir_tt[i].inUse = False;
2096    }
2097 }
2098 
2099 
2100 /*------------------------------------------------------------*/
2101 /*--- Initialisation.                                      ---*/
2102 /*------------------------------------------------------------*/
2103 
2104 void VG_(init_tt_tc) ( void )
2105 {
2106    Int i, j, avg_codeszQ;
2107 
2108    vg_assert(!init_done);
2109    init_done = True;
2110 
2111    /* Otherwise lots of things go wrong... */
2112    vg_assert(sizeof(ULong) == 8);
2113    vg_assert(sizeof(Addr64) == 8);
2114    /* check fast cache entries really are 2 words long */
2115    vg_assert(sizeof(Addr) == sizeof(void*));
2116    vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
2117    /* check fast cache entries are packed back-to-back with no spaces */
2118    vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
2119    /* check fast cache is aligned as we requested.  Not fatal if it
2120       isn't, but we might as well make sure. */
2121    vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));
2122 
2123    if (VG_(clo_verbosity) > 2)
2124       VG_(message)(Vg_DebugMsg,
2125                    "TT/TC: VG_(init_tt_tc) "
2126                    "(startup of code management)\n");
2127 
2128    /* Figure out how big each tc area should be.  */
2129    avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
2130    tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);
2131 
2132    /* Ensure the calculated value is not way crazy. */
2133    vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
2134    vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);
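
   /* Worked example with illustrative numbers (not taken from any
      particular tool): if avg_translation_sizeB were 128, then
      avg_codeszQ = (128 + 7) / 8 = 16, and since
      N_TTES_PER_SECTOR_USABLE = (65521 * 65) / 100 = 42588,
      tc_sector_szQ = 42588 * (1 + 16) = 723996 quadwords, i.e. about
      5.8MB of TC per sector, or roughly 46MB across all 8 sectors. */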
2135 
2136    /* Initialise the sectors */
2137    youngest_sector = 0;
2138    for (i = 0; i < N_SECTORS; i++) {
2139       sectors[i].tc = NULL;
2140       sectors[i].tt = NULL;
2141       sectors[i].tc_next = NULL;
2142       sectors[i].tt_n_inuse = 0;
2143       for (j = 0; j < ECLASS_N; j++) {
2144          sectors[i].ec2tte_size[j] = 0;
2145          sectors[i].ec2tte_used[j] = 0;
2146          sectors[i].ec2tte[j] = NULL;
2147       }
2148       sectors[i].host_extents = NULL;
2149    }
2150 
2151    /* Initialise the sector_search_order hint table. */
2152    for (i = 0; i < N_SECTORS; i++)
2153       sector_search_order[i] = -1;
2154 
2155    /* Initialise the fast cache. */
2156    invalidateFastCache();
2157 
2158    /* and the unredir tt/tc */
2159    init_unredir_tt_tc();
2160 
2161    if (VG_(clo_verbosity) > 2) {
2162       VG_(message)(Vg_DebugMsg,
2163          "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
2164           N_SECTORS, 8 * tc_sector_szQ,
2165           N_SECTORS * 8 * tc_sector_szQ );
2166       VG_(message)(Vg_DebugMsg,
2167          "TT/TC: table: %d total entries, max occupancy %d (%d%%)\n",
2168          N_SECTORS * N_TTES_PER_SECTOR,
2169          N_SECTORS * N_TTES_PER_SECTOR_USABLE,
2170          SECTOR_TT_LIMIT_PERCENT );
2171    }
2172 
2173    VG_(debugLog)(2, "transtab",
2174       "cache: %d sectors of %d bytes each = %d total\n",
2175        N_SECTORS, 8 * tc_sector_szQ,
2176        N_SECTORS * 8 * tc_sector_szQ );
2177    VG_(debugLog)(2, "transtab",
2178       "table: %d total entries, max occupancy %d (%d%%)\n",
2179       N_SECTORS * N_TTES_PER_SECTOR,
2180       N_SECTORS * N_TTES_PER_SECTOR_USABLE,
2181       SECTOR_TT_LIMIT_PERCENT );
2182 }
2183 
2184 
2185 /*------------------------------------------------------------*/
2186 /*--- Printing out statistics.                             ---*/
2187 /*------------------------------------------------------------*/
2188 
2189 static ULong safe_idiv( ULong a, ULong b )
2190 {
2191    return (b == 0 ? 0 : a / b);
2192 }
2193 
2194 UInt VG_(get_bbs_translated) ( void )
2195 {
2196    return n_in_count;
2197 }
2198 
2199 void VG_(print_tt_tc_stats) ( void )
2200 {
2201    VG_(message)(Vg_DebugMsg,
2202       "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
2203       n_full_lookups, n_lookup_probes );
2204    VG_(message)(Vg_DebugMsg,
2205       "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
2206       n_fast_updates, n_fast_flushes );
2207 
2208    VG_(message)(Vg_DebugMsg,
2209                 " transtab: new        %'llu "
2210                 "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
2211                 n_in_count, n_in_osize, n_in_tsize,
2212                 safe_idiv(10*n_in_tsize, n_in_osize),
2213                 n_in_sc_count);
2214    VG_(message)(Vg_DebugMsg,
2215                 " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
2216                 n_dump_count, n_dump_osize );
2217    VG_(message)(Vg_DebugMsg,
2218                 " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
2219                 n_disc_count, n_disc_osize );
2220 
2221    if (0) {
2222       Int i;
2223       VG_(printf)("\n");
2224       for (i = 0; i < ECLASS_N; i++) {
2225          VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
2226          if (i % 16 == 15)
2227             VG_(printf)("\n");
2228       }
2229       VG_(printf)("\n\n");
2230    }
2231 }
2232 
2233 /*------------------------------------------------------------*/
2234 /*--- Printing out of profiling results.                   ---*/
2235 /*------------------------------------------------------------*/
2236 
2237 static ULong score ( TTEntry* tte )
2238 {
2239    return ((ULong)tte->weight) * ((ULong)tte->count);
2240 }
2241 
2242 ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
2243 {
2244    Int   sno, i, r, s;
2245    ULong score_total;
2246 
2247    /* First, compute the total weighted count, and find the top N
2248       ttes.  tops contains pointers to the most-used n_tops blocks, in
2249       descending order (viz, tops[0] is the highest scorer). */
2250    for (i = 0; i < n_tops; i++) {
2251       tops[i].addr  = 0;
2252       tops[i].score = 0;
2253    }
2254 
2255    score_total = 0;
2256 
2257    for (sno = 0; sno < N_SECTORS; sno++) {
2258       if (sectors[sno].tc == NULL)
2259          continue;
2260       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2261          if (sectors[sno].tt[i].status != InUse)
2262             continue;
2263          score_total += score(&sectors[sno].tt[i]);
2264          /* Find the rank for sectors[sno].tt[i]. */
2265          r = n_tops-1;
2266          while (True) {
2267             if (r == -1)
2268                break;
2269             if (tops[r].addr == 0) {
2270                r--;
2271                continue;
2272             }
2273             if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
2274                r--;
2275                continue;
2276             }
2277             break;
2278          }
2279          r++;
2280          vg_assert(r >= 0 && r <= n_tops);
2281          /* This bb should be placed at r, and bbs above it shifted
2282             upwards one slot. */
2283          if (r < n_tops) {
2284             for (s = n_tops-1; s > r; s--)
2285                tops[s] = tops[s-1];
2286             tops[r].addr  = sectors[sno].tt[i].entry;
2287             tops[r].score = score( &sectors[sno].tt[i] );
2288          }
2289       }
2290    }
2291 
2292    return score_total;
2293 }
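
/* For illustration: the ranking loop above is an insertion step for a
   descending top-N array: walk r from the last slot towards the top
   while the new score outranks tops[r] (or the slot is unoccupied),
   then shift the lower scorers down one place and drop the new entry
   in.  A compiled-out sketch over bare scores (ad hoc name; the real
   code also records the entry address alongside each score):
*/
#if 0
static void topn_insert ( ULong* tops, Int n_tops, ULong newScore )
{
   Int r = n_tops - 1, s;
   while (r >= 0 && (tops[r] == 0 || newScore > tops[r]))
      r--;
   r++;                          /* first slot we do not outrank */
   if (r < n_tops) {
      for (s = n_tops-1; s > r; s--)
         tops[s] = tops[s-1];    /* shift lower scorers down one */
      tops[r] = newScore;
   }
}
#endif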
2294 
2295 /*--------------------------------------------------------------------*/
2296 /*--- end                                                          ---*/
2297 /*--------------------------------------------------------------------*/
2298