• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Management of the translation table and cache.               ---*/
4 /*---                                                 m_transtab.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of Valgrind, a dynamic binary instrumentation
9    framework.
10 
11    Copyright (C) 2000-2017 Julian Seward
12       jseward@acm.org
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27    02111-1307, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 */
31 
32 #include "pub_core_basics.h"
33 #include "pub_core_debuglog.h"
34 #include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
35 #include "pub_core_libcbase.h"
36 #include "pub_core_vki.h"        // to keep pub_core_libproc.h happy, sigh
37 #include "pub_core_libcproc.h"   // VG_(invalidate_icache)
38 #include "pub_core_libcassert.h"
39 #include "pub_core_libcprint.h"
40 #include "pub_core_options.h"
41 #include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
42 #include "pub_core_transtab.h"
43 #include "pub_core_aspacemgr.h"
44 #include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
45 #include "pub_core_xarray.h"
46 #include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses
47 
48 
49 #define DEBUG_TRANSTAB 0
50 
51 
52 /*-------------------------------------------------------------*/
53 /*--- Management of the FIFO-based translation table+cache. ---*/
54 /*-------------------------------------------------------------*/
55 
56 /* Nr of sectors provided via command line parameter. */
57 UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
58 /* Nr of sectors.
59    Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
60 static SECno n_sectors = 0;
61 
62 /* Average size of a transtab code entry. 0 means to use the tool
63    provided default. */
64 UInt VG_(clo_avg_transtab_entry_size) = 0;
65 
66 /*------------------ CONSTANTS ------------------*/
67 /* Number of entries in hash table of each sector.  This needs to be a prime
68    number to work properly, it must be <= 65535 (so that a TTE index
69    fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED, HTT_DELETED)
70    to denote 'deleted') and  0xFFFE (HTT_EMPTY) to denote 'Empty' in the
71    hash table.
72    It is strongly recommended not to change this.
73    65521 is the largest prime <= 65535. */
74 #define N_HTTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521
75 
76 #define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
77 #define HTT_DELETED     EC2TTE_DELETED
78 #define HTT_EMPTY       0XFFFE
79 
80 // HTTno is the Sector->htt hash table index. Must be the same type as TTEno.
81 typedef UShort HTTno;
82 
83 /* Because each sector contains a hash table of TTEntries, we need to
84    specify the maximum allowable loading, after which the sector is
85    deemed full. */
86 #define SECTOR_TT_LIMIT_PERCENT 65
87 
88 /* The sector is deemed full when this many entries are in it. */
89 #define N_TTES_PER_SECTOR \
90            ((N_HTTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
91 
92 /* Equivalence classes for fast address range deletion.  There are 1 +
93    2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
94    address range which does not fall cleanly within any specific bin.
95    Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32.
96    ECLASS_N must fit in a EclassNo. */
97 #define ECLASS_SHIFT 13
98 #define ECLASS_WIDTH 9
99 #define ECLASS_MISC  (1 << ECLASS_WIDTH)
100 #define ECLASS_N     (1 + ECLASS_MISC)
101 STATIC_ASSERT(ECLASS_SHIFT + ECLASS_WIDTH < 32);
102 
103 typedef UShort EClassNo;
104 
105 /*------------------ TYPES ------------------*/
106 
107 /* In edges ("to-me") in the graph created by chaining. */
108 typedef
109    struct {
110       SECno from_sNo;   /* sector number */
111       TTEno from_tteNo; /* TTE number in given sector */
112       UInt  from_offs: (sizeof(UInt)*8)-1;  /* code offset from TCEntry::tcptr
113                                                where the patch is */
114       Bool  to_fastEP:1; /* Is the patch to a fast or slow entry point? */
115    }
116    InEdge;
117 
118 
119 /* Out edges ("from-me") in the graph created by chaining. */
120 typedef
121    struct {
122       SECno to_sNo;    /* sector number */
123       TTEno to_tteNo;  /* TTE number in given sector */
124       UInt  from_offs; /* code offset in owning translation where patch is */
125    }
126    OutEdge;
127 
128 
129 #define N_FIXED_IN_EDGE_ARR 3
130 typedef
131    struct {
132       Bool     has_var:1; /* True if var is used (then n_fixed must be 0) */
133       UInt     n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_IN_EDGE_ARR */
134       union {
135          InEdge   fixed[N_FIXED_IN_EDGE_ARR];      /* if !has_var */
136          XArray*  var; /* XArray* of InEdgeArr */  /* if  has_var */
137       } edges;
138    }
139    InEdgeArr;
140 
141 #define N_FIXED_OUT_EDGE_ARR 2
142 typedef
143    struct {
144       Bool    has_var:1; /* True if var is used (then n_fixed must be 0) */
145       UInt    n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_OUT_EDGE_ARR */
146       union {
147          OutEdge fixed[N_FIXED_OUT_EDGE_ARR];      /* if !has_var */
148          XArray* var; /* XArray* of OutEdgeArr */  /* if  has_var */
149       } edges;
150    }
151    OutEdgeArr;
152 
153 
154 /* A translation-table entry.  This indicates precisely which areas of
155    guest code are included in the translation, and contains all other
156    auxiliary info too.  These are split into hold and cold parts,
157    TTEntryH and TTEntryC, so as to be more cache-friendly
158    (a.k.a. "faster") when searching for translations that intersect
159    with a certain guest code address range, for deletion.  In the
160    worst case this can involve a sequential scan through all the hot
161    parts, so they are packed as compactly as possible -- 32 bytes on a
162    64-bit platform, 20 bytes on a 32-bit platform.
163 
164    Each TTEntry is logically a matching cold-and-hot pair, and in fact
165    it was originally one structure.  First, the cold part.
166 */
167 typedef
168    struct {
169       union {
170          struct {
171             /* Profiling only: the count and weight (arbitrary meaning) for
172                this translation.  Weight is a property of the translation
173                itself and computed once when the translation is created.
174                Count is an entry count for the translation and is
175                incremented by 1 every time the translation is used, if we
176                are profiling. */
177             ULong    count;
178             UShort   weight;
179          } prof; // if status == InUse
180          TTEno next_empty_tte; // if status != InUse
181       } usage;
182 
183       /* 64-bit aligned pointer to one or more 64-bit words containing
184          the corresponding host code (must be in the same sector!)
185          This is a pointer into the sector's tc (code) area. */
186       ULong* tcptr;
187 
188       /* This is the original guest address that purportedly is the
189          entry point of the translation.  You might think that .entry
190          should be the same as .vge->base[0], and most of the time it
191          is.  However, when doing redirections, that is not the case.
192          .vge must always correctly describe the guest code sections
193          from which this translation was made.  However, .entry may or
194          may not be a lie, depending on whether or not we're doing
195          redirection. */
196       Addr entry;
197 
198       /* Address range summary info: these are pointers back to
199          eclass[] entries in the containing Sector.  Those entries in
200          turn point back here -- the two structures are mutually
201          redundant but both necessary to make fast deletions work.
202          The eclass info is similar to, and derived from, this entry's
203          'vge' field, but it is not the same */
204       UShort   n_tte2ec;      // # tte2ec pointers (1 to 3)
205       EClassNo tte2ec_ec[3];  // for each, the eclass #
206       UInt     tte2ec_ix[3];  // and the index within the eclass.
207       // for i in 0 .. n_tte2ec-1
208       //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
209       // should be the index
210       // of this TTEntry in the containing Sector's tt array.
211 
212       /* Admin information for chaining.  'in_edges' is a set of the
213          patch points which jump to this translation -- hence are
214          predecessors in the control flow graph.  'out_edges' points
215          to successors in the control flow graph -- translations to
216          which this one has a patched jump.  In short these are just
217          backwards and forwards edges in the graph of patched-together
218          blocks.  The 'in_edges' contain slightly more info, enough
219          that we can undo the chaining of each mentioned patch point.
220          The 'out_edges' list exists only so that we can visit the
221          'in_edges' entries of all blocks we're patched through to, in
222          order to remove ourselves from then when we're deleted. */
223 
224       /* A translation can disappear for two reasons:
225           1. erased (as part of the oldest sector cleanup) when the
226              youngest sector is full.
227           2. discarded due to calls to VG_(discard_translations).
228              VG_(discard_translations) sets the status of the
229              translation to 'Deleted'.
230              A.o., the gdbserver discards one or more translations
231              when a breakpoint is inserted or removed at an Addr,
232              or when single stepping mode is enabled/disabled
233              or when a translation is instrumented for gdbserver
234              (all the target jumps of this translation are
235               invalidated).
236 
237          So, it is possible that the translation A to be patched
238          (to obtain a patched jump from A to B) is invalidated
239          after B is translated and before A is patched.
240          In case a translation is erased or discarded, the patching
241          cannot be done.  VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode
242          are checking the 'from' translation still exists before
243          doing the patching.
244 
245          Is it safe to erase or discard the current translation E being
246          executed ? Amazing, but yes, it is safe.
247          Here is the explanation:
248 
249          The translation E being executed can only be erased if a new
250          translation N is being done. A new translation is done only
251          if the host addr is a not yet patched jump to another
252          translation. In such a case, the guest address of N is
253          assigned to the PC in the VEX state. Control is returned
254          to the scheduler. N will be translated. This can erase the
255          translation E (in case of sector full). VG_(tt_tc_do_chaining)
256          will not do the chaining to a non found translation E.
257          The execution will continue at the current guest PC
258          (i.e. the translation N).
259          => it is safe to erase the current translation being executed.
260 
261          The current translation E being executed can also be discarded
262          (e.g. by gdbserver). VG_(discard_translations) will mark
263          this translation E as Deleted, but the translation itself
264          is not erased. In particular, its host code can only
265          be overwritten or erased in case a new translation is done.
266          A new translation will only be done if a not yet translated
267          jump is to be executed. The execution of the Deleted translation
268          E will continue till a non patched jump is encountered.
269          This situation is then similar to the 'erasing' case above :
270          the current translation E can be erased or overwritten, as the
271          execution will continue at the new translation N.
272       */
273 
274       /* It is possible, although very unlikely, that a block A has
275          more than one patched jump to block B.  This could happen if
276          (eg) A finishes "jcond B; jmp B".
277 
278          This means in turn that B's in_edges set can list A more than
279          once (twice in this example).  However, each such entry must
280          have a different from_offs, since a patched jump can only
281          jump to one place at once (it's meaningless for it to have
282          multiple destinations.)  IOW, the successor and predecessor
283          edges in the graph are not uniquely determined by a
284          TTEntry --> TTEntry pair, but rather by a
285          (TTEntry,offset) --> TTEntry triple.
286 
287          If A has multiple edges to B then B will mention A multiple
288          times in its in_edges.  To make things simpler, we then
289          require that A mentions B exactly the same number of times in
290          its out_edges.  Furthermore, a matching out-in pair must have
291          the same offset (from_offs).  This facilitates sanity
292          checking, and it facilitates establishing the invariant that
293          a out_edges set may not have duplicates when using the
294          equality defined by (TTEntry,offset).  Hence the out_edges
295          and in_edges sets really do have both have set semantics.
296 
297          eg if  A has been patched to B at offsets 42 and 87 (in A)
298          then   A.out_edges = { (B,42), (B,87) }   (in any order)
299          and    B.in_edges  = { (A,42), (A,87) }   (in any order)
300 
301          Hence for each node pair P->Q in the graph, there's a 1:1
302          mapping between P.out_edges and Q.in_edges.
303       */
304       InEdgeArr  in_edges;
305       OutEdgeArr out_edges;
306    }
307    TTEntryC;
308 
309 /* And this is the hot part. */
310 typedef
311    struct {
312       /* This structure describes precisely what ranges of guest code
313          the translation covers, so we can decide whether or not to
314          delete it when translations of a given address range are
315          invalidated.  Originally this was a VexGuestExtents, but that
316          itself is 32 bytes on a 64-bit target, and we really want to
317          squeeze in an 8-bit |status| field into the 32 byte field, so
318          as to get 2 of them in a 64 byte LLC line.  Hence the
319          VexGuestExtents fields are inlined, the _n_used field is
320          changed to a UChar (it only ever has the values 1, 2 or 3)
321          and the 8-bit status field is placed in byte 31 of the
322          structure. */
323       /* ORIGINALLY: VexGuestExtents vge; */
324       Addr   vge_base[3];
325       UShort vge_len[3];
326       UChar  vge_n_used;  /* BEWARE: is UShort in VexGuestExtents */
327 
328       /* Status of the slot.  Note, we need to be able to do lazy
329          deletion, hence the Deleted state. */
330       enum { InUse, Deleted, Empty } status : 8;
331    }
332    TTEntryH;
333 
334 
335 /* Impedance matchers, that convert between a VexGuestExtents and a
336    TTEntryH, ignoring TTEntryH::status, which doesn't exist in a
337    VexGuestExtents -- it is entirely unrelated. */
338 
339 /* Takes a VexGuestExtents and pushes it into a TTEntryH.  The
340    TTEntryH.status field is left unchanged. */
341 static
TTEntryH__from_VexGuestExtents(TTEntryH * tteH,const VexGuestExtents * vge)342 inline void TTEntryH__from_VexGuestExtents ( /*MOD*/TTEntryH* tteH,
343                                              const VexGuestExtents* vge )
344 {
345    tteH->vge_base[0] = vge->base[0];
346    tteH->vge_base[1] = vge->base[1];
347    tteH->vge_base[2] = vge->base[2];
348    tteH->vge_len[0]  = vge->len[0];
349    tteH->vge_len[1]  = vge->len[1];
350    tteH->vge_len[2]  = vge->len[2];
351    tteH->vge_n_used  = (UChar)vge->n_used; /* BEWARE: no range check. */
352 }
353 
354 /* Takes a TTEntryH and pushes the vge_ components into a VexGuestExtents. */
355 static
TTEntryH__to_VexGuestExtents(VexGuestExtents * vge,const TTEntryH * tteH)356 inline void TTEntryH__to_VexGuestExtents ( /*MOD*/VexGuestExtents* vge,
357                                            const TTEntryH* tteH )
358 {
359    vge->base[0] = tteH->vge_base[0];
360    vge->base[1] = tteH->vge_base[1];
361    vge->base[2] = tteH->vge_base[2];
362    vge->len[0]  = tteH->vge_len[0] ;
363    vge->len[1]  = tteH->vge_len[1] ;
364    vge->len[2]  = tteH->vge_len[2] ;
365    vge->n_used  = (UShort)tteH->vge_n_used ;
366 }
367 
368 
369 /* A structure used for mapping host code addresses back to the
370    relevant TTEntry.  Used when doing chaining, for finding the
371    TTEntry to which some arbitrary patch address belongs. */
372 typedef
373    struct {
374       UChar* start;
375       UInt   len;
376       TTEno  tteNo;
377    }
378    HostExtent;
379 
380 /* Finally, a sector itself.  Each sector contains an array of
381    TCEntries, which hold code, and an array of TTEntries, containing
382    all required administrative info.  Profiling is supported using the
383    TTEntry usage.prof.count and usage.prof.weight fields, if required.
384 
385    If the sector is not in use, all three pointers are NULL and
386    tt_n_inuse is zero.
387 */
388 typedef
389    struct {
390       /* The TCEntry area.  Size of this depends on the average
391          translation size.  We try and size it so it becomes full
392          precisely when this sector's translation table (tt) reaches
393          its load limit (SECTOR_TT_LIMIT_PERCENT). */
394       ULong* tc;
395 
396       /* An hash table, mapping guest address to an index in the tt array.
397          htt is a fixed size, always containing
398          exactly N_HTTES_PER_SECTOR entries. */
399       TTEno* htt;
400 
401       /* The TTEntry{C,H} arrays.  These are a fixed size, always
402          containing exactly N_TTES_PER_SECTOR entries. */
403       TTEntryC* ttC;
404       TTEntryH* ttH;
405 
406       /* This points to the current allocation point in tc. */
407       ULong* tc_next;
408 
409       /* The count of tt entries with state InUse. */
410       Int tt_n_inuse;
411 
412       /* A list of Empty/Deleted entries, chained by tte->next_empty_tte */
413       TTEno empty_tt_list;
414 
415       /* Expandable arrays of tt indices for each of the ECLASS_N
416          address range equivalence classes.  These hold indices into
417          the containing sector's tt array, which in turn should point
418          back here. */
419       Int     ec2tte_size[ECLASS_N];
420       Int     ec2tte_used[ECLASS_N];
421       TTEno*  ec2tte[ECLASS_N];
422 
423       /* The host extents.  The [start, +len) ranges are constructed
424          in strictly non-overlapping order, so we can binary search
425          them at any time. */
426       XArray* host_extents; /* XArray* of HostExtent */
427    }
428    Sector;
429 
430 
431 /*------------------ DECLS ------------------*/
432 
433 /* The root data structure is an array of sectors.  The index of the
434    youngest sector is recorded, and new translations are put into that
435    sector.  When it fills up, we move along to the next sector and
436    start to fill that up, wrapping around at the end of the array.
437    That way, once all N_TC_SECTORS have been bought into use for the
438    first time, and are full, we then re-use the oldest sector,
439    endlessly.
440 
441    When running, youngest sector should be between >= 0 and <
442    N_TC_SECTORS.  The initial  value indicates the TT/TC system is
443    not yet initialised.
444 */
445 static Sector sectors[MAX_N_SECTORS];
446 static Int    youngest_sector = INV_SNO;
447 
448 /* The number of ULongs in each TCEntry area.  This is computed once
449    at startup and does not change. */
450 static Int    tc_sector_szQ = 0;
451 
452 
453 /* A list of sector numbers, in the order which they should be
454    searched to find translations.  This is an optimisation to be used
455    when searching for translations and should not affect
456    correctness.  INV_SNO denotes "no entry". */
457 static SECno sector_search_order[MAX_N_SECTORS];
458 
459 
460 /* Fast helper for the TC.  A direct-mapped cache which holds a set of
461    recently used (guest address, host address) pairs.  This array is
462    referred to directly from m_dispatch/dispatch-<platform>.S.
463 
464    Entries in tt_fast may refer to any valid TC entry, regardless of
465    which sector it's in.  Consequently we must be very careful to
466    invalidate this cache when TC entries are changed or disappear.
467 
468    A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
469    pointed at to cause that cache entry to miss.  This relies on the
470    assumption that no guest code actually has that address, hence a
471    value 0x1 seems good.  m_translate gives the client a synthetic
472    segfault if it tries to execute at this address.
473 */
474 /*
475 typedef
476    struct {
477       Addr guest;
478       Addr host;
479    }
480    FastCacheEntry;
481 */
482 /*global*/ __attribute__((aligned(16)))
483            FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
484 
485 /* Make sure we're not used before initialisation. */
486 static Bool init_done = False;
487 
488 
489 /*------------------ STATS DECLS ------------------*/
490 
491 /* Number of fast-cache updates and flushes done. */
492 static ULong n_fast_flushes = 0;
493 static ULong n_fast_updates = 0;
494 
495 /* Number of full lookups done. */
496 static ULong n_full_lookups = 0;
497 static ULong n_lookup_probes = 0;
498 
499 /* Number/osize/tsize of translations entered; also the number of
500    those for which self-checking was requested. */
501 static ULong n_in_count    = 0;
502 static ULong n_in_osize    = 0;
503 static ULong n_in_tsize    = 0;
504 static ULong n_in_sc_count = 0;
505 
506 /* Number/osize of translations discarded due to lack of space. */
507 static ULong n_dump_count = 0;
508 static ULong n_dump_osize = 0;
509 static ULong n_sectors_recycled = 0;
510 
511 /* Number/osize of translations discarded due to requests to do so. */
512 static ULong n_disc_count = 0;
513 static ULong n_disc_osize = 0;
514 
515 
516 /*-------------------------------------------------------------*/
517 /*--- Misc                                                  ---*/
518 /*-------------------------------------------------------------*/
519 
ttaux_malloc(const HChar * tag,SizeT n)520 static void* ttaux_malloc ( const HChar* tag, SizeT n )
521 {
522    return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
523 }
524 
ttaux_free(void * p)525 static void ttaux_free ( void* p )
526 {
527    VG_(arena_free)(VG_AR_TTAUX, p);
528 }
529 
530 
531 /*-------------------------------------------------------------*/
532 /*--- Chaining support                                      ---*/
533 /*-------------------------------------------------------------*/
534 
index_tteC(SECno sNo,TTEno tteNo)535 static inline TTEntryC* index_tteC ( SECno sNo, TTEno tteNo )
536 {
537    vg_assert(sNo < n_sectors);
538    vg_assert(tteNo < N_TTES_PER_SECTOR);
539    Sector* s = &sectors[sNo];
540    vg_assert(s->ttC && s->ttH);
541    TTEntryC* tteC = &s->ttC[tteNo];
542    TTEntryH* tteH = &s->ttH[tteNo];
543    vg_assert(tteH->status == InUse);
544    return tteC;
545 }
546 
index_tteH(SECno sNo,TTEno tteNo)547 static inline TTEntryH* index_tteH ( SECno sNo, TTEno tteNo )
548 {
549    vg_assert(sNo < n_sectors);
550    vg_assert(tteNo < N_TTES_PER_SECTOR);
551    Sector* s = &sectors[sNo];
552    vg_assert(s->ttH);
553    TTEntryH* tteH = &s->ttH[tteNo];
554    vg_assert(tteH->status == InUse);
555    return tteH;
556 }
557 
InEdge__init(InEdge * ie)558 static void InEdge__init ( InEdge* ie )
559 {
560    ie->from_sNo   = INV_SNO; /* invalid */
561    ie->from_tteNo = 0;
562    ie->from_offs  = 0;
563    ie->to_fastEP  = False;
564 }
565 
OutEdge__init(OutEdge * oe)566 static void OutEdge__init ( OutEdge* oe )
567 {
568    oe->to_sNo    = INV_SNO; /* invalid */
569    oe->to_tteNo  = 0;
570    oe->from_offs = 0;
571 }
572 
TTEntryC__init(TTEntryC * tteC)573 static void TTEntryC__init ( TTEntryC* tteC )
574 {
575    VG_(bzero_inline)(tteC, sizeof(*tteC));
576 }
577 
TTEntryH__init(TTEntryH * tteH)578 static void TTEntryH__init ( TTEntryH* tteH )
579 {
580    VG_(bzero_inline)(tteH, sizeof(*tteH));
581 }
582 
InEdgeArr__size(const InEdgeArr * iea)583 static UWord InEdgeArr__size ( const InEdgeArr* iea )
584 {
585    if (iea->has_var) {
586       vg_assert(iea->n_fixed == 0);
587       return VG_(sizeXA)(iea->edges.var);
588    } else {
589       vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
590       return iea->n_fixed;
591    }
592 }
593 
InEdgeArr__makeEmpty(InEdgeArr * iea)594 static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
595 {
596    if (iea->has_var) {
597       vg_assert(iea->n_fixed == 0);
598       VG_(deleteXA)(iea->edges.var);
599       iea->edges.var = NULL;
600       iea->has_var = False;
601    } else {
602       vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
603       iea->n_fixed = 0;
604    }
605 }
606 
607 static
InEdgeArr__index(InEdgeArr * iea,UWord i)608 InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
609 {
610    if (iea->has_var) {
611       vg_assert(iea->n_fixed == 0);
612       return (InEdge*)VG_(indexXA)(iea->edges.var, i);
613    } else {
614       vg_assert(i < iea->n_fixed);
615       return &iea->edges.fixed[i];
616    }
617 }
618 
619 static
InEdgeArr__deleteIndex(InEdgeArr * iea,UWord i)620 void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
621 {
622    if (iea->has_var) {
623       vg_assert(iea->n_fixed == 0);
624       VG_(removeIndexXA)(iea->edges.var, i);
625    } else {
626       vg_assert(i < iea->n_fixed);
627       for (; i+1 < iea->n_fixed; i++) {
628          iea->edges.fixed[i] = iea->edges.fixed[i+1];
629       }
630       iea->n_fixed--;
631    }
632 }
633 
634 static
InEdgeArr__add(InEdgeArr * iea,InEdge * ie)635 void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
636 {
637    if (iea->has_var) {
638       vg_assert(iea->n_fixed == 0);
639       VG_(addToXA)(iea->edges.var, ie);
640    } else {
641       vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
642       if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
643          /* The fixed array is full, so we have to initialise an
644             XArray and copy the fixed array into it. */
645          XArray *var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
646                                   ttaux_free,
647                                   sizeof(InEdge));
648          VG_(hintSizeXA) (var, iea->n_fixed + 1);
649          UWord i;
650          for (i = 0; i < iea->n_fixed; i++) {
651             VG_(addToXA)(var, &iea->edges.fixed[i]);
652          }
653          VG_(addToXA)(var, ie);
654          iea->n_fixed = 0;
655          iea->has_var = True;
656          iea->edges.var = var;
657       } else {
658          /* Just add to the fixed array. */
659          iea->edges.fixed[iea->n_fixed++] = *ie;
660       }
661    }
662 }
663 
OutEdgeArr__size(const OutEdgeArr * oea)664 static UWord OutEdgeArr__size ( const OutEdgeArr* oea )
665 {
666    if (oea->has_var) {
667       vg_assert(oea->n_fixed == 0);
668       return VG_(sizeXA)(oea->edges.var);
669    } else {
670       vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
671       return oea->n_fixed;
672    }
673 }
674 
OutEdgeArr__makeEmpty(OutEdgeArr * oea)675 static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
676 {
677    if (oea->has_var) {
678       vg_assert(oea->n_fixed == 0);
679       VG_(deleteXA)(oea->edges.var);
680       oea->edges.var = NULL;
681       oea->has_var = False;
682    } else {
683       vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
684       oea->n_fixed = 0;
685    }
686 }
687 
688 static
OutEdgeArr__index(OutEdgeArr * oea,UWord i)689 OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
690 {
691    if (oea->has_var) {
692       vg_assert(oea->n_fixed == 0);
693       return (OutEdge*)VG_(indexXA)(oea->edges.var, i);
694    } else {
695       vg_assert(i < oea->n_fixed);
696       return &oea->edges.fixed[i];
697    }
698 }
699 
700 static
OutEdgeArr__deleteIndex(OutEdgeArr * oea,UWord i)701 void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
702 {
703    if (oea->has_var) {
704       vg_assert(oea->n_fixed == 0);
705       VG_(removeIndexXA)(oea->edges.var, i);
706    } else {
707       vg_assert(i < oea->n_fixed);
708       for (; i+1 < oea->n_fixed; i++) {
709          oea->edges.fixed[i] = oea->edges.fixed[i+1];
710       }
711       oea->n_fixed--;
712    }
713 }
714 
715 static
OutEdgeArr__add(OutEdgeArr * oea,OutEdge * oe)716 void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
717 {
718    if (oea->has_var) {
719       vg_assert(oea->n_fixed == 0);
720       VG_(addToXA)(oea->edges.var, oe);
721    } else {
722       vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
723       if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
724          /* The fixed array is full, so we have to initialise an
725             XArray and copy the fixed array into it. */
726          XArray *var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
727                                   ttaux_free,
728                                   sizeof(OutEdge));
729          VG_(hintSizeXA) (var, oea->n_fixed+1);
730          UWord i;
731          for (i = 0; i < oea->n_fixed; i++) {
732             VG_(addToXA)(var, &oea->edges.fixed[i]);
733          }
734          VG_(addToXA)(var, oe);
735          oea->n_fixed = 0;
736          oea->has_var = True;
737          oea->edges.var = var;
738       } else {
739          /* Just add to the fixed array. */
740          oea->edges.fixed[oea->n_fixed++] = *oe;
741       }
742    }
743 }
744 
745 static
HostExtent__cmpOrd(const void * v1,const void * v2)746 Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
747 {
748    const HostExtent* hx1 = v1;
749    const HostExtent* hx2 = v2;
750    if (hx1->start + hx1->len <= hx2->start) return -1;
751    if (hx2->start + hx2->len <= hx1->start) return 1;
752    return 0; /* partial overlap */
753 }
754 
755 /* True if hx is a dead host extent, i.e. corresponds to host code
756    of an entry that was invalidated. */
757 static
HostExtent__is_dead(const HostExtent * hx,const Sector * sec)758 Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
759 {
760    const TTEno tteNo = hx->tteNo;
761 #define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
762       VG_(printf) (m                                            \
763                    " start 0x%p len %u sector %d ttslot %u"     \
764                    " tt.entry 0x%lu tt.tcptr 0x%p\n",           \
765                    hx->start, hx->len, (int)(sec - sectors),    \
766                    hx->tteNo,                                   \
767                    sec->ttC[tteNo].entry, sec->ttC[tteNo].tcptr)
768 
769    /* Entry might have been invalidated and not re-used yet.*/
770    if (sec->ttH[tteNo].status == Deleted) {
771       LDEBUG("found deleted entry");
772       return True;
773    }
774    /* Maybe we found this entry via a host_extents which was
775       inserted for an entry which was changed to Deleted then
776       re-used after. If this entry was re-used, then its tcptr
777       is >= to host_extents start (i.e. the previous tcptr) + len.
778       This is the case as there is no re-use of host code: a new
779       entry or re-used entry always gets "higher value" host code. */
780    if ((UChar*) sec->ttC[tteNo].tcptr >= hx->start + hx->len) {
781       LDEBUG("found re-used entry");
782       return True;
783    }
784 
785    return False;
786 #undef LDEBUG
787 }
788 
789 static __attribute__((noinline))
find_TTEntry_from_hcode(SECno * from_sNo,TTEno * from_tteNo,void * hcode)790 Bool find_TTEntry_from_hcode( /*OUT*/SECno* from_sNo,
791                               /*OUT*/TTEno* from_tteNo,
792                               void* hcode )
793 {
794    SECno i;
795 
796    /* Search order logic copied from VG_(search_transtab). */
797    for (i = 0; i < n_sectors; i++) {
798       SECno sno = sector_search_order[i];
799       if (UNLIKELY(sno == INV_SNO))
800          return False; /* run out of sectors to search */
801 
802       const Sector* sec = &sectors[sno];
803       const XArray* /* of HostExtent */ host_extents = sec->host_extents;
804       vg_assert(host_extents);
805 
806       HostExtent key;
807       VG_(memset)(&key, 0, sizeof(key));
808       key.start = hcode;
809       key.len = 1;
810       Word firstW = -1, lastW = -1;
811       Bool found  = VG_(lookupXA_UNSAFE)(
812                        host_extents, &key, &firstW, &lastW,
813                        HostExtent__cmpOrd );
814       vg_assert(firstW == lastW); // always true, even if not found
815       if (found) {
816          HostExtent* hx = VG_(indexXA)(host_extents, firstW);
817          TTEno tteNo = hx->tteNo;
818          /* Do some additional sanity checks. */
819          vg_assert(tteNo < N_TTES_PER_SECTOR);
820 
821          /* if this hx entry corresponds to dead host code, we must
822             tell this code has not been found, as it cannot be patched. */
823          if (HostExtent__is_dead (hx, sec))
824             return False;
825 
826          vg_assert(sec->ttH[tteNo].status == InUse);
827          /* Can only half check that the found TTEntry contains hcode,
828             due to not having a length value for the hcode in the
829             TTEntry. */
830          vg_assert((UChar*)sec->ttC[tteNo].tcptr <= (UChar*)hcode);
831          /* Looks plausible */
832          *from_sNo   = sno;
833          *from_tteNo = tteNo;
834          return True;
835       }
836    }
837    return False;
838 }
839 
840 
841 /* Figure out whether or not hcode is jitted code present in the main
842    code cache (but not in the no-redir cache).  Used for sanity
843    checking. */
is_in_the_main_TC(const void * hcode)844 static Bool is_in_the_main_TC ( const void* hcode )
845 {
846    SECno i, sno;
847    for (i = 0; i < n_sectors; i++) {
848       sno = sector_search_order[i];
849       if (sno == INV_SNO)
850          break; /* run out of sectors to search */
851       if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc
852           && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next
853                               + sizeof(ULong) - 1)
854          return True;
855    }
856    return False;
857 }
858 
859 
860 /* Fulfill a chaining request, and record admin info so we
861    can undo it later, if required.
862 */
VG_(tt_tc_do_chaining)863 void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
864                               SECno to_sNo,
865                               TTEno to_tteNo,
866                               Bool  to_fastEP )
867 {
868    /* Get the CPU info established at startup. */
869    VexArch     arch_host = VexArch_INVALID;
870    VexArchInfo archinfo_host;
871    VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
872    VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
873    VexEndness endness_host = archinfo_host.endness;
874 
875    // host_code is where we're patching to.  So it needs to
876    // take into account, whether we're jumping to the slow
877    // or fast entry point.  By definition, the fast entry point
878    // is exactly one event check's worth of code along from
879    // the slow (tcptr) entry point.
880    TTEntryC* to_tteC   = index_tteC(to_sNo, to_tteNo);
881    void*     host_code = ((UChar*)to_tteC->tcptr)
882                          + (to_fastEP ? LibVEX_evCheckSzB(arch_host) : 0);
883 
884    // stay sane -- the patch point (dst) is in this sector's code cache
885    vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
886    vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
887                                    + sizeof(ULong) - 1 );
888 
889    /* Find the TTEntry for the from__ code.  This isn't simple since
890       we only know the patch address, which is going to be somewhere
891       inside the from_ block. */
892    SECno from_sNo   = INV_SNO;
893    TTEno from_tteNo = INV_TTE;
894    Bool from_found
895       = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
896                                  from__patch_addr );
897    if (!from_found) {
898       // The from code might have been discarded due to sector re-use
899       // or marked Deleted due to translation invalidation.
900       // In such a case, don't do the chaining.
901       VG_(debugLog)(1,"transtab",
902                     "host code %p not found (discarded? sector recycled?)"
903                     " => no chaining done\n",
904                     from__patch_addr);
905       return;
906    }
907 
908    TTEntryC* from_tteC = index_tteC(from_sNo, from_tteNo);
909 
910    /* Get VEX to do the patching itself.  We have to hand it off
911       since it is host-dependent. */
912    VexInvalRange vir
913       = LibVEX_Chain(
914            arch_host, endness_host,
915            from__patch_addr,
916            VG_(fnptr_to_fnentry)(
917               to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
918                         : &VG_(disp_cp_chain_me_to_slowEP)),
919            (void*)host_code
920         );
921    VG_(invalidate_icache)( (void*)vir.start, vir.len );
922 
923    /* Now do the tricky bit -- update the ch_succs and ch_preds info
924       for the two translations involved, so we can undo the chaining
925       later, which we will have to do if the to_ block gets removed
926       for whatever reason. */
927 
928    /* This is the new from_ -> to_ link to add. */
929    InEdge ie;
930    InEdge__init(&ie);
931    ie.from_sNo   = from_sNo;
932    ie.from_tteNo = from_tteNo;
933    ie.to_fastEP  = to_fastEP;
934    HWord from_offs = (HWord)( (UChar*)from__patch_addr
935                               - (UChar*)from_tteC->tcptr );
936    vg_assert(from_offs < 100000/* let's say */);
937    ie.from_offs  = (UInt)from_offs;
938 
939    /* This is the new to_ -> from_ backlink to add. */
940    OutEdge oe;
941    OutEdge__init(&oe);
942    oe.to_sNo    = to_sNo;
943    oe.to_tteNo  = to_tteNo;
944    oe.from_offs = (UInt)from_offs;
945 
946    /* Add .. */
947    InEdgeArr__add(&to_tteC->in_edges, &ie);
948    OutEdgeArr__add(&from_tteC->out_edges, &oe);
949 }
950 
951 
952 /* Unchain one patch, as described by the specified InEdge.  For
953    sanity check purposes only (to check that the patched location is
954    as expected) it also requires the fast and slow entry point
955    addresses of the destination block (that is, the block that owns
956    this InEdge). */
957 __attribute__((noinline))
unchain_one(VexArch arch_host,VexEndness endness_host,InEdge * ie,void * to_fastEPaddr,void * to_slowEPaddr)958 static void unchain_one ( VexArch arch_host, VexEndness endness_host,
959                           InEdge* ie,
960                           void* to_fastEPaddr, void* to_slowEPaddr )
961 {
962    vg_assert(ie);
963    TTEntryC* tteC
964       = index_tteC(ie->from_sNo, ie->from_tteNo);
965    UChar* place_to_patch
966       = ((UChar*)tteC->tcptr) + ie->from_offs;
967    UChar* disp_cp_chain_me
968       = VG_(fnptr_to_fnentry)(
969            ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
970                          : &VG_(disp_cp_chain_me_to_slowEP)
971         );
972    UChar* place_to_jump_to_EXPECTED
973       = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;
974 
975    // stay sane: both src and dst for this unchaining are
976    // in the main code cache
977    vg_assert( is_in_the_main_TC(place_to_patch) ); // src
978    vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
979    // dst check is ok because LibVEX_UnChain checks that
980    // place_to_jump_to_EXPECTED really is the current dst, and
981    // asserts if it isn't.
982    VexInvalRange vir
983        = LibVEX_UnChain( arch_host, endness_host, place_to_patch,
984                          place_to_jump_to_EXPECTED, disp_cp_chain_me );
985    VG_(invalidate_icache)( (void*)vir.start, vir.len );
986 }
987 
988 
989 /* The specified block is about to be deleted.  Update the preds and
990    succs of its associated blocks accordingly.  This includes undoing
991    any chained jumps to this block. */
992 static
unchain_in_preparation_for_deletion(VexArch arch_host,VexEndness endness_host,SECno here_sNo,TTEno here_tteNo)993 void unchain_in_preparation_for_deletion ( VexArch arch_host,
994                                            VexEndness endness_host,
995                                            SECno here_sNo, TTEno here_tteNo )
996 {
997    if (DEBUG_TRANSTAB)
998       VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
999    UWord     i, j, n, m;
1000    Int       evCheckSzB = LibVEX_evCheckSzB(arch_host);
1001    TTEntryC* here_tteC  = index_tteC(here_sNo, here_tteNo);
1002    TTEntryH* here_tteH  = index_tteH(here_sNo, here_tteNo);
1003    if (DEBUG_TRANSTAB)
1004       VG_(printf)("... QQQ tt.entry 0x%lu tt.tcptr 0x%p\n",
1005                   here_tteC->entry, here_tteC->tcptr);
1006    vg_assert(here_tteH->status == InUse);
1007 
1008    /* Visit all InEdges owned by here_tte. */
1009    n = InEdgeArr__size(&here_tteC->in_edges);
1010    for (i = 0; i < n; i++) {
1011       InEdge* ie = InEdgeArr__index(&here_tteC->in_edges, i);
1012       // Undo the chaining.
1013       UChar* here_slow_EP = (UChar*)here_tteC->tcptr;
1014       UChar* here_fast_EP = here_slow_EP + evCheckSzB;
1015       unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP);
1016       // Find the corresponding entry in the "from" node's out_edges,
1017       // and remove it.
1018       TTEntryC* from_tteC = index_tteC(ie->from_sNo, ie->from_tteNo);
1019       m = OutEdgeArr__size(&from_tteC->out_edges);
1020       vg_assert(m > 0); // it must have at least one entry
1021       for (j = 0; j < m; j++) {
1022          OutEdge* oe = OutEdgeArr__index(&from_tteC->out_edges, j);
1023          if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
1024              && oe->from_offs == ie->from_offs)
1025            break;
1026       }
1027       vg_assert(j < m); // "oe must be findable"
1028       OutEdgeArr__deleteIndex(&from_tteC->out_edges, j);
1029    }
1030 
1031    /* Visit all OutEdges owned by here_tte. */
1032    n = OutEdgeArr__size(&here_tteC->out_edges);
1033    for (i = 0; i < n; i++) {
1034       OutEdge* oe = OutEdgeArr__index(&here_tteC->out_edges, i);
1035       // Find the corresponding entry in the "to" node's in_edges,
1036       // and remove it.
1037       TTEntryC* to_tteC = index_tteC(oe->to_sNo, oe->to_tteNo);
1038       m = InEdgeArr__size(&to_tteC->in_edges);
1039       vg_assert(m > 0); // it must have at least one entry
1040       for (j = 0; j < m; j++) {
1041          InEdge* ie = InEdgeArr__index(&to_tteC->in_edges, j);
1042          if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
1043              && ie->from_offs == oe->from_offs)
1044            break;
1045       }
1046       vg_assert(j < m); // "ie must be findable"
1047       InEdgeArr__deleteIndex(&to_tteC->in_edges, j);
1048    }
1049 
1050    InEdgeArr__makeEmpty(&here_tteC->in_edges);
1051    OutEdgeArr__makeEmpty(&here_tteC->out_edges);
1052 }
1053 
1054 
1055 /*-------------------------------------------------------------*/
1056 /*--- Address-range equivalence class stuff                 ---*/
1057 /*-------------------------------------------------------------*/
1058 
1059 /* Return equivalence class number for a range. */
1060 
range_to_eclass(Addr start,UInt len)1061 static EClassNo range_to_eclass ( Addr start, UInt len )
1062 {
1063    UInt mask   = (1 << ECLASS_WIDTH) - 1;
1064    UInt lo     = (UInt)start;
1065    UInt hi     = lo + len - 1;
1066    UInt loBits = (lo >> ECLASS_SHIFT) & mask;
1067    UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
1068    if (loBits == hiBits) {
1069       vg_assert(loBits < ECLASS_N-1);
1070       return loBits;
1071    } else {
1072       return ECLASS_MISC;
1073    }
1074 }
1075 
1076 
1077 /* Calculates the equivalence class numbers for any VexGuestExtent.
1078    These are written in *eclasses, which must be big enough to hold 3
1079    Ints.  The number written, between 1 and 3, is returned.  The
1080    eclasses are presented in order, and any duplicates are removed.
1081 */
1082 
1083 static
vexGuestExtents_to_eclasses(EClassNo * eclasses,const TTEntryH * tteH)1084 Int vexGuestExtents_to_eclasses ( /*OUT*/EClassNo* eclasses,
1085                                   const TTEntryH* tteH )
1086 {
1087 
1088 #  define SWAP(_lv1,_lv2) \
1089       do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
1090 
1091    UInt i, j, n_ec;
1092    EClassNo r;
1093 
1094    vg_assert(tteH->vge_n_used >= 1 && tteH->vge_n_used <= 3);
1095 
1096    n_ec = 0;
1097    for (i = 0; i < tteH->vge_n_used; i++) {
1098       r = range_to_eclass( tteH->vge_base[i], tteH->vge_len[i] );
1099       if (r == ECLASS_MISC)
1100          goto bad;
1101       /* only add if we haven't already seen it */
1102       for (j = 0; j < n_ec; j++)
1103          if (eclasses[j] == r)
1104             break;
1105       if (j == n_ec)
1106          eclasses[n_ec++] = r;
1107    }
1108 
1109    if (n_ec == 1)
1110       return 1;
1111 
1112    if (n_ec == 2) {
1113       /* sort */
1114       if (eclasses[0] > eclasses[1])
1115          SWAP(eclasses[0], eclasses[1]);
1116       return 2;
1117    }
1118 
1119    if (n_ec == 3) {
1120       /* sort */
1121       if (eclasses[0] > eclasses[2])
1122          SWAP(eclasses[0], eclasses[2]);
1123       if (eclasses[0] > eclasses[1])
1124          SWAP(eclasses[0], eclasses[1]);
1125       if (eclasses[1] > eclasses[2])
1126          SWAP(eclasses[1], eclasses[2]);
1127       return 3;
1128    }
1129 
1130    /* NOTREACHED */
1131    vg_assert(0);
1132 
1133   bad:
1134    eclasses[0] = ECLASS_MISC;
1135    return 1;
1136 
1137 #  undef SWAP
1138 }
1139 
1140 
1141 /* Add tteno to the set of entries listed for equivalence class ec in
1142    this sector.  Returns used location in eclass array. */
1143 
1144 static
addEClassNo(Sector * sec,EClassNo ec,TTEno tteno)1145 UInt addEClassNo ( /*MOD*/Sector* sec, EClassNo ec, TTEno tteno )
1146 {
1147    Int    old_sz, new_sz, i, r;
1148    TTEno  *old_ar, *new_ar;
1149 
1150    vg_assert(ec >= 0 && ec < ECLASS_N);
1151    vg_assert(tteno < N_TTES_PER_SECTOR);
1152 
1153    if (DEBUG_TRANSTAB) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
1154 
1155    if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
1156 
1157       vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
1158 
1159       old_sz = sec->ec2tte_size[ec];
1160       old_ar = sec->ec2tte[ec];
1161       new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
1162       new_ar = ttaux_malloc("transtab.aECN.1",
1163                             new_sz * sizeof(TTEno));
1164       for (i = 0; i < old_sz; i++)
1165          new_ar[i] = old_ar[i];
1166       if (old_ar)
1167          ttaux_free(old_ar);
1168       sec->ec2tte_size[ec] = new_sz;
1169       sec->ec2tte[ec] = new_ar;
1170 
1171       if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
1172    }
1173 
1174    /* Common case */
1175    r = sec->ec2tte_used[ec]++;
1176    vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
1177    sec->ec2tte[ec][r] = tteno;
1178    return (UInt)r;
1179 }
1180 
1181 
1182 /* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
1183    eclass entries to 'sec'. */
1184 
1185 static
upd_eclasses_after_add(Sector * sec,TTEno tteno)1186 void upd_eclasses_after_add ( /*MOD*/Sector* sec, TTEno tteno )
1187 {
1188    Int i, r;
1189    EClassNo eclasses[3];
1190    vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1191 
1192    TTEntryH* tteH = &sec->ttH[tteno];
1193    r = vexGuestExtents_to_eclasses( eclasses, tteH );
1194    vg_assert(r >= 1 && r <= 3);
1195 
1196    TTEntryC* tteC = &sec->ttC[tteno];
1197    tteC->n_tte2ec = r;
1198    for (i = 0; i < r; i++) {
1199       tteC->tte2ec_ec[i] = eclasses[i];
1200       tteC->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], tteno );
1201    }
1202 }
1203 
1204 
1205 /* Check the eclass info in 'sec' to ensure it is consistent.  Returns
1206    True if OK, False if something's not right.  Expensive. */
1207 
sanity_check_eclasses_in_sector(const Sector * sec)1208 static Bool sanity_check_eclasses_in_sector ( const Sector* sec )
1209 {
1210 #  define BAD(_str) do { whassup = (_str); goto bad; } while (0)
1211 
1212    const HChar* whassup = NULL;
1213    Int      j, k, n, ec_idx;
1214    EClassNo i;
1215    EClassNo ec_num;
1216    TTEno    tteno;
1217    ULong*   tce;
1218 
1219    /* Basic checks on this sector */
1220    if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR)
1221       BAD("invalid sec->tt_n_inuse");
1222    tce = sec->tc_next;
1223    if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
1224       BAD("sec->tc_next points outside tc");
1225 
1226    /* For each eclass ... */
1227    for (i = 0; i < ECLASS_N; i++) {
1228       if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
1229          BAD("ec2tte_size/ec2tte mismatch(1)");
1230       if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
1231          BAD("ec2tte_size/ec2tte mismatch(2)");
1232       if (sec->ec2tte_used[i] < 0
1233           || sec->ec2tte_used[i] > sec->ec2tte_size[i])
1234          BAD("implausible ec2tte_used");
1235       if (sec->ec2tte_used[i] == 0)
1236          continue;
1237 
1238       /* For each tt reference in each eclass .. ensure the reference
1239          is to a valid tt entry, and that the entry's address ranges
1240          really include this eclass. */
1241 
1242       for (j = 0; j < sec->ec2tte_used[i]; j++) {
1243          tteno = sec->ec2tte[i][j];
1244          if (tteno == EC2TTE_DELETED)
1245             continue;
1246          if (tteno >= N_TTES_PER_SECTOR)
1247             BAD("implausible tteno");
1248          TTEntryC* tteC = &sec->ttC[tteno];
1249          TTEntryH* tteH = &sec->ttH[tteno];
1250          if (tteH->status != InUse)
1251             BAD("tteno points to non-inuse tte");
1252          if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3)
1253             BAD("tteC->n_tte2ec out of range");
1254          /* Exactly least one of tte->eclasses[0 .. tte->n_eclasses-1]
1255             must equal i.  Inspect tte's eclass info. */
1256          n = 0;
1257          for (k = 0; k < tteC->n_tte2ec; k++) {
1258             if (k < tteC->n_tte2ec-1
1259                 && tteC->tte2ec_ec[k] >= tteC->tte2ec_ec[k+1])
1260                BAD("tteC->tte2ec_ec[..] out of order");
1261             ec_num = tteC->tte2ec_ec[k];
1262             if (ec_num < 0 || ec_num >= ECLASS_N)
1263                BAD("tteC->tte2ec_ec[..] out of range");
1264             if (ec_num != i)
1265                continue;
1266             ec_idx = tteC->tte2ec_ix[k];
1267             if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
1268                BAD("tteC->tte2ec_ix[..] out of range");
1269             if (ec_idx == j)
1270                n++;
1271          }
1272          if (n != 1)
1273             BAD("tteno does not point back at eclass");
1274       }
1275    }
1276 
1277    /* That establishes that for each forward pointer from TTEntrys
1278       there is a corresponding backward pointer from the eclass[]
1279       arrays.  However, it doesn't rule out the possibility of other,
1280       bogus pointers in the eclass[] arrays.  So do those similarly:
1281       scan through them and check the TTEntryies they point at point
1282       back. */
1283 
1284    for (tteno = 0; tteno < N_TTES_PER_SECTOR; tteno++) {
1285 
1286       TTEntryC* tteC = &sec->ttC[tteno];
1287       TTEntryH* tteH = &sec->ttH[tteno];
1288       if (tteH->status == Empty || tteH->status == Deleted) {
1289          if (tteC->n_tte2ec != 0)
1290             BAD("tteC->n_tte2ec nonzero for unused tte");
1291          continue;
1292       }
1293 
1294       vg_assert(tteH->status == InUse);
1295 
1296       if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3)
1297          BAD("tteC->n_tte2ec out of range(2)");
1298 
1299       for (j = 0; j < tteC->n_tte2ec; j++) {
1300          ec_num = tteC->tte2ec_ec[j];
1301          if (ec_num < 0 || ec_num >= ECLASS_N)
1302             BAD("tteC->tte2ec_ec[..] out of range");
1303          ec_idx = tteC->tte2ec_ix[j];
1304          if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
1305             BAD("tteC->tte2ec_ix[..] out of range(2)");
1306          if (sec->ec2tte[ec_num][ec_idx] != tteno)
1307             BAD("ec2tte does not point back to tte");
1308       }
1309    }
1310 
1311    return True;
1312 
1313   bad:
1314    if (whassup)
1315       VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);
1316 
1317 #  if 0
1318    VG_(printf)("eclass = %d\n", i);
1319    VG_(printf)("tteno = %d\n", (Int)tteno);
1320    switch (tte->status) {
1321       case InUse:   VG_(printf)("InUse\n"); break;
1322       case Deleted: VG_(printf)("Deleted\n"); break;
1323       case Empty:   VG_(printf)("Empty\n"); break;
1324    }
1325    if (tte->status != Empty) {
1326       for (k = 0; k < tte->vge.n_used; k++)
1327          VG_(printf)("0x%lx %u\n", tte->vge.base[k], (UInt)tte->vge.len[k]);
1328    }
1329 #  endif
1330 
1331    return False;
1332 
1333 #  undef BAD
1334 }
1335 
1336 
1337 /* Sanity check absolutely everything.  True == check passed. */
1338 
1339 /* forwards */
1340 static Bool sanity_check_redir_tt_tc ( void );
1341 
sanity_check_sector_search_order(void)1342 static Bool sanity_check_sector_search_order ( void )
1343 {
1344    SECno i, j, nListed;
1345    /* assert the array is the right size */
1346    vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
1347                                / sizeof(sector_search_order[0])));
1348    /* Check it's of the form  valid_sector_numbers ++ [INV_SNO, INV_SNO, ..] */
1349    for (i = 0; i < n_sectors; i++) {
1350       if (sector_search_order[i] == INV_SNO
1351           || sector_search_order[i] >= n_sectors)
1352          break;
1353    }
1354    nListed = i;
1355    for (/* */; i < n_sectors; i++) {
1356       if (sector_search_order[i] != INV_SNO)
1357          break;
1358    }
1359    if (i != n_sectors)
1360       return False;
1361    /* Check each sector number only appears once */
1362    for (i = 0; i < n_sectors; i++) {
1363       if (sector_search_order[i] == INV_SNO)
1364          continue;
1365       for (j = i+1; j < n_sectors; j++) {
1366          if (sector_search_order[j] == sector_search_order[i])
1367             return False;
1368       }
1369    }
1370    /* Check that the number of listed sectors equals the number
1371       in use, by counting nListed back down. */
1372    for (i = 0; i < n_sectors; i++) {
1373       if (sectors[i].tc != NULL)
1374          nListed--;
1375    }
1376    if (nListed != 0)
1377       return False;
1378    return True;
1379 }
1380 
sanity_check_all_sectors(void)1381 static Bool sanity_check_all_sectors ( void )
1382 {
1383    SECno   sno;
1384    Bool    sane;
1385    Sector* sec;
1386    for (sno = 0; sno < n_sectors; sno++) {
1387       Int i;
1388       Int nr_not_dead_hx = 0;
1389       Int szhxa;
1390       sec = &sectors[sno];
1391       if (sec->tc == NULL)
1392          continue;
1393       sane = sanity_check_eclasses_in_sector( sec );
1394       if (!sane)
1395          return False;
1396       szhxa = VG_(sizeXA)(sec->host_extents);
1397       for (i = 0; i < szhxa; i++) {
1398          const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
1399          if (!HostExtent__is_dead (hx, sec))
1400             nr_not_dead_hx++;
1401       }
1402       if (nr_not_dead_hx != sec->tt_n_inuse) {
1403          VG_(debugLog)(0, "transtab",
1404                        "nr_not_dead_hx %d sanity fail "
1405                        "(expected == in use %d)\n",
1406                        nr_not_dead_hx, sec->tt_n_inuse);
1407          return False;
1408       }
1409    }
1410 
1411    if ( !sanity_check_redir_tt_tc() )
1412       return False;
1413    if ( !sanity_check_sector_search_order() )
1414       return False;
1415    return True;
1416 }
1417 
1418 
1419 
1420 /*-------------------------------------------------------------*/
1421 /*--- Add/find translations                                 ---*/
1422 /*-------------------------------------------------------------*/
1423 
vge_osize(const VexGuestExtents * vge)1424 static UInt vge_osize ( const VexGuestExtents* vge )
1425 {
1426    UInt i, n = 0;
1427    for (i = 0; i < vge->n_used; i++)
1428       n += (UInt)vge->len[i];
1429    return n;
1430 }
1431 
TTEntryH__osize(const TTEntryH * tteH)1432 static UInt TTEntryH__osize ( const TTEntryH* tteH )
1433 {
1434    UInt i, n = 0;
1435    for (i = 0; i < tteH->vge_n_used; i++)
1436       n += (UInt)tteH->vge_len[i];
1437    return n;
1438 }
1439 
isValidSector(SECno sector)1440 static Bool isValidSector ( SECno sector )
1441 {
1442    if (sector == INV_SNO || sector >= n_sectors)
1443       return False;
1444    return True;
1445 }
1446 
HASH_TT(Addr key)1447 static inline HTTno HASH_TT ( Addr key )
1448 {
1449    UInt kHi = sizeof(Addr) == 4 ? 0 : (key >> 32);
1450    UInt kLo = (UInt)key;
1451    UInt k32 = kHi ^ kLo;
1452    UInt ror = 7;
1453    if (ror > 0)
1454       k32 = (k32 >> ror) | (k32 << (32-ror));
1455    return (HTTno)(k32 % N_HTTES_PER_SECTOR);
1456 }
1457 
setFastCacheEntry(Addr key,ULong * tcptr)1458 static void setFastCacheEntry ( Addr key, ULong* tcptr )
1459 {
1460    UInt cno = (UInt)VG_TT_FAST_HASH(key);
1461    VG_(tt_fast)[cno].guest = key;
1462    VG_(tt_fast)[cno].host  = (Addr)tcptr;
1463    n_fast_updates++;
1464    /* This shouldn't fail.  It should be assured by m_translate
1465       which should reject any attempt to make translation of code
1466       starting at TRANSTAB_BOGUS_GUEST_ADDR. */
1467    vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
1468 }
1469 
1470 /* Invalidate the fast cache VG_(tt_fast). */
invalidateFastCache(void)1471 static void invalidateFastCache ( void )
1472 {
1473    UInt j;
1474    /* This loop is popular enough to make it worth unrolling a
1475       bit, at least on ppc32. */
1476    vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
1477    for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
1478       VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1479       VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1480       VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1481       VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1482    }
1483 
1484    vg_assert(j == VG_TT_FAST_SIZE);
1485    n_fast_flushes++;
1486 }
1487 
1488 
get_empty_tt_slot(SECno sNo)1489 static TTEno get_empty_tt_slot(SECno sNo)
1490 {
1491    TTEno i;
1492 
1493    i = sectors[sNo].empty_tt_list;
1494    sectors[sNo].empty_tt_list = sectors[sNo].ttC[i].usage.next_empty_tte;
1495 
1496    vg_assert (i >= 0 && i < N_TTES_PER_SECTOR);
1497 
1498    return i;
1499 }
1500 
add_to_empty_tt_list(SECno sNo,TTEno tteno)1501 static void add_to_empty_tt_list (SECno sNo, TTEno tteno)
1502 {
1503    sectors[sNo].ttC[tteno].usage.next_empty_tte = sectors[sNo].empty_tt_list;
1504    sectors[sNo].empty_tt_list = tteno;
1505 }
1506 
initialiseSector(SECno sno)1507 static void initialiseSector ( SECno sno )
1508 {
1509    UInt i;
1510    SysRes  sres;
1511    Sector* sec;
1512    vg_assert(isValidSector(sno));
1513 
1514    { Bool sane = sanity_check_sector_search_order();
1515      vg_assert(sane);
1516    }
1517    sec = &sectors[sno];
1518 
1519    if (sec->tc == NULL) {
1520 
1521       /* Sector has never been used before.  Need to allocate tt and
1522          tc. */
1523       vg_assert(sec->ttC == NULL);
1524       vg_assert(sec->ttH == NULL);
1525       vg_assert(sec->tc_next == NULL);
1526       vg_assert(sec->tt_n_inuse == 0);
1527       for (EClassNo e = 0; e < ECLASS_N; e++) {
1528          vg_assert(sec->ec2tte_size[e] == 0);
1529          vg_assert(sec->ec2tte_used[e] == 0);
1530          vg_assert(sec->ec2tte[e] == NULL);
1531       }
1532       vg_assert(sec->host_extents == NULL);
1533 
1534       if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
1535          VG_(dmsg)("transtab: " "allocate sector %d\n", sno);
1536 
1537       sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
1538       if (sr_isError(sres)) {
1539          VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
1540                                      8 * tc_sector_szQ );
1541 	 /*NOTREACHED*/
1542       }
1543       sec->tc = (ULong*)(Addr)sr_Res(sres);
1544 
1545       sres = VG_(am_mmap_anon_float_valgrind)
1546                 ( N_TTES_PER_SECTOR * sizeof(TTEntryC) );
1547       if (sr_isError(sres)) {
1548          VG_(out_of_memory_NORETURN)("initialiseSector(TTC)",
1549                                      N_TTES_PER_SECTOR * sizeof(TTEntryC) );
1550 	 /*NOTREACHED*/
1551       }
1552       sec->ttC = (TTEntryC*)(Addr)sr_Res(sres);
1553 
1554       sres = VG_(am_mmap_anon_float_valgrind)
1555                 ( N_TTES_PER_SECTOR * sizeof(TTEntryH) );
1556       if (sr_isError(sres)) {
1557          VG_(out_of_memory_NORETURN)("initialiseSector(TTH)",
1558                                      N_TTES_PER_SECTOR * sizeof(TTEntryH) );
1559 	 /*NOTREACHED*/
1560       }
1561       sec->ttH = (TTEntryH*)(Addr)sr_Res(sres);
1562 
1563       sec->empty_tt_list = HTT_EMPTY;
1564       for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
1565          sec->ttH[ei].status   = Empty;
1566          sec->ttC[ei].n_tte2ec = 0;
1567          add_to_empty_tt_list(sno, ei);
1568       }
1569 
1570       sres = VG_(am_mmap_anon_float_valgrind)
1571                 ( N_HTTES_PER_SECTOR * sizeof(TTEno) );
1572       if (sr_isError(sres)) {
1573          VG_(out_of_memory_NORETURN)("initialiseSector(HTT)",
1574                                      N_HTTES_PER_SECTOR * sizeof(TTEno) );
1575 	 /*NOTREACHED*/
1576       }
1577       sec->htt = (TTEno*)(Addr)sr_Res(sres);
1578       for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
1579          sec->htt[hi] = HTT_EMPTY;
1580 
1581       /* Set up the host_extents array. */
1582       sec->host_extents
1583          = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
1584                       ttaux_free,
1585                       sizeof(HostExtent));
1586 
1587       /* Add an entry in the sector_search_order */
1588       for (i = 0; i < n_sectors; i++) {
1589          if (sector_search_order[i] == INV_SNO)
1590             break;
1591       }
1592       vg_assert(i >= 0 && i < n_sectors);
1593       sector_search_order[i] = sno;
1594 
1595       if (VG_(clo_verbosity) > 2)
1596          VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);
1597 
1598    } else {
1599 
1600       /* Sector has been used before.  Dump the old contents. */
1601       if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
1602          VG_(dmsg)("transtab: " "recycle  sector %d\n", sno);
1603       n_sectors_recycled++;
1604 
1605       vg_assert(sec->ttC != NULL);
1606       vg_assert(sec->ttH != NULL);
1607       vg_assert(sec->tc_next != NULL);
1608       n_dump_count += sec->tt_n_inuse;
1609 
1610       VexArch     arch_host = VexArch_INVALID;
1611       VexArchInfo archinfo_host;
1612       VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
1613       VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
1614       VexEndness endness_host = archinfo_host.endness;
1615 
1616       /* Visit each just-about-to-be-abandoned translation. */
1617       if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
1618                                       sno);
1619       sec->empty_tt_list = HTT_EMPTY;
1620       for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
1621          if (sec->ttH[ei].status == InUse) {
1622             vg_assert(sec->ttC[ei].n_tte2ec >= 1);
1623             vg_assert(sec->ttC[ei].n_tte2ec <= 3);
1624             n_dump_osize += TTEntryH__osize(&sec->ttH[ei]);
1625             /* Tell the tool too. */
1626             if (VG_(needs).superblock_discards) {
1627                VexGuestExtents vge_tmp;
1628                TTEntryH__to_VexGuestExtents( &vge_tmp, &sec->ttH[ei] );
1629                VG_TDICT_CALL( tool_discard_superblock_info,
1630                               sec->ttC[ei].entry, vge_tmp );
1631             }
1632             unchain_in_preparation_for_deletion(arch_host,
1633                                                 endness_host, sno, ei);
1634          } else {
1635             vg_assert(sec->ttC[ei].n_tte2ec == 0);
1636          }
1637          sec->ttH[ei].status   = Empty;
1638          sec->ttC[ei].n_tte2ec = 0;
1639          add_to_empty_tt_list(sno, ei);
1640       }
1641       for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
1642          sec->htt[hi] = HTT_EMPTY;
1643 
1644       if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
1645                                       sno);
1646 
1647       /* Free up the eclass structures. */
1648       for (EClassNo e = 0; e < ECLASS_N; e++) {
1649          if (sec->ec2tte_size[e] == 0) {
1650             vg_assert(sec->ec2tte_used[e] == 0);
1651             vg_assert(sec->ec2tte[e] == NULL);
1652          } else {
1653             vg_assert(sec->ec2tte[e] != NULL);
1654             ttaux_free(sec->ec2tte[e]);
1655             sec->ec2tte[e] = NULL;
1656             sec->ec2tte_size[e] = 0;
1657             sec->ec2tte_used[e] = 0;
1658          }
1659       }
1660 
1661       /* Empty out the host extents array. */
1662       vg_assert(sec->host_extents != NULL);
1663       VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
1664       vg_assert(VG_(sizeXA)(sec->host_extents) == 0);
1665 
1666       /* Sanity check: ensure it is already in
1667          sector_search_order[]. */
1668       SECno ix;
1669       for (ix = 0; ix < n_sectors; ix++) {
1670          if (sector_search_order[ix] == sno)
1671             break;
1672       }
1673       vg_assert(ix >= 0 && ix < n_sectors);
1674 
1675       if (VG_(clo_verbosity) > 2)
1676          VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
1677    }
1678 
1679    sec->tc_next = sec->tc;
1680    sec->tt_n_inuse = 0;
1681 
1682    invalidateFastCache();
1683 
1684    { Bool sane = sanity_check_sector_search_order();
1685      vg_assert(sane);
1686    }
1687 }
1688 
1689 /* Add a translation of vge to TT/TC.  The translation is temporarily
1690    in code[0 .. code_len-1].
1691 
1692    pre: youngest_sector points to a valid (although possibly full)
1693    sector.
1694 */
VG_(add_to_transtab)1695 void VG_(add_to_transtab)( const VexGuestExtents* vge,
1696                            Addr             entry,
1697                            Addr             code,
1698                            UInt             code_len,
1699                            Bool             is_self_checking,
1700                            Int              offs_profInc,
1701                            UInt             n_guest_instrs )
1702 {
1703    Int    tcAvailQ, reqdQ, y;
1704    ULong  *tcptr, *tcptr2;
1705    UChar* srcP;
1706    UChar* dstP;
1707 
1708    vg_assert(init_done);
1709    vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
1710 
1711    /* 60000: should agree with N_TMPBUF in m_translate.c. */
1712    vg_assert(code_len > 0 && code_len < 60000);
1713 
1714    /* Generally stay sane */
1715    vg_assert(n_guest_instrs < 200); /* it can be zero, tho */
1716 
1717    if (DEBUG_TRANSTAB)
1718       VG_(printf)("add_to_transtab(entry = 0x%lx, len = %u) ...\n",
1719                   entry, code_len);
1720 
1721    n_in_count++;
1722    n_in_tsize += code_len;
1723    n_in_osize += vge_osize(vge);
1724    if (is_self_checking)
1725       n_in_sc_count++;
1726 
1727    y = youngest_sector;
1728    vg_assert(isValidSector(y));
1729 
1730    if (sectors[y].tc == NULL)
1731       initialiseSector(y);
1732 
1733    /* Try putting the translation in this sector. */
1734    reqdQ = (code_len + 7) >> 3;
1735 
1736    /* Will it fit in tc? */
1737    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
1738               - ((ULong*)(sectors[y].tc_next));
1739    vg_assert(tcAvailQ >= 0);
1740    vg_assert(tcAvailQ <= tc_sector_szQ);
1741 
1742    if (tcAvailQ < reqdQ
1743        || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR) {
1744       /* No.  So move on to the next sector.  Either it's never been
1745          used before, in which case it will get its tt/tc allocated
1746          now, or it has been used before, in which case it is set to be
1747          empty, hence throwing out the oldest sector. */
1748       vg_assert(tc_sector_szQ > 0);
1749       Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
1750                            / N_HTTES_PER_SECTOR;
1751       Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
1752                            / tc_sector_szQ;
1753       if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) {
1754          VG_(dmsg)("transtab: "
1755                    "declare  sector %d full "
1756                    "(TT loading %2d%%, TC loading %2d%%, avg tce size %d)\n",
1757                    y, tt_loading_pct, tc_loading_pct,
1758                    8 * (tc_sector_szQ - tcAvailQ)/sectors[y].tt_n_inuse);
1759       }
1760       youngest_sector++;
1761       if (youngest_sector >= n_sectors)
1762          youngest_sector = 0;
1763       y = youngest_sector;
1764       initialiseSector(y);
1765    }
1766 
1767    /* Be sure ... */
1768    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
1769               - ((ULong*)(sectors[y].tc_next));
1770    vg_assert(tcAvailQ >= 0);
1771    vg_assert(tcAvailQ <= tc_sector_szQ);
1772    vg_assert(tcAvailQ >= reqdQ);
1773    vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR);
1774    vg_assert(sectors[y].tt_n_inuse >= 0);
1775 
1776    /* Copy into tc. */
1777    tcptr = sectors[y].tc_next;
1778    vg_assert(tcptr >= &sectors[y].tc[0]);
1779    vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);
1780 
1781    dstP = (UChar*)tcptr;
1782    srcP = (UChar*)code;
1783    VG_(memcpy)(dstP, srcP, code_len);
1784    sectors[y].tc_next += reqdQ;
1785    sectors[y].tt_n_inuse++;
1786 
1787    /* more paranoia */
1788    tcptr2 = sectors[y].tc_next;
1789    vg_assert(tcptr2 >= &sectors[y].tc[0]);
1790    vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);
1791 
1792    /* Find an empty tt slot, and use it.  There must be such a slot
1793       since tt is never allowed to get completely full. */
1794    TTEno tteix = get_empty_tt_slot(y);
1795    TTEntryC__init(&sectors[y].ttC[tteix]);
1796    TTEntryH__init(&sectors[y].ttH[tteix]);
1797    sectors[y].ttC[tteix].tcptr  = tcptr;
1798    sectors[y].ttC[tteix].usage.prof.count  = 0;
1799    sectors[y].ttC[tteix].usage.prof.weight =
1800       n_guest_instrs == 0 ? 1 : n_guest_instrs;
1801    sectors[y].ttC[tteix].entry  = entry;
1802    TTEntryH__from_VexGuestExtents( &sectors[y].ttH[tteix], vge );
1803    sectors[y].ttH[tteix].status = InUse;
1804 
1805    // Point an htt entry to the tt slot
1806    HTTno htti = HASH_TT(entry);
1807    vg_assert(htti >= 0 && htti < N_HTTES_PER_SECTOR);
1808    while (True) {
1809       if (sectors[y].htt[htti] == HTT_EMPTY
1810           || sectors[y].htt[htti] == HTT_DELETED)
1811          break;
1812       htti++;
1813       if (htti >= N_HTTES_PER_SECTOR)
1814          htti = 0;
1815    }
1816    sectors[y].htt[htti] = tteix;
1817 
1818    /* Patch in the profile counter location, if necessary. */
1819    if (offs_profInc != -1) {
1820       vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
1821       VexArch     arch_host = VexArch_INVALID;
1822       VexArchInfo archinfo_host;
1823       VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
1824       VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
1825       VexEndness endness_host = archinfo_host.endness;
1826       VexInvalRange vir
1827          = LibVEX_PatchProfInc( arch_host, endness_host,
1828                                 dstP + offs_profInc,
1829                                 &sectors[y].ttC[tteix].usage.prof.count );
1830       VG_(invalidate_icache)( (void*)vir.start, vir.len );
1831    }
1832 
1833    VG_(invalidate_icache)( dstP, code_len );
1834 
1835    /* Add this entry to the host_extents map, checking that we're
1836       adding in order. */
1837    { HostExtent hx;
1838      hx.start = (UChar*)tcptr;
1839      hx.len   = code_len;
1840      hx.tteNo = tteix;
1841      vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
1842      XArray* hx_array = sectors[y].host_extents;
1843      vg_assert(hx_array);
1844      Word n = VG_(sizeXA)(hx_array);
1845      if (n > 0) {
1846         HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
1847         vg_assert(hx_prev->start + hx_prev->len <= hx.start);
1848      }
1849      VG_(addToXA)(hx_array, &hx);
1850      if (DEBUG_TRANSTAB)
1851         VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
1852                     hx.start, hx.len, y, tteix);
1853    }
1854 
1855    /* Update the fast-cache. */
1856    setFastCacheEntry( entry, tcptr );
1857 
1858    /* Note the eclass numbers for this translation. */
1859    upd_eclasses_after_add( &sectors[y], tteix );
1860 }
1861 
1862 
1863 /* Search for the translation of the given guest address.  If
1864    requested, a successful search can also cause the fast-caches to be
1865    updated.
1866 */
VG_(search_transtab)1867 Bool VG_(search_transtab) ( /*OUT*/Addr*  res_hcode,
1868                             /*OUT*/SECno* res_sNo,
1869                             /*OUT*/TTEno* res_tteNo,
1870                             Addr          guest_addr,
1871                             Bool          upd_cache )
1872 {
1873    SECno i, sno;
1874    HTTno j, k, kstart;
1875    TTEno tti;
1876 
1877    vg_assert(init_done);
1878    /* Find the initial probe point just once.  It will be the same in
1879       all sectors and avoids multiple expensive % operations. */
1880    n_full_lookups++;
1881    kstart = HASH_TT(guest_addr);
1882    vg_assert(kstart >= 0 && kstart < N_HTTES_PER_SECTOR);
1883 
1884    /* Search in all the sectors,using sector_search_order[] as a
1885       heuristic guide as to what order to visit the sectors. */
1886    for (i = 0; i < n_sectors; i++) {
1887 
1888       sno = sector_search_order[i];
1889       if (UNLIKELY(sno == INV_SNO))
1890          return False; /* run out of sectors to search */
1891 
1892       k = kstart;
1893       for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
1894          n_lookup_probes++;
1895          tti = sectors[sno].htt[k];
1896          if (tti < N_TTES_PER_SECTOR
1897              && sectors[sno].ttC[tti].entry == guest_addr) {
1898             /* found it */
1899             if (upd_cache)
1900                setFastCacheEntry(
1901                   guest_addr, sectors[sno].ttC[tti].tcptr );
1902             if (res_hcode)
1903                *res_hcode = (Addr)sectors[sno].ttC[tti].tcptr;
1904             if (res_sNo)
1905                *res_sNo = sno;
1906             if (res_tteNo)
1907                *res_tteNo = tti;
1908             /* pull this one one step closer to the front.  For large
1909                apps this more or less halves the number of required
1910                probes. */
1911             if (i > 0) {
1912                Int tmp = sector_search_order[i-1];
1913                sector_search_order[i-1] = sector_search_order[i];
1914                sector_search_order[i] = tmp;
1915             }
1916             return True;
1917          }
1918          // tti is HTT_EMPTY or HTT_DELETED or not the entry of guest_addr
1919          if (sectors[sno].htt[k] == HTT_EMPTY)
1920             break; /* not found in this sector */
1921          k++;
1922          if (k == N_HTTES_PER_SECTOR)
1923             k = 0;
1924       }
1925 
1926       /* If we fall off the end, all entries are InUse and not
1927          matching, or Deleted.  In any case we did not find it in this
1928          sector. */
1929    }
1930 
1931    /* Not found in any sector. */
1932    return False;
1933 }
1934 
1935 
1936 /*-------------------------------------------------------------*/
1937 /*--- Delete translations.                                  ---*/
1938 /*-------------------------------------------------------------*/
1939 
1940 /* forward */
1941 static void unredir_discard_translations( Addr, ULong );
1942 
1943 /* Stuff for deleting translations which intersect with a given
1944    address range.  Unfortunately, to make this run at a reasonable
1945    speed, it is complex. */
1946 
1947 static inline
overlap1(Addr s1,ULong r1,Addr s2,ULong r2)1948 Bool overlap1 ( Addr s1, ULong r1, Addr s2, ULong r2 )
1949 {
1950    Addr e1 = s1 + r1 - 1;
1951    Addr e2 = s2 + r2 - 1;
1952    if (e1 < s2 || e2 < s1)
1953       return False;
1954    return True;
1955 }
1956 
1957 static inline
overlaps(Addr start,ULong range,const TTEntryH * tteH)1958 Bool overlaps ( Addr start, ULong range, const TTEntryH* tteH )
1959 {
1960    if (overlap1(start, range, tteH->vge_base[0], tteH->vge_len[0]))
1961       return True;
1962    if (tteH->vge_n_used < 2)
1963       return False;
1964    if (overlap1(start, range, tteH->vge_base[1], tteH->vge_len[1]))
1965       return True;
1966    if (tteH->vge_n_used < 3)
1967       return False;
1968    if (overlap1(start, range, tteH->vge_base[2], tteH->vge_len[2]))
1969       return True;
1970    return False;
1971 }
1972 
1973 
1974 /* Delete a tt entry, and update all the eclass data accordingly. */
1975 
delete_tte(Sector * sec,SECno secNo,TTEno tteno,VexArch arch_host,VexEndness endness_host)1976 static void delete_tte ( /*MOD*/Sector* sec, SECno secNo, TTEno tteno,
1977                          VexArch arch_host, VexEndness endness_host )
1978 {
1979    Int      i, ec_idx;
1980    EClassNo ec_num;
1981 
1982    /* sec and secNo are mutually redundant; cross-check. */
1983    vg_assert(sec == &sectors[secNo]);
1984 
1985    vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1986    TTEntryC* tteC = &sec->ttC[tteno];
1987    TTEntryH* tteH = &sec->ttH[tteno];
1988    vg_assert(tteH->status == InUse);
1989    vg_assert(tteC->n_tte2ec >= 1 && tteC->n_tte2ec <= 3);
1990 
1991    /* Unchain .. */
1992    unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);
1993 
1994    /* Deal with the ec-to-tte links first. */
1995    for (i = 0; i < tteC->n_tte2ec; i++) {
1996       ec_num = tteC->tte2ec_ec[i];
1997       ec_idx = tteC->tte2ec_ix[i];
1998       vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
1999       vg_assert(ec_idx >= 0);
2000       vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
2001       /* Assert that the two links point at each other. */
2002       vg_assert(sec->ec2tte[ec_num][ec_idx] == tteno);
2003       /* "delete" the pointer back to here. */
2004       sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
2005    }
2006 
2007    /* Now fix up this TTEntry. */
2008    /* Mark the entry as deleted in htt.
2009       Note: we could avoid the below hash table search by
2010       adding a reference from tte to its hash position in tt. */
2011    HTTno j;
2012    HTTno k = HASH_TT(tteC->entry);
2013    vg_assert(k >= 0 && k < N_HTTES_PER_SECTOR);
2014    for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
2015       if (sec->htt[k] == tteno)
2016          break;
2017       k++;
2018       if (k == N_HTTES_PER_SECTOR)
2019          k = 0;
2020    }
2021    vg_assert(j < N_HTTES_PER_SECTOR);
2022    sec->htt[k]    = HTT_DELETED;
2023    tteH->status   = Deleted;
2024    tteC->n_tte2ec = 0;
2025    add_to_empty_tt_list(secNo, tteno);
2026 
2027    /* Stats .. */
2028    sec->tt_n_inuse--;
2029    n_disc_count++;
2030    n_disc_osize += TTEntryH__osize(tteH);
2031 
2032    /* Tell the tool too. */
2033    if (VG_(needs).superblock_discards) {
2034       VexGuestExtents vge_tmp;
2035       TTEntryH__to_VexGuestExtents( &vge_tmp, tteH );
2036       VG_TDICT_CALL( tool_discard_superblock_info, tteC->entry, vge_tmp );
2037    }
2038 }
2039 
2040 
2041 /* Delete translations from sec which intersect specified range, but
2042    only consider translations in the specified eclass. */
2043 
2044 static
delete_translations_in_sector_eclass(Sector * sec,SECno secNo,Addr guest_start,ULong range,EClassNo ec,VexArch arch_host,VexEndness endness_host)2045 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, SECno secNo,
2046                                             Addr guest_start, ULong range,
2047                                             EClassNo ec,
2048                                             VexArch arch_host,
2049                                             VexEndness endness_host )
2050 {
2051    Int      i;
2052    TTEno    tteno;
2053    Bool     anyDeld = False;
2054 
2055    vg_assert(ec >= 0 && ec < ECLASS_N);
2056 
2057    for (i = 0; i < sec->ec2tte_used[ec]; i++) {
2058 
2059       tteno = sec->ec2tte[ec][i];
2060       if (tteno == EC2TTE_DELETED) {
2061          /* already deleted */
2062          continue;
2063       }
2064 
2065       vg_assert(tteno < N_TTES_PER_SECTOR);
2066 
2067       TTEntryH* tteH = &sec->ttH[tteno];
2068       vg_assert(tteH->status == InUse);
2069 
2070       if (overlaps( guest_start, range, tteH )) {
2071          anyDeld = True;
2072          delete_tte( sec, secNo, tteno, arch_host, endness_host );
2073       }
2074 
2075    }
2076 
2077    return anyDeld;
2078 }
2079 
2080 
2081 /* Delete translations from sec which intersect specified range, the
2082    slow way, by inspecting all translations in sec. */
2083 
2084 static
delete_translations_in_sector(Sector * sec,SECno secNo,Addr guest_start,ULong range,VexArch arch_host,VexEndness endness_host)2085 Bool delete_translations_in_sector ( /*MOD*/Sector* sec, SECno secNo,
2086                                      Addr guest_start, ULong range,
2087                                      VexArch arch_host,
2088                                      VexEndness endness_host )
2089 {
2090    TTEno i;
2091    Bool  anyDeld = False;
2092 
2093    for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2094       /* The entire and only purpose of splitting TTEntry into cold
2095          and hot parts (TTEntryC and TTEntryH) is to make this search
2096          loop run as fast as possible, by avoiding having to pull all
2097          of the cold data up the memory hierarchy. */
2098       if (UNLIKELY(sec->ttH[i].status == InUse
2099                    && overlaps( guest_start, range, &sec->ttH[i] ))) {
2100          anyDeld = True;
2101          delete_tte( sec, secNo, i, arch_host, endness_host );
2102       }
2103    }
2104 
2105    return anyDeld;
2106 }
2107 
2108 
VG_(discard_translations)2109 void VG_(discard_translations) ( Addr guest_start, ULong range,
2110                                  const HChar* who )
2111 {
2112    Sector* sec;
2113    SECno   sno;
2114    EClassNo ec;
2115    Bool    anyDeleted = False;
2116 
2117    vg_assert(init_done);
2118 
2119    VG_(debugLog)(2, "transtab",
2120                     "discard_translations(0x%lx, %llu) req by %s\n",
2121                     guest_start, range, who );
2122 
2123    /* Pre-deletion sanity check */
2124    if (VG_(clo_sanity_level) >= 4) {
2125       Bool sane = sanity_check_all_sectors();
2126       vg_assert(sane);
2127    }
2128 
2129    if (range == 0)
2130       return;
2131 
2132    VexArch     arch_host = VexArch_INVALID;
2133    VexArchInfo archinfo_host;
2134    VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
2135    VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
2136    VexEndness endness_host = archinfo_host.endness;
2137 
2138    /* There are two different ways to do this.
2139 
2140       If the range fits within a single address-range equivalence
2141       class, as will be the case for a cache line sized invalidation,
2142       then we only have to inspect the set of translations listed in
2143       that equivalence class, and also in the "sin-bin" equivalence
2144       class ECLASS_MISC.
2145 
2146       Otherwise, the invalidation is of a larger range and probably
2147       results from munmap.  In this case it's (probably!) faster just
2148       to inspect all translations, dump those we don't want, and
2149       regenerate the equivalence class information (since modifying it
2150       in-situ is even more expensive).
2151    */
2152 
2153    /* First off, figure out if the range falls within a single class,
2154       and if so which one. */
2155 
2156    ec = ECLASS_MISC;
2157    if (range <= (1ULL << ECLASS_SHIFT))
2158       ec = range_to_eclass( guest_start, (UInt)range );
2159 
2160    /* if ec is ECLASS_MISC then we aren't looking at just a single
2161       class, so use the slow scheme.  Else use the fast scheme,
2162       examining 'ec' and ECLASS_MISC. */
2163 
2164    if (ec != ECLASS_MISC) {
2165 
2166       VG_(debugLog)(2, "transtab",
2167                        "                    FAST, ec = %d\n", ec);
2168 
2169       /* Fast scheme */
2170       vg_assert(ec >= 0 && ec < ECLASS_MISC);
2171 
2172       for (sno = 0; sno < n_sectors; sno++) {
2173          sec = &sectors[sno];
2174          if (sec->tc == NULL)
2175             continue;
2176          anyDeleted |= delete_translations_in_sector_eclass(
2177                           sec, sno, guest_start, range, ec,
2178                           arch_host, endness_host
2179                        );
2180          anyDeleted |= delete_translations_in_sector_eclass(
2181                           sec, sno, guest_start, range, ECLASS_MISC,
2182                           arch_host, endness_host
2183                        );
2184       }
2185 
2186    } else {
2187 
2188       /* slow scheme */
2189 
2190       VG_(debugLog)(2, "transtab",
2191                        "                    SLOW, ec = %d\n", ec);
2192 
2193       for (sno = 0; sno < n_sectors; sno++) {
2194          sec = &sectors[sno];
2195          if (sec->tc == NULL)
2196             continue;
2197          anyDeleted |= delete_translations_in_sector(
2198                           sec, sno, guest_start, range,
2199                           arch_host, endness_host
2200                        );
2201       }
2202 
2203    }
2204 
2205    if (anyDeleted)
2206       invalidateFastCache();
2207 
2208    /* don't forget the no-redir cache */
2209    unredir_discard_translations( guest_start, range );
2210 
2211    /* Post-deletion sanity check */
2212    if (VG_(clo_sanity_level) >= 4) {
2213       TTEno i;
2214       Bool  sane = sanity_check_all_sectors();
2215       vg_assert(sane);
2216       /* But now, also check the requested address range isn't
2217          present anywhere. */
2218       for (sno = 0; sno < n_sectors; sno++) {
2219          sec = &sectors[sno];
2220          if (sec->tc == NULL)
2221             continue;
2222          for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2223             TTEntryH* tteH = &sec->ttH[i];
2224             if (tteH->status != InUse)
2225                continue;
2226             vg_assert(!overlaps( guest_start, range, tteH ));
2227          }
2228       }
2229    }
2230 }
2231 
2232 /* Whether or not tools may discard translations. */
2233 Bool  VG_(ok_to_discard_translations) = False;
2234 
2235 /* This function is exported to tools which can use it to discard
2236    translations, provided it is safe to do so. */
VG_(discard_translations_safely)2237 void VG_(discard_translations_safely) ( Addr  start, SizeT len,
2238                                         const HChar* who )
2239 {
2240    vg_assert(VG_(ok_to_discard_translations));
2241    VG_(discard_translations)(start, len, who);
2242 }
2243 
2244 /*------------------------------------------------------------*/
2245 /*--- AUXILIARY: the unredirected TT/TC                    ---*/
2246 /*------------------------------------------------------------*/
2247 
2248 /* A very simple translation cache which holds a small number of
2249    unredirected translations.  This is completely independent of the
2250    main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
2251    both structures are simply dumped and we start over.
2252 
2253    Since these translations are unredirected, the search key is (by
2254    definition) the first address entry in the .vge field. */
2255 
2256 /* Sized to hold 500 translations of average size 1000 bytes. */
2257 
2258 #define UNREDIR_SZB   1000
2259 
2260 #define N_UNREDIR_TT  500
2261 #define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
2262 
2263 typedef
2264    struct {
2265       VexGuestExtents vge;
2266       Addr            hcode;
2267       Bool            inUse;
2268    }
2269    UTCEntry;
2270 
2271 /* We just allocate forwards in _tc, never deleting. */
2272 static ULong    *unredir_tc;
2273 static Int      unredir_tc_used = N_UNREDIR_TCQ;
2274 
2275 /* Slots in _tt can come into use and out again (.inUse).
2276    Nevertheless _tt_highwater is maintained so that invalidations
2277    don't have to scan all the slots when only a few are in use.
2278    _tt_highwater holds the index of the highest ever allocated
2279    slot. */
2280 static UTCEntry unredir_tt[N_UNREDIR_TT];
2281 static Int      unredir_tt_highwater;
2282 
2283 
init_unredir_tt_tc(void)2284 static void init_unredir_tt_tc ( void )
2285 {
2286    Int i;
2287    if (unredir_tc == NULL) {
2288       SysRes sres = VG_(am_mmap_anon_float_valgrind)
2289                        ( N_UNREDIR_TT * UNREDIR_SZB );
2290       if (sr_isError(sres)) {
2291          VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
2292                                      N_UNREDIR_TT * UNREDIR_SZB);
2293          /*NOTREACHED*/
2294       }
2295       unredir_tc = (ULong *)(Addr)sr_Res(sres);
2296    }
2297    unredir_tc_used = 0;
2298    for (i = 0; i < N_UNREDIR_TT; i++)
2299       unredir_tt[i].inUse = False;
2300    unredir_tt_highwater = -1;
2301 }
2302 
2303 /* Do a sanity check; return False on failure. */
sanity_check_redir_tt_tc(void)2304 static Bool sanity_check_redir_tt_tc ( void )
2305 {
2306    Int i;
2307    if (unredir_tt_highwater < -1) return False;
2308    if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
2309 
2310    for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
2311       if (unredir_tt[i].inUse)
2312          return False;
2313 
2314    if (unredir_tc_used < 0) return False;
2315    if (unredir_tc_used > N_UNREDIR_TCQ) return False;
2316 
2317    return True;
2318 }
2319 
2320 
2321 /* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
2322    is temporarily in code[0 .. code_len-1].
2323 */
VG_(add_to_unredir_transtab)2324 void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge,
2325                                    Addr             entry,
2326                                    Addr             code,
2327                                    UInt             code_len )
2328 {
2329    Int   i, j, code_szQ;
2330    HChar *srcP, *dstP;
2331 
2332    vg_assert(sanity_check_redir_tt_tc());
2333 
2334    /* This is the whole point: it's not redirected! */
2335    vg_assert(entry == vge->base[0]);
2336 
2337    /* How many unredir_tt slots are needed */
2338    code_szQ = (code_len + 7) / 8;
2339 
2340    /* Look for an empty unredir_tc slot */
2341    for (i = 0; i < N_UNREDIR_TT; i++)
2342       if (!unredir_tt[i].inUse)
2343          break;
2344 
2345    if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
2346       /* It's full; dump everything we currently have */
2347       init_unredir_tt_tc();
2348       i = 0;
2349    }
2350 
2351    vg_assert(unredir_tc_used >= 0);
2352    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2353    vg_assert(code_szQ > 0);
2354    vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
2355    vg_assert(i >= 0 && i < N_UNREDIR_TT);
2356    vg_assert(unredir_tt[i].inUse == False);
2357 
2358    if (i > unredir_tt_highwater)
2359       unredir_tt_highwater = i;
2360 
2361    dstP = (HChar*)&unredir_tc[unredir_tc_used];
2362    srcP = (HChar*)code;
2363    for (j = 0; j < code_len; j++)
2364       dstP[j] = srcP[j];
2365 
2366    VG_(invalidate_icache)( dstP, code_len );
2367 
2368    unredir_tt[i].inUse = True;
2369    unredir_tt[i].vge   = *vge;
2370    unredir_tt[i].hcode = (Addr)dstP;
2371 
2372    unredir_tc_used += code_szQ;
2373    vg_assert(unredir_tc_used >= 0);
2374    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2375 
2376    vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
2377 }
2378 
VG_(search_unredir_transtab)2379 Bool VG_(search_unredir_transtab) ( /*OUT*/Addr*  result,
2380                                     Addr          guest_addr )
2381 {
2382    Int i;
2383    for (i = 0; i < N_UNREDIR_TT; i++) {
2384       if (!unredir_tt[i].inUse)
2385          continue;
2386       if (unredir_tt[i].vge.base[0] == guest_addr) {
2387          *result = unredir_tt[i].hcode;
2388          return True;
2389       }
2390    }
2391    return False;
2392 }
2393 
unredir_discard_translations(Addr guest_start,ULong range)2394 static void unredir_discard_translations( Addr guest_start, ULong range )
2395 {
2396    Int i;
2397 
2398    vg_assert(sanity_check_redir_tt_tc());
2399 
2400    for (i = 0; i <= unredir_tt_highwater; i++) {
2401       if (unredir_tt[i].inUse) {
2402          /* Fake up a TTEntryH just so we can pass it to overlaps()
2403             without having to make a new version of overlaps() just for
2404             this rare case. */
2405          TTEntryH tmp_tteH;
2406          TTEntryH__from_VexGuestExtents( &tmp_tteH, &unredir_tt[i].vge );
2407          tmp_tteH.status = Empty; /* Completely irrelevant; pure paranoia. */
2408          if (overlaps( guest_start, range, &tmp_tteH )) {
2409             unredir_tt[i].inUse = False;
2410          }
2411       }
2412    }
2413 }
2414 
2415 
2416 /*------------------------------------------------------------*/
2417 /*--- Initialisation.                                      ---*/
2418 /*------------------------------------------------------------*/
2419 
VG_(init_tt_tc)2420 void VG_(init_tt_tc) ( void )
2421 {
2422    Int i, avg_codeszQ;
2423 
2424    vg_assert(!init_done);
2425    init_done = True;
2426 
2427    /* Otherwise lots of things go wrong... */
2428    vg_assert(sizeof(ULong) == 8);
2429    vg_assert(sizeof(TTEno) == sizeof(HTTno));
2430    vg_assert(sizeof(TTEno) == 2);
2431    vg_assert(N_TTES_PER_SECTOR <= N_HTTES_PER_SECTOR);
2432    vg_assert(N_HTTES_PER_SECTOR < INV_TTE);
2433    vg_assert(N_HTTES_PER_SECTOR < EC2TTE_DELETED);
2434    vg_assert(N_HTTES_PER_SECTOR < HTT_EMPTY);
2435    /* check fast cache entries really are 2 words long */
2436    vg_assert(sizeof(Addr) == sizeof(void*));
2437    vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
2438    /* check fast cache entries are packed back-to-back with no spaces */
2439    vg_assert(sizeof( VG_(tt_fast) )
2440              == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
2441    /* check fast cache is aligned as we requested.  Not fatal if it
2442       isn't, but we might as well make sure. */
2443    vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));
2444 
2445    /* The TTEntryH size is critical for keeping the LLC miss rate down
2446       when doing a lot of discarding.  Hence check it here.  We also
2447       have a lot of TTEntryCs so let's check that too. */
2448    if (sizeof(HWord) == 8) {
2449       vg_assert(sizeof(TTEntryH) <= 32);
2450       vg_assert(sizeof(TTEntryC) <= 112);
2451    }
2452    else if (sizeof(HWord) == 4) {
2453       vg_assert(sizeof(TTEntryH) <= 20);
2454 #     if defined(VGP_ppc32_linux) || defined(VGP_mips32_linux) \
2455          || defined(VGP_arm_linux)
2456       /* On PPC32, MIPS32, ARM32 platforms, alignof(ULong) == 8, so the
2457          structure is larger than on other 32 bit targets. */
2458       vg_assert(sizeof(TTEntryC) <= 96);
2459 #     else
2460       vg_assert(sizeof(TTEntryC) <= 88);
2461 #     endif
2462    }
2463    else {
2464       vg_assert(0);
2465    }
2466 
2467    /* All the hassle about converting between TTEntryH and
2468       VexGuestExtents was so as to ensure the following. */
2469    vg_assert(sizeof(TTEntryH) == sizeof(VexGuestExtents));
2470 
2471    if (VG_(clo_verbosity) > 2)
2472       VG_(message)(Vg_DebugMsg,
2473                    "TT/TC: VG_(init_tt_tc) "
2474                    "(startup of code management)\n");
2475 
2476    /* Figure out how big each tc area should be.  */
2477    if (VG_(clo_avg_transtab_entry_size) == 0)
2478       avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
2479    else
2480       avg_codeszQ   = (VG_(clo_avg_transtab_entry_size) + 7) / 8;
2481    tc_sector_szQ = N_TTES_PER_SECTOR * (1 + avg_codeszQ);
2482 
2483    /* Ensure the calculated value is not way crazy. */
2484    vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR);
2485    vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR);
2486 
2487    n_sectors = VG_(clo_num_transtab_sectors);
2488    vg_assert(n_sectors >= MIN_N_SECTORS);
2489    vg_assert(n_sectors <= MAX_N_SECTORS);
2490 
2491    /* Initialise the sectors, even the ones we aren't going to use.
2492       Set all fields to zero. */
2493    youngest_sector = 0;
2494    for (i = 0; i < MAX_N_SECTORS; i++)
2495       VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));
2496 
2497    /* Initialise the sector_search_order hint table, including the
2498       entries we aren't going to use. */
2499    for (i = 0; i < MAX_N_SECTORS; i++)
2500       sector_search_order[i] = INV_SNO;
2501 
2502    /* Initialise the fast cache. */
2503    invalidateFastCache();
2504 
2505    /* and the unredir tt/tc */
2506    init_unredir_tt_tc();
2507 
2508    if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
2509        || VG_(debugLog_getLevel) () >= 2) {
2510       VG_(message)(Vg_DebugMsg,
2511          "TT/TC: cache: %s--avg-transtab-entry-size=%u, "
2512          "%stool provided default %u\n",
2513          VG_(clo_avg_transtab_entry_size) == 0 ? "ignoring " : "using ",
2514          VG_(clo_avg_transtab_entry_size),
2515          VG_(clo_avg_transtab_entry_size) == 0 ? "using " : "ignoring ",
2516          VG_(details).avg_translation_sizeB);
2517       VG_(message)(Vg_DebugMsg,
2518          "TT/TC: cache: %d sectors of %'d bytes each = %'d total TC\n",
2519           n_sectors, 8 * tc_sector_szQ,
2520           n_sectors * 8 * tc_sector_szQ );
2521       VG_(message)(Vg_DebugMsg,
2522          "TT/TC: table: %'d tables[%d] of C %'d + H %'d bytes each "
2523          "= %'d total TT\n",
2524           n_sectors, N_TTES_PER_SECTOR,
2525           (int)(N_TTES_PER_SECTOR * sizeof(TTEntryC)),
2526           (int)(N_TTES_PER_SECTOR * sizeof(TTEntryH)),
2527           (int)(n_sectors * N_TTES_PER_SECTOR
2528                           * (sizeof(TTEntryC) + sizeof(TTEntryH))));
2529       VG_(message)(Vg_DebugMsg,
2530          "TT/TC: table: %d tt entries each = %'d total tt entries\n",
2531          N_TTES_PER_SECTOR, n_sectors * N_TTES_PER_SECTOR);
2532       VG_(message)(Vg_DebugMsg,
2533          "TT/TC: table: %d htt[%d] of %'d bytes each = %'d total HTT"
2534                        " (htt[%d] %d%% max occup)\n",
2535          n_sectors, N_HTTES_PER_SECTOR,
2536          (int)(N_HTTES_PER_SECTOR * sizeof(TTEno)),
2537          (int)(n_sectors * N_HTTES_PER_SECTOR * sizeof(TTEno)),
2538          N_HTTES_PER_SECTOR, SECTOR_TT_LIMIT_PERCENT);
2539    }
2540 
2541    if (0) {
2542       VG_(printf)("XXXX sizeof(VexGuestExtents) = %d\n",
2543                   (Int)sizeof(VexGuestExtents));
2544       VG_(printf)("XXXX sizeof(InEdge)     = %d\n", (Int)sizeof(InEdge));
2545       VG_(printf)("XXXX sizeof(OutEdge)    = %d\n", (Int)sizeof(OutEdge));
2546       VG_(printf)("XXXX sizeof(InEdgeArr)  = %d\n", (Int)sizeof(InEdgeArr));
2547       VG_(printf)("XXXX sizeof(OutEdgeArr) = %d\n", (Int)sizeof(OutEdgeArr));
2548       VG_(printf)("XXXX sizeof(TTEntryC)   = %d\n", (Int)sizeof(TTEntryC));
2549       VG_(printf)("XXXX sizeof(TTEntryH)   = %d\n", (Int)sizeof(TTEntryH));
2550    }
2551 }
2552 
2553 
2554 /*------------------------------------------------------------*/
2555 /*--- Printing out statistics.                             ---*/
2556 /*------------------------------------------------------------*/
2557 
safe_idiv(ULong a,ULong b)2558 static Double safe_idiv( ULong a, ULong b )
2559 {
2560    return (b == 0 ? 0 : (Double)a / (Double)b);
2561 }
2562 
VG_(get_bbs_translated)2563 UInt VG_(get_bbs_translated) ( void )
2564 {
2565    return n_in_count;
2566 }
2567 
VG_(print_tt_tc_stats)2568 void VG_(print_tt_tc_stats) ( void )
2569 {
2570    VG_(message)(Vg_DebugMsg,
2571       "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
2572       n_full_lookups, n_lookup_probes );
2573    VG_(message)(Vg_DebugMsg,
2574       "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
2575       n_fast_updates, n_fast_flushes );
2576 
2577    VG_(message)(Vg_DebugMsg,
2578                 " transtab: new        %'llu "
2579                 "(%'llu -> %'llu; ratio %3.1f) [%'llu scs] "
2580                 "avg tce size %llu\n",
2581                 n_in_count, n_in_osize, n_in_tsize,
2582                 safe_idiv(n_in_tsize, n_in_osize),
2583                 n_in_sc_count,
2584                 n_in_tsize / (n_in_count ? n_in_count : 1));
2585    VG_(message)(Vg_DebugMsg,
2586                 " transtab: dumped     %'llu (%'llu -> ?" "?) "
2587                 "(sectors recycled %'llu)\n",
2588                 n_dump_count, n_dump_osize, n_sectors_recycled );
2589    VG_(message)(Vg_DebugMsg,
2590                 " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
2591                 n_disc_count, n_disc_osize );
2592 
2593    if (DEBUG_TRANSTAB) {
2594       VG_(printf)("\n");
2595       for (EClassNo e = 0; e < ECLASS_N; e++) {
2596          VG_(printf)(" %4d", sectors[0].ec2tte_used[e]);
2597          if (e % 16 == 15)
2598             VG_(printf)("\n");
2599       }
2600       VG_(printf)("\n\n");
2601    }
2602 }
2603 
2604 /*------------------------------------------------------------*/
2605 /*--- Printing out of profiling results.                   ---*/
2606 /*------------------------------------------------------------*/
2607 
score(const TTEntryC * tteC)2608 static ULong score ( const TTEntryC* tteC )
2609 {
2610    return ((ULong)tteC->usage.prof.weight) * ((ULong)tteC->usage.prof.count);
2611 }
2612 
VG_(get_SB_profile)2613 ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
2614 {
2615    SECno sno;
2616    Int   r, s;
2617    ULong score_total;
2618    TTEno i;
2619 
2620    /* First, compute the total weighted count, and find the top N
2621       ttes.  tops contains pointers to the most-used n_tops blocks, in
2622       descending order (viz, tops[0] is the highest scorer). */
2623    for (s = 0; s < n_tops; s++) {
2624       tops[s].addr  = 0;
2625       tops[s].score = 0;
2626    }
2627 
2628    score_total = 0;
2629 
2630    for (sno = 0; sno < n_sectors; sno++) {
2631       if (sectors[sno].tc == NULL)
2632          continue;
2633       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2634          if (sectors[sno].ttH[i].status != InUse)
2635             continue;
2636          score_total += score(&sectors[sno].ttC[i]);
2637          /* Find the rank for sectors[sno].tt{C,H}[i]. */
2638          r = n_tops-1;
2639          while (True) {
2640             if (r == -1)
2641                break;
2642              if (tops[r].addr == 0) {
2643                r--;
2644                continue;
2645              }
2646              if ( score(&sectors[sno].ttC[i]) > tops[r].score ) {
2647                 r--;
2648                 continue;
2649              }
2650              break;
2651          }
2652          r++;
2653          vg_assert(r >= 0 && r <= n_tops);
2654          /* This bb should be placed at r, and bbs above it shifted
2655             upwards one slot. */
2656          if (r < n_tops) {
2657             for (s = n_tops-1; s > r; s--)
2658                tops[s] = tops[s-1];
2659             tops[r].addr  = sectors[sno].ttC[i].entry;
2660             tops[r].score = score( &sectors[sno].ttC[i] );
2661          }
2662       }
2663    }
2664 
2665    /* Now zero out all the counter fields, so that we can make
2666       multiple calls here and just get the values since the last call,
2667       each time, rather than values accumulated for the whole run. */
2668    for (sno = 0; sno < n_sectors; sno++) {
2669       if (sectors[sno].tc == NULL)
2670          continue;
2671       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2672          if (sectors[sno].ttH[i].status != InUse)
2673             continue;
2674          sectors[sno].ttC[i].usage.prof.count = 0;
2675       }
2676    }
2677 
2678    return score_total;
2679 }
2680 
2681 /*--------------------------------------------------------------------*/
2682 /*--- end                                                          ---*/
2683 /*--------------------------------------------------------------------*/
2684