
/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


#define DEBUG_TRANSTAB 0


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/* Nr of sectors provided via command line parameter. */
UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
/* Nr of sectors.
   Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
static UInt n_sectors = 0;

/*------------------ CONSTANTS ------------------*/
/* Number of TC entries in each sector.  This needs to be a prime
   number to work properly; it must be <= 65535 (so that a TT index
   fits in a UShort, leaving room for 0xFFFF (EC2TTE_DELETED) to denote
   'deleted'), and it is strongly recommended not to change this.
   65521 is the largest prime <= 65535. */
#define N_TTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR_USABLE \
           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
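/* Editorial note (not in the original source): with the values
   above this works out to (65521 * 65) / 100 == 42588 usable
   entries per sector, using integer division. */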

/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)
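
/* Editorial sketch (not in the original source; kept under #if 0 so
   it does not affect the build): with ECLASS_SHIFT == 11 and
   ECLASS_WIDTH == 8, each bin covers one 2048-byte granule of the
   address space, cycling through 256 bins every 512KB.  The mapping
   itself is range_to_eclass() further down; the hypothetical helper
   below just shows what it computes for two sample ranges. */
#if 0
static void example_eclass_binning ( void )
{
   /* Range [0x4000, 0x40FF]: both endpoints are in granule
      (0x4000 >> 11) & 0xFF == 8, so the range gets eclass 8. */
   vg_assert(range_to_eclass(0x4000, 0x100) == 8);
   /* Range [0x47F0, 0x480F] straddles the granule boundary at
      0x4800, so its endpoints map to bins 8 and 9, and the range
      falls into the catch-all ECLASS_MISC instead. */
   vg_assert(range_to_eclass(0x47F0, 0x20) == ECLASS_MISC);
}
#endif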

#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */


/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      UInt from_sNo;   /* sector number */
      UInt from_tteNo; /* TTE number in given sector */
      UInt from_offs;  /* code offset from TCEntry::tcptr where the patch is */
      Bool to_fastEP;  /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      UInt to_sNo;    /* sector number */
      UInt to_tteNo;  /* TTE number in given sector */
      UInt from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;

#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      UInt     n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR */
      InEdge   fixed[N_FIXED_IN_EDGE_ARR];
      XArray*  var; /* XArray* of InEdge */
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      UInt    n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      OutEdge fixed[N_FIXED_OUT_EDGE_ARR];
      XArray* var; /* XArray* of OutEdge */
   }
   OutEdgeArr;
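
/* Editorial note (not in the original source): the invariant
   maintained by the InEdgeArr__ and OutEdgeArr__ helpers below is
   that either .var is NULL and the edges live in
   .fixed[0 .. n_fixed-1], or .var is non-NULL, .n_fixed is 0, and
   all edges live in the XArray.  Most blocks have only a few edges,
   so the fixed arrays avoid heap allocation in the common case. */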


/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      ULong    count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge.base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
          1. erased (as part of the oldest sector cleanup) when the
             youngest sector is full.
          2. discarded due to calls to VG_(discard_translations).
             VG_(discard_translations) sets the status of the
             translation to 'Deleted'.
             Among other uses, the gdbserver discards one or more
             translations when a breakpoint is inserted or removed at
             an Addr, when single-stepping mode is enabled or disabled,
             or when a translation is instrumented for gdbserver
             (all the target jumps of this translation are
              invalidated).

         So, it is possible that a translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and
         find_TTEntry_from_hcode check that the 'from' translation
         still exists before doing the patching.

         Is it safe to erase or discard the current translation E
         being executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not-yet-patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not do the chaining to a translation E that cannot be
         found.  The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased in case a new translation is done.
         A new translation will only be done if a not-yet-translated
         jump is to be executed.  The execution of the Deleted
         translation E will continue until a not-yet-patched jump is
         encountered.  This situation is then similar to the 'erasing'
         case above: the current translation E can be erased or
         overwritten, as the execution will continue at the new
         translation N.
      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if  A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntry;


/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      UInt   tteNo;
   }
   HostExtent;

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;


/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_TC_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest_sector should be >= 0 and <
   N_TC_SECTORS.  The initial -1 value indicates the TT/TC system is
   not yet initialised.
*/
static Sector sectors[MAX_N_SECTORS];
static Int    youngest_sector = -1;
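
/* Editorial sketch (not in the original source; under #if 0 so it
   does not affect the build): how the youngest sector advances once
   everything is initialised, per the FIFO scheme described above.
   The hypothetical helper name is illustrative; the real advance
   happens in VG_(add_to_transtab) when the current sector has no
   room left for a new translation. */
#if 0
static void example_advance_youngest_sector ( void )
{
   youngest_sector++;
   if (youngest_sector >= n_sectors)
      youngest_sector = 0;  /* wrap: the oldest sector gets recycled */
   /* initialiseSector(youngest_sector) then dumps that sector's old
      contents (unchaining and discarding its translations) before
      the sector is refilled. */
}
#endif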

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int    tc_sector_szQ = 0;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  -1 denotes "no entry". */
static Int sector_search_order[MAX_N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value of 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
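
/* Editorial sketch (not in the original source; under #if 0 so it
   does not affect the build): what the assembly dispatcher's probe
   of VG_(tt_fast) amounts to.  The helper name is hypothetical; it
   assumes VG_TT_FAST_HASH reduces a guest address to an index in
   [0, VG_TT_FAST_SIZE), which is how setFastCacheEntry below uses
   it. */
#if 0
static void* example_fast_lookup ( Addr guest )
{
   FastCacheEntry* ent = &VG_(tt_fast)[ VG_TT_FAST_HASH(guest) ];
   if (ent->guest == guest)
      return (void*)ent->host;  /* hit: host code to jump to */
   return NULL;                 /* miss: do a full TT lookup instead */
}
#endif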

/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( const HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}


/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntry* index_tte ( UInt sNo, UInt tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->tt);
   TTEntry* tte = &s->tt[tteNo];
   vg_assert(tte->status == InUse);
   return tte;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = -1; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = -1; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntry__init ( TTEntry* tte )
{
   VG_(memset)(tte, 0, sizeof(*tte));
}

static UWord InEdgeArr__size ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->var);
      iea->var = NULL;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->fixed[i] = iea->fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                               ttaux_free,
                               sizeof(InEdge));
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(iea->var, &iea->fixed[i]);
         }
         VG_(addToXA)(iea->var, ie);
         iea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         iea->fixed[iea->n_fixed++] = *ie;
      }
   }
}

static UWord OutEdgeArr__size ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->var);
      oea->var = NULL;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->fixed[i] = oea->fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                               ttaux_free,
                               sizeof(OutEdge));
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(oea->var, &oea->fixed[i]);
         }
         VG_(addToXA)(oea->var, oe);
         oea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         oea->fixed[oea->n_fixed++] = *oe;
      }
   }
}

static
Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
{
   const HostExtent* hx1 = v1;
   const HostExtent* hx2 = v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}

/* True if hx is a dead host extent, i.e. corresponds to host code
   of an entry that was invalidated. */
static
Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
{
   const UInt tteNo = hx->tteNo;
#define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
      VG_(printf) (m                                            \
                   " start 0x%p len %u sector %d ttslot %u"     \
                   " tt.entry 0x%llu tt.tcptr 0x%p\n",          \
                   hx->start, hx->len, (int)(sec - sectors),    \
                   hx->tteNo,                                   \
                   sec->tt[tteNo].entry, sec->tt[tteNo].tcptr)

   /* Entry might have been invalidated and not re-used yet. */
   if (sec->tt[tteNo].status == Deleted) {
      LDEBUG("found deleted entry");
      return True;
   }
   /* Maybe we found this entry via a host_extents which was
      inserted for an entry which was changed to Deleted and then
      re-used afterwards.  If this entry was re-used, then its tcptr
      is >= the host_extent's start (i.e. the previous tcptr) + len.
      This is the case as there is no re-use of host code: a new
      or re-used entry always gets "higher value" host code. */
   if ((UChar*) sec->tt[tteNo].tcptr >= hx->start + hx->len) {
      LDEBUG("found re-used entry");
      return True;
   }

   return False;
#undef LDEBUG
}

static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/UInt* from_sNo,
                              /*OUT*/UInt* from_tteNo,
                              void* hcode )
{
   Int i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < n_sectors; i++) {
      Int sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      Sector* sec = &sectors[sno];
      XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       HostExtent__cmpOrd );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         UInt tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);

         /* If this hx entry corresponds to dead host code, we must
            report that this code has not been found, as it cannot
            be patched. */
         if (HostExtent__is_dead (hx, sec))
            return False;

         vg_assert(sec->tt[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = (UInt)tteNo;
         return True;
      }
   }
   return False;
}


/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( void* hcode )
{
   Int i, sno;
   for (i = 0; i < n_sectors; i++) {
      sno = sector_search_order[i];
      if (sno == -1)
         break; /* run out of sectors to search */
      if ((UChar*)hcode >= (UChar*)sectors[sno].tc
          && (UChar*)hcode <= (UChar*)sectors[sno].tc_next
                              + sizeof(ULong) - 1)
         return True;
   }
   return False;
}


/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              UInt  to_sNo,
                              UInt  to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch vex_arch = VexArch_INVALID;
   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

   // host_code is where we're patching to.  So it needs to
   // take into account whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte    = index_tte(to_sNo, to_tteNo);
   void*    host_code = ((UChar*)to_tte->tcptr)
                        + (to_fastEP ? LibVEX_evCheckSzB(vex_arch) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   UInt from_sNo   = (UInt)-1;
   UInt from_tteNo = (UInt)-1;
   Bool from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           vex_arch,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}


/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch vex_arch,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntry* tte
      = index_tte(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tte->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
       = LibVEX_UnChain( vex_arch, place_to_patch,
                         place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}


/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch vex_arch,
                                           UInt here_sNo, UInt here_tteNo )
{
   if (DEBUG_TRANSTAB)
      VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
   UWord    i, j, n, m;
   Int      evCheckSzB = LibVEX_evCheckSzB(vex_arch);
   TTEntry* here_tte   = index_tte(here_sNo, here_tteNo);
   if (DEBUG_TRANSTAB)
      VG_(printf)("... QQQ tt.entry 0x%llu tt.tcptr 0x%p\n",
                  here_tte->entry, here_tte->tcptr);
   vg_assert(here_tte->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tte->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(vex_arch, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tte->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
           break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tte->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tte->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
           break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tte->in_edges);
   OutEdgeArr__makeEmpty(&here_tte->out_edges);
}


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static Int range_to_eclass ( Addr64 start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}


/* Calculates the equivalence class numbers for any VexGuestExtents.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/

static
Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
                                  VexGuestExtents* vge )
{
#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   Int i, j, n_ec, r;

   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   n_ec = 0;
   for (i = 0; i < vge->n_used; i++) {
      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}


/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
{
   Int    old_sz, new_sz, i, r;
   UShort *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (DEBUG_TRANSTAB) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(UShort));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}


/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
{
   Int i, r, eclasses[3];
   TTEntry* tte;
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   tte = &sec->tt[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );

   vg_assert(r >= 1 && r <= 3);
   tte->n_tte2ec = r;

   for (i = 0; i < r; i++) {
      tte->tte2ec_ec[i] = eclasses[i];
      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
   }
}


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->eclasses[0 .. tte->n_eclasses-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntries
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check that the TTEntries they point at
      point back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   Int i, j, nListed;
   /* assert the array is the right size */
   vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
                               / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] < 0 || sector_search_order[i] >= n_sectors)
         break;
   }
   nListed = i;
   for (/* */; i < n_sectors; i++) {
      if (sector_search_order[i] != -1)
         break;
   }
   if (i != n_sectors)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == -1)
         continue;
      for (j = i+1; j < n_sectors; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < n_sectors; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}

static Bool sanity_check_all_sectors ( void )
{
   Int     sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < n_sectors; sno++) {
      Int i;
      Int nr_not_dead_hx = 0;
      Int szhxa;
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
      szhxa = VG_(sizeXA)(sec->host_extents);
      for (i = 0; i < szhxa; i++) {
         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
         if (!HostExtent__is_dead (hx, sec))
            nr_not_dead_hx++;
      }
      if (nr_not_dead_hx != sec->tt_n_inuse) {
         VG_(debugLog)(0, "transtab",
                       "nr_not_dead_hx %d sanity fail (expected == in use %d)\n",
                       nr_not_dead_hx, sec->tt_n_inuse);
         return False;
      }
   }

   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}



/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static Bool isValidSector ( Int sector )
{
   if (sector < 0 || sector >= n_sectors)
      return False;
   return True;
}

static inline UInt HASH_TT ( Addr64 key )
{
   UInt kHi = (UInt)(key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return k32 % N_TTES_PER_SECTOR;
}
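
/* Worked example (editorial note, not in the original source): for
   key 0x12345678, kHi is 0, so k32 == 0x12345678; rotating right by
   7 bits gives 0xF02468AC, and 0xF02468AC % 65521 == 31642, the TT
   slot initially probed for that guest address. */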

static void setFastCacheEntry ( Addr64 key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = (Addr)key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate,
      which should reject any attempt to make a translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}

/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}
1349 
initialiseSector(Int sno)1350 static void initialiseSector ( Int sno )
1351 {
1352    Int     i;
1353    SysRes  sres;
1354    Sector* sec;
1355    vg_assert(isValidSector(sno));
1356 
1357    { Bool sane = sanity_check_sector_search_order();
1358      vg_assert(sane);
1359    }
1360    sec = &sectors[sno];
1361 
1362    if (sec->tc == NULL) {
1363 
1364       /* Sector has never been used before.  Need to allocate tt and
1365          tc. */
1366       vg_assert(sec->tt == NULL);
1367       vg_assert(sec->tc_next == NULL);
1368       vg_assert(sec->tt_n_inuse == 0);
1369       for (i = 0; i < ECLASS_N; i++) {
1370          vg_assert(sec->ec2tte_size[i] == 0);
1371          vg_assert(sec->ec2tte_used[i] == 0);
1372          vg_assert(sec->ec2tte[i] == NULL);
1373       }
1374       vg_assert(sec->host_extents == NULL);
1375 
1376       VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
1377       if (VG_(clo_stats))
1378          VG_(dmsg)("transtab: " "allocate sector %d\n", sno);
1379 
1380       sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
1381       if (sr_isError(sres)) {
1382          VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
1383                                      8 * tc_sector_szQ );
1384 	 /*NOTREACHED*/
1385       }
1386       sec->tc = (ULong*)(AddrH)sr_Res(sres);
1387 
1388       sres = VG_(am_mmap_anon_float_valgrind)
1389                 ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
1390       if (sr_isError(sres)) {
1391          VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
1392                                      N_TTES_PER_SECTOR * sizeof(TTEntry) );
1393 	 /*NOTREACHED*/
1394       }
1395       sec->tt = (TTEntry*)(AddrH)sr_Res(sres);
1396 
1397       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1398          sec->tt[i].status   = Empty;
1399          sec->tt[i].n_tte2ec = 0;
1400       }
1401 
1402       /* Set up the host_extents array. */
1403       sec->host_extents
1404          = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
1405                       ttaux_free,
1406                       sizeof(HostExtent));
1407 
1408       /* Add an entry in the sector_search_order */
1409       for (i = 0; i < n_sectors; i++) {
1410          if (sector_search_order[i] == -1)
1411             break;
1412       }
1413       vg_assert(i >= 0 && i < n_sectors);
1414       sector_search_order[i] = sno;
1415 
1416       if (VG_(clo_verbosity) > 2)
1417          VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);
1418 
1419    } else {
1420 
1421       /* Sector has been used before.  Dump the old contents. */
1422       VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
1423       if (VG_(clo_stats))
1424          VG_(dmsg)("transtab: " "recycle sector %d\n", sno);
1425 
1426       vg_assert(sec->tt != NULL);
1427       vg_assert(sec->tc_next != NULL);
1428       n_dump_count += sec->tt_n_inuse;
1429 
1430       VexArch vex_arch = VexArch_INVALID;
1431       VG_(machine_get_VexArchInfo)( &vex_arch, NULL );
1432 
1433       /* Visit each just-about-to-be-abandoned translation. */
1434       if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
1435                                       sno);
1436       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1437          if (sec->tt[i].status == InUse) {
1438             vg_assert(sec->tt[i].n_tte2ec >= 1);
1439             vg_assert(sec->tt[i].n_tte2ec <= 3);
1440             n_dump_osize += vge_osize(&sec->tt[i].vge);
1441             /* Tell the tool too. */
1442             if (VG_(needs).superblock_discards) {
1443                VG_TDICT_CALL( tool_discard_superblock_info,
1444                               sec->tt[i].entry,
1445                               sec->tt[i].vge );
1446             }
1447             unchain_in_preparation_for_deletion(vex_arch, sno, i);
1448          } else {
1449             vg_assert(sec->tt[i].n_tte2ec == 0);
1450          }
1451          sec->tt[i].status   = Empty;
1452          sec->tt[i].n_tte2ec = 0;
1453       }
1454       if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
1455                                       sno);
1456 
1457       /* Free up the eclass structures. */
1458       for (i = 0; i < ECLASS_N; i++) {
1459          if (sec->ec2tte_size[i] == 0) {
1460             vg_assert(sec->ec2tte_used[i] == 0);
1461             vg_assert(sec->ec2tte[i] == NULL);
1462          } else {
1463             vg_assert(sec->ec2tte[i] != NULL);
1464             ttaux_free(sec->ec2tte[i]);
1465             sec->ec2tte[i] = NULL;
1466             sec->ec2tte_size[i] = 0;
1467             sec->ec2tte_used[i] = 0;
1468          }
1469       }
1470 
1471       /* Empty out the host extents array. */
1472       vg_assert(sec->host_extents != NULL);
1473       VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
1474       vg_assert(VG_(sizeXA)(sec->host_extents) == 0);
1475 
1476       /* Sanity check: ensure it is already in
1477          sector_search_order[]. */
1478       for (i = 0; i < n_sectors; i++) {
1479          if (sector_search_order[i] == sno)
1480             break;
1481       }
1482       vg_assert(i >= 0 && i < n_sectors);
1483 
1484       if (VG_(clo_verbosity) > 2)
1485          VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
1486    }
1487 
1488    sec->tc_next = sec->tc;
1489    sec->tt_n_inuse = 0;
1490 
1491    invalidateFastCache();
1492 
1493    { Bool sane = sanity_check_sector_search_order();
1494      vg_assert(sane);
1495    }
1496 }
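
/* A minimal illustrative sketch (hypothetical names, not part of the
   real TT/TC code) of the allocate-on-first-use / recycle-thereafter
   policy that initialiseSector implements above. */
#if 0
#include <stdlib.h>
#include <string.h>

#define N_SLOTS 8                  /* stands in for n_sectors */

static void* slot_mem[N_SLOTS];    /* NULL => never used yet */

static void* prepare_slot ( int sno, size_t szB )
{
   if (slot_mem[sno] == NULL) {
      /* First use: allocate the backing storage. */
      slot_mem[sno] = malloc(szB);
   } else {
      /* Used before: dump the old contents wholesale. */
      memset(slot_mem[sno], 0, szB);
   }
   return slot_mem[sno];
}
#endif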
1497 
1498 
1499 /* Add a translation of vge to TT/TC.  The translation is temporarily
1500    in code[0 .. code_len-1].
1501 
1502    pre: youngest_sector points to a valid (although possibly full)
1503    sector.
1504 */
1505 void VG_(add_to_transtab)( VexGuestExtents* vge,
1506                            Addr64           entry,
1507                            AddrH            code,
1508                            UInt             code_len,
1509                            Bool             is_self_checking,
1510                            Int              offs_profInc,
1511                            UInt             n_guest_instrs,
1512                            VexArch          arch_host )
1513 {
1514    Int    tcAvailQ, reqdQ, y, i;
1515    ULong  *tcptr, *tcptr2;
1516    UChar* srcP;
1517    UChar* dstP;
1518 
1519    vg_assert(init_done);
1520    vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
1521 
1522    /* 60000: should agree with N_TMPBUF in m_translate.c. */
1523    vg_assert(code_len > 0 && code_len < 60000);
1524 
1525    /* Generally stay sane */
1526    vg_assert(n_guest_instrs < 200); /* though it can be zero */
1527 
1528    if (DEBUG_TRANSTAB)
1529       VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d) ...\n",
1530                   entry, code_len);
1531 
1532    n_in_count++;
1533    n_in_tsize += code_len;
1534    n_in_osize += vge_osize(vge);
1535    if (is_self_checking)
1536       n_in_sc_count++;
1537 
1538    y = youngest_sector;
1539    vg_assert(isValidSector(y));
1540 
1541    if (sectors[y].tc == NULL)
1542       initialiseSector(y);
1543 
1544    /* Try putting the translation in this sector. */
1545    reqdQ = (code_len + 7) >> 3;
1546 
1547    /* Will it fit in tc? */
1548    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
1549               - ((ULong*)(sectors[y].tc_next));
1550    vg_assert(tcAvailQ >= 0);
1551    vg_assert(tcAvailQ <= tc_sector_szQ);
1552 
1553    if (tcAvailQ < reqdQ
1554        || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
1555       /* No.  So move on to the next sector.  Either it's never been
1556          used before, in which case it will get its tt/tc allocated
1557          now, or it has been used before, in which case it is set to be
1558          empty, hence throwing out the oldest sector. */
1559       vg_assert(tc_sector_szQ > 0);
1560       Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
1561                            / N_TTES_PER_SECTOR;
1562       Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
1563                            / tc_sector_szQ;
1564       VG_(debugLog)(1,"transtab",
1565                       "declare sector %d full "
1566                       "(TT loading %2d%%, TC loading %2d%%)\n",
1567                       y, tt_loading_pct, tc_loading_pct);
1568       if (VG_(clo_stats)) {
1569          VG_(dmsg)("transtab: "
1570                    "declare sector %d full "
1571                    "(TT loading %2d%%, TC loading %2d%%)\n",
1572                    y, tt_loading_pct, tc_loading_pct);
1573       }
1574       youngest_sector++;
1575       if (youngest_sector >= n_sectors)
1576          youngest_sector = 0;
1577       y = youngest_sector;
1578       initialiseSector(y);
1579    }
1580 
1581    /* Be sure ... */
1582    tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
1583               - ((ULong*)(sectors[y].tc_next));
1584    vg_assert(tcAvailQ >= 0);
1585    vg_assert(tcAvailQ <= tc_sector_szQ);
1586    vg_assert(tcAvailQ >= reqdQ);
1587    vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
1588    vg_assert(sectors[y].tt_n_inuse >= 0);
1589 
1590    /* Copy into tc. */
1591    tcptr = sectors[y].tc_next;
1592    vg_assert(tcptr >= &sectors[y].tc[0]);
1593    vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);
1594 
1595    dstP = (UChar*)tcptr;
1596    srcP = (UChar*)code;
1597    VG_(memcpy)(dstP, srcP, code_len);
1598    sectors[y].tc_next += reqdQ;
1599    sectors[y].tt_n_inuse++;
1600 
1601    /* more paranoia */
1602    tcptr2 = sectors[y].tc_next;
1603    vg_assert(tcptr2 >= &sectors[y].tc[0]);
1604    vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);
1605 
1606    /* Find an empty tt slot, and use it.  There must be such a slot
1607       since tt is never allowed to get completely full. */
1608    i = HASH_TT(entry);
1609    vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
1610    while (True) {
1611       if (sectors[y].tt[i].status == Empty
1612           || sectors[y].tt[i].status == Deleted)
1613          break;
1614       i++;
1615       if (i >= N_TTES_PER_SECTOR)
1616          i = 0;
1617    }
1618 
1619    TTEntry__init(&sectors[y].tt[i]);
1620    sectors[y].tt[i].status = InUse;
1621    sectors[y].tt[i].tcptr  = tcptr;
1622    sectors[y].tt[i].count  = 0;
1623    sectors[y].tt[i].weight = n_guest_instrs == 0 ? 1 : n_guest_instrs;
1624    sectors[y].tt[i].vge    = *vge;
1625    sectors[y].tt[i].entry  = entry;
1626 
1627    /* Patch in the profile counter location, if necessary. */
1628    if (offs_profInc != -1) {
1629       vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
1630       VexInvalRange vir
1631          = LibVEX_PatchProfInc( arch_host,
1632                                 dstP + offs_profInc,
1633                                 &sectors[y].tt[i].count );
1634       VG_(invalidate_icache)( (void*)vir.start, vir.len );
1635    }
1636 
1637    VG_(invalidate_icache)( dstP, code_len );
1638 
1639    /* Add this entry to the host_extents map, checking that we're
1640       adding in order. */
1641    { HostExtent hx;
1642      hx.start = (UChar*)tcptr;
1643      hx.len   = code_len;
1644      hx.tteNo = i;
1645      vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
1646      XArray* hx_array = sectors[y].host_extents;
1647      vg_assert(hx_array);
1648      Word n = VG_(sizeXA)(hx_array);
1649      if (n > 0) {
1650         HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
1651         vg_assert(hx_prev->start + hx_prev->len <= hx.start);
1652      }
1653      VG_(addToXA)(hx_array, &hx);
1654      if (DEBUG_TRANSTAB)
1655         VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
1656                     hx.start, hx.len, y, i);
1657    }
1658 
1659    /* Update the fast-cache. */
1660    setFastCacheEntry( entry, tcptr );
1661 
1662    /* Note the eclass numbers for this translation. */
1663    upd_eclasses_after_add( &sectors[y], i );
1664 }
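
/* Illustrative sketch (hypothetical names, not the real TT code): the
   TT slot search above is classic open addressing with linear probing.
   It assumes the table is never allowed to fill completely, which
   N_TTES_PER_SECTOR_USABLE guarantees above, so the loop terminates. */
#if 0
typedef enum { SlotEmpty, SlotInUse, SlotDeleted } SlotStatus;

static int find_free_slot ( const SlotStatus* tab, int n_slots,
                            unsigned long long key )
{
   int i = (int)(key % (unsigned long long)n_slots); /* cf. HASH_TT */
   while (tab[i] == SlotInUse) {
      i++;                      /* linear probe ... */
      if (i >= n_slots)
         i = 0;                 /* ... wrapping at the end */
   }
   return i;                    /* first Empty or Deleted slot */
}
#endif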
1665 
1666 
1667 /* Search for the translation of the given guest address.  If
1668    requested, a successful search can also cause the fast-caches to be
1669    updated.
1670 */
1671 Bool VG_(search_transtab) ( /*OUT*/AddrH* res_hcode,
1672                             /*OUT*/UInt*  res_sNo,
1673                             /*OUT*/UInt*  res_tteNo,
1674                             Addr64        guest_addr,
1675                             Bool          upd_cache )
1676 {
1677    Int i, j, k, kstart, sno;
1678 
1679    vg_assert(init_done);
1680    /* Find the initial probe point just once.  It will be the same in
1681       all sectors and avoids multiple expensive % operations. */
1682    n_full_lookups++;
1683    k      = -1;
1684    kstart = HASH_TT(guest_addr);
1685    vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);
1686 
1687    /* Search in all the sectors, using sector_search_order[] as a
1688       heuristic guide as to what order to visit the sectors. */
1689    for (i = 0; i < n_sectors; i++) {
1690 
1691       sno = sector_search_order[i];
1692       if (UNLIKELY(sno == -1))
1693          return False; /* run out of sectors to search */
1694 
1695       k = kstart;
1696       for (j = 0; j < N_TTES_PER_SECTOR; j++) {
1697          n_lookup_probes++;
1698          if (sectors[sno].tt[k].status == InUse
1699              && sectors[sno].tt[k].entry == guest_addr) {
1700             /* found it */
1701             if (upd_cache)
1702                setFastCacheEntry(
1703                   guest_addr, sectors[sno].tt[k].tcptr );
1704             if (res_hcode)
1705                *res_hcode = (AddrH)sectors[sno].tt[k].tcptr;
1706             if (res_sNo)
1707                *res_sNo = sno;
1708             if (res_tteNo)
1709                *res_tteNo = k;
1710             /* pull this one one step closer to the front.  For large
1711                apps this more or less halves the number of required
1712                probes. */
1713             if (i > 0) {
1714                Int tmp = sector_search_order[i-1];
1715                sector_search_order[i-1] = sector_search_order[i];
1716                sector_search_order[i] = tmp;
1717             }
1718             return True;
1719          }
1720          if (sectors[sno].tt[k].status == Empty)
1721             break; /* not found in this sector */
1722          k++;
1723          if (k == N_TTES_PER_SECTOR)
1724             k = 0;
1725       }
1726 
1727       /* If we fall off the end, all entries are InUse and not
1728          matching, or Deleted.  In any case we did not find it in this
1729          sector. */
1730    }
1731 
1732    /* Not found in any sector. */
1733    return False;
1734 }
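
/* Illustrative sketch (hypothetical names): the "pull one step closer
   to the front" trick above is the transpose heuristic for
   self-organising lists. */
#if 0
static void promote_one_step ( int order[], int i )
{
   /* On a hit at position i, swap entries i-1 and i, so sectors that
      hit often drift towards the front of the search order. */
   if (i > 0) {
      int tmp    = order[i-1];
      order[i-1] = order[i];
      order[i]   = tmp;
   }
}
#endif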
1735 
1736 
1737 /*-------------------------------------------------------------*/
1738 /*--- Delete translations.                                  ---*/
1739 /*-------------------------------------------------------------*/
1740 
1741 /* forward */
1742 static void unredir_discard_translations( Addr64, ULong );
1743 
1744 /* Stuff for deleting translations which intersect with a given
1745    address range.  Unfortunately, to make this run at a reasonable
1746    speed, it is complex. */
1747 
1748 static inline
1749 Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
1750 {
1751    Addr64 e1 = s1 + r1 - 1ULL;
1752    Addr64 e2 = s2 + r2 - 1ULL;
1753    if (e1 < s2 || e2 < s1)
1754       return False;
1755    return True;
1756 }
1757 
1758 static inline
1759 Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
1760 {
1761    if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
1762       return True;
1763    if (vge->n_used < 2)
1764       return False;
1765    if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
1766       return True;
1767    if (vge->n_used < 3)
1768       return False;
1769    if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
1770       return True;
1771    return False;
1772 }
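
/* Worked example: overlap1 treats ranges as closed intervals
   [s, s+r-1].  With s1 = 0x1000, r1 = 0x100 we get e1 = 0x10FF; with
   s2 = 0x10FF, r2 = 1 we get e2 = 0x10FF.  Neither e1 < s2 nor
   e2 < s1 holds, so the one shared byte at 0x10FF is correctly
   reported as an overlap. */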
1773 
1774 
1775 /* Delete a tt entry, and update all the eclass data accordingly. */
1776 
1777 static void delete_tte ( /*MOD*/Sector* sec, UInt secNo, Int tteno,
1778                          VexArch vex_arch )
1779 {
1780    Int      i, ec_num, ec_idx;
1781    TTEntry* tte;
1782 
1783    /* sec and secNo are mutually redundant; cross-check. */
1784    vg_assert(sec == &sectors[secNo]);
1785 
1786    vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1787    tte = &sec->tt[tteno];
1788    vg_assert(tte->status == InUse);
1789    vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);
1790 
1791    /* Unchain .. */
1792    unchain_in_preparation_for_deletion(vex_arch, secNo, tteno);
1793 
1794    /* Deal with the ec-to-tte links first. */
1795    for (i = 0; i < tte->n_tte2ec; i++) {
1796       ec_num = (Int)tte->tte2ec_ec[i];
1797       ec_idx = tte->tte2ec_ix[i];
1798       vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
1799       vg_assert(ec_idx >= 0);
1800       vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
1801       /* Assert that the two links point at each other. */
1802       vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
1803       /* "delete" the pointer back to here. */
1804       sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
1805    }
1806 
1807    /* Now fix up this TTEntry. */
1808    tte->status   = Deleted;
1809    tte->n_tte2ec = 0;
1810 
1811    /* Stats .. */
1812    sec->tt_n_inuse--;
1813    n_disc_count++;
1814    n_disc_osize += vge_osize(&tte->vge);
1815 
1816    /* Tell the tool too. */
1817    if (VG_(needs).superblock_discards) {
1818       VG_TDICT_CALL( tool_discard_superblock_info,
1819                      tte->entry,
1820                      tte->vge );
1821    }
1822 }
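
/* Note: deletion above is by tombstoning, not removal.  The TT slot
   is marked Deleted rather than Empty so that linear probe chains in
   VG_(search_transtab) stay intact (a probe stops only at Empty), and
   the ec2tte back-pointers are overwritten with EC2TTE_DELETED rather
   than compacted away. */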
1823 
1824 
1825 /* Delete translations from sec which intersect specified range, but
1826    only consider translations in the specified eclass. */
1827 
1828 static
1829 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, UInt secNo,
1830                                             Addr64 guest_start, ULong range,
1831                                             Int ec,
1832                                             VexArch vex_arch )
1833 {
1834    Int      i;
1835    UShort   tteno;
1836    Bool     anyDeld = False;
1837    TTEntry* tte;
1838 
1839    vg_assert(ec >= 0 && ec < ECLASS_N);
1840 
1841    for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1842 
1843       tteno = sec->ec2tte[ec][i];
1844       if (tteno == EC2TTE_DELETED) {
1845          /* already deleted */
1846          continue;
1847       }
1848 
1849       vg_assert(tteno < N_TTES_PER_SECTOR);
1850 
1851       tte = &sec->tt[tteno];
1852       vg_assert(tte->status == InUse);
1853 
1854       if (overlaps( guest_start, range, &tte->vge )) {
1855          anyDeld = True;
1856          delete_tte( sec, secNo, (Int)tteno, vex_arch );
1857       }
1858 
1859    }
1860 
1861    return anyDeld;
1862 }
1863 
1864 
1865 /* Delete translations from sec which intersect specified range, the
1866    slow way, by inspecting all translations in sec. */
1867 
1868 static
1869 Bool delete_translations_in_sector ( /*MOD*/Sector* sec, UInt secNo,
1870                                      Addr64 guest_start, ULong range,
1871                                      VexArch vex_arch )
1872 {
1873    Int  i;
1874    Bool anyDeld = False;
1875 
1876    for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1877       if (sec->tt[i].status == InUse
1878           && overlaps( guest_start, range, &sec->tt[i].vge )) {
1879          anyDeld = True;
1880          delete_tte( sec, secNo, i, vex_arch );
1881       }
1882    }
1883 
1884    return anyDeld;
1885 }
1886 
1887 
1888 void VG_(discard_translations) ( Addr64 guest_start, ULong range,
1889                                  const HChar* who )
1890 {
1891    Sector* sec;
1892    Int     sno, ec;
1893    Bool    anyDeleted = False;
1894 
1895    vg_assert(init_done);
1896 
1897    VG_(debugLog)(2, "transtab",
1898                     "discard_translations(0x%llx, %lld) req by %s\n",
1899                     guest_start, range, who );
1900 
1901    /* Pre-deletion sanity check */
1902    if (VG_(clo_sanity_level) >= 4) {
1903       Bool sane = sanity_check_all_sectors();
1904       vg_assert(sane);
1905    }
1906 
1907    if (range == 0)
1908       return;
1909 
1910    VexArch vex_arch = VexArch_INVALID;
1911    VG_(machine_get_VexArchInfo)( &vex_arch, NULL );
1912 
1913    /* There are two different ways to do this.
1914 
1915       If the range fits within a single address-range equivalence
1916       class, as will be the case for a cache line sized invalidation,
1917       then we only have to inspect the set of translations listed in
1918       that equivalence class, and also in the "sin-bin" equivalence
1919       class ECLASS_MISC.
1920 
1921       Otherwise, the invalidation is of a larger range and probably
1922       results from munmap.  In this case it's (probably!) faster just
1923       to inspect all translations, dump those we don't want, and
1924       regenerate the equivalence class information (since modifying it
1925       in-situ is even more expensive).
1926    */
1927 
1928    /* First off, figure out if the range falls within a single class,
1929       and if so which one. */
1930 
1931    ec = ECLASS_MISC;
1932    if (range < (1ULL << ECLASS_SHIFT))
1933       ec = range_to_eclass( guest_start, (UInt)range );
1934 
1935    /* if ec is ECLASS_MISC then we aren't looking at just a single
1936       class, so use the slow scheme.  Else use the fast scheme,
1937       examining 'ec' and ECLASS_MISC. */
1938 
1939    if (ec != ECLASS_MISC) {
1940 
1941       VG_(debugLog)(2, "transtab",
1942                        "                    FAST, ec = %d\n", ec);
1943 
1944       /* Fast scheme */
1945       vg_assert(ec >= 0 && ec < ECLASS_MISC);
1946 
1947       for (sno = 0; sno < n_sectors; sno++) {
1948          sec = &sectors[sno];
1949          if (sec->tc == NULL)
1950             continue;
1951          anyDeleted |= delete_translations_in_sector_eclass(
1952                           sec, sno, guest_start, range, ec,
1953                           vex_arch
1954                        );
1955          anyDeleted |= delete_translations_in_sector_eclass(
1956                           sec, sno, guest_start, range, ECLASS_MISC,
1957                           vex_arch
1958                        );
1959       }
1960 
1961    } else {
1962 
1963       /* slow scheme */
1964 
1965       VG_(debugLog)(2, "transtab",
1966                        "                    SLOW, ec = %d\n", ec);
1967 
1968       for (sno = 0; sno < n_sectors; sno++) {
1969          sec = &sectors[sno];
1970          if (sec->tc == NULL)
1971             continue;
1972          anyDeleted |= delete_translations_in_sector(
1973                           sec, sno, guest_start, range, vex_arch );
1974       }
1975 
1976    }
1977 
1978    if (anyDeleted)
1979       invalidateFastCache();
1980 
1981    /* don't forget the no-redir cache */
1982    unredir_discard_translations( guest_start, range );
1983 
1984    /* Post-deletion sanity check */
1985    if (VG_(clo_sanity_level) >= 4) {
1986       Int      i;
1987       TTEntry* tte;
1988       Bool     sane = sanity_check_all_sectors();
1989       vg_assert(sane);
1990       /* But now, also check the requested address range isn't
1991          present anywhere. */
1992       for (sno = 0; sno < n_sectors; sno++) {
1993          sec = &sectors[sno];
1994          if (sec->tc == NULL)
1995             continue;
1996          for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1997             tte = &sec->tt[i];
1998             if (tte->status != InUse)
1999                continue;
2000             vg_assert(!overlaps( guest_start, range, &tte->vge ));
2001          }
2002       }
2003    }
2004 }
2005 
2006 
2007 /*------------------------------------------------------------*/
2008 /*--- AUXILIARY: the unredirected TT/TC                    ---*/
2009 /*------------------------------------------------------------*/
2010 
2011 /* A very simple translation cache which holds a small number of
2012    unredirected translations.  This is completely independent of the
2013    main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
2014    both structures are simply dumped and we start over.
2015 
2016    Since these translations are unredirected, the search key is (by
2017    definition) the first address entry in the .vge field. */
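
/* A minimal illustrative sketch (hypothetical names) of the
   dump-everything-on-full allocation policy described above. */
#if 0
static int cache_usedQ = 0;          /* 8-byte words handed out so far */

static int cache_alloc ( int needQ, int capacityQ )
{
   int offset;
   if (cache_usedQ + needQ > capacityQ)
      cache_usedQ = 0;               /* full: dump everything, start over */
   offset = cache_usedQ;
   cache_usedQ += needQ;
   return offset;                    /* word offset of the new entry */
}
#endif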
2018 
2019 /* Sized to hold 500 translations of average size 1000 bytes. */
2020 
2021 #define UNREDIR_SZB   1000
2022 
2023 #define N_UNREDIR_TT  500
2024 #define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
2025 
2026 typedef
2027    struct {
2028       VexGuestExtents vge;
2029       Addr            hcode;
2030       Bool            inUse;
2031    }
2032    UTCEntry;
2033 
2034 /* We just allocate forwards in _tc, never deleting. */
2035 static ULong    *unredir_tc;
2036 static Int      unredir_tc_used = N_UNREDIR_TCQ;
2037 
2038 /* Slots in _tt can come into use and out again (.inUse).
2039    Nevertheless _tt_highwater is maintained so that invalidations
2040    don't have to scan all the slots when only a few are in use.
2041    _tt_highwater holds the index of the highest ever allocated
2042    slot. */
2043 static UTCEntry unredir_tt[N_UNREDIR_TT];
2044 static Int      unredir_tt_highwater;
2045 
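/* Illustrative sketch (hypothetical names) of why the highwater mark
   described above pays off: invalidation scans only slots
   0..highwater rather than the whole table, which is cheap while few
   slots have ever been used. */
#if 0
typedef struct { int inUse; } Slot;

static Slot slots[500];
static int  highwater = -1;    /* highest slot index ever allocated */

static void mark_used ( int i )
{
   slots[i].inUse = 1;
   if (i > highwater)
      highwater = i;
}

static void invalidate_all ( void )
{
   int i;
   /* Slots above the highwater mark have never been touched, so the
      scan can stop there. */
   for (i = 0; i <= highwater; i++)
      slots[i].inUse = 0;
}
#endif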
2046 
2047 static void init_unredir_tt_tc ( void )
2048 {
2049    Int i;
2050    if (unredir_tc == NULL) {
2051       SysRes sres = VG_(am_mmap_anon_float_valgrind)
2052                        ( N_UNREDIR_TT * UNREDIR_SZB );
2053       if (sr_isError(sres)) {
2054          VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
2055                                      N_UNREDIR_TT * UNREDIR_SZB);
2056          /*NOTREACHED*/
2057       }
2058       unredir_tc = (ULong *)(AddrH)sr_Res(sres);
2059    }
2060    unredir_tc_used = 0;
2061    for (i = 0; i < N_UNREDIR_TT; i++)
2062       unredir_tt[i].inUse = False;
2063    unredir_tt_highwater = -1;
2064 }
2065 
2066 /* Do a sanity check; return False on failure. */
2067 static Bool sanity_check_redir_tt_tc ( void )
2068 {
2069    Int i;
2070    if (unredir_tt_highwater < -1) return False;
2071    if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
2072 
2073    for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
2074       if (unredir_tt[i].inUse)
2075          return False;
2076 
2077    if (unredir_tc_used < 0) return False;
2078    if (unredir_tc_used > N_UNREDIR_TCQ) return False;
2079 
2080    return True;
2081 }
2082 
2083 
2084 /* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
2085    is temporarily in code[0 .. code_len-1].
2086 */
2087 void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
2088                                    Addr64           entry,
2089                                    AddrH            code,
2090                                    UInt             code_len )
2091 {
2092    Int   i, j, code_szQ;
2093    HChar *srcP, *dstP;
2094 
2095    vg_assert(sanity_check_redir_tt_tc());
2096 
2097    /* This is the whole point: it's not redirected! */
2098    vg_assert(entry == vge->base[0]);
2099 
2100    /* How many 8-byte unredir_tc words are needed */
2101    code_szQ = (code_len + 7) / 8;
2102 
2103    /* Look for an empty unredir_tt slot */
2104    for (i = 0; i < N_UNREDIR_TT; i++)
2105       if (!unredir_tt[i].inUse)
2106          break;
2107 
2108    if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
2109       /* It's full; dump everything we currently have */
2110       init_unredir_tt_tc();
2111       i = 0;
2112    }
2113 
2114    vg_assert(unredir_tc_used >= 0);
2115    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2116    vg_assert(code_szQ > 0);
2117    vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
2118    vg_assert(i >= 0 && i < N_UNREDIR_TT);
2119    vg_assert(unredir_tt[i].inUse == False);
2120 
2121    if (i > unredir_tt_highwater)
2122       unredir_tt_highwater = i;
2123 
2124    dstP = (HChar*)&unredir_tc[unredir_tc_used];
2125    srcP = (HChar*)code;
2126    for (j = 0; j < code_len; j++)
2127       dstP[j] = srcP[j];
2128 
2129    VG_(invalidate_icache)( dstP, code_len );
2130 
2131    unredir_tt[i].inUse = True;
2132    unredir_tt[i].vge   = *vge;
2133    unredir_tt[i].hcode = (Addr)dstP;
2134 
2135    unredir_tc_used += code_szQ;
2136    vg_assert(unredir_tc_used >= 0);
2137    vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2138 
2139    vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
2140 }
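
/* Worked example: code_szQ = (code_len + 7) / 8 rounds the byte
   length up to whole 8-byte tc words: code_len in 1..8 gives 1 word,
   9..16 gives 2 words, and so on. */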
2141 
2142 Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
2143                                     Addr64        guest_addr )
2144 {
2145    Int i;
2146    for (i = 0; i < N_UNREDIR_TT; i++) {
2147       if (!unredir_tt[i].inUse)
2148          continue;
2149       if (unredir_tt[i].vge.base[0] == guest_addr) {
2150          *result = (AddrH)unredir_tt[i].hcode;
2151          return True;
2152       }
2153    }
2154    return False;
2155 }
2156 
2157 static void unredir_discard_translations( Addr64 guest_start, ULong range )
2158 {
2159    Int i;
2160 
2161    vg_assert(sanity_check_redir_tt_tc());
2162 
2163    for (i = 0; i <= unredir_tt_highwater; i++) {
2164       if (unredir_tt[i].inUse
2165           && overlaps( guest_start, range, &unredir_tt[i].vge))
2166          unredir_tt[i].inUse = False;
2167    }
2168 }
2169 
2170 
2171 /*------------------------------------------------------------*/
2172 /*--- Initialisation.                                      ---*/
2173 /*------------------------------------------------------------*/
2174 
2175 void VG_(init_tt_tc) ( void )
2176 {
2177    Int i, avg_codeszQ;
2178 
2179    vg_assert(!init_done);
2180    init_done = True;
2181 
2182    /* Otherwise lots of things go wrong... */
2183    vg_assert(sizeof(ULong) == 8);
2184    vg_assert(sizeof(Addr64) == 8);
2185    /* check fast cache entries really are 2 words long */
2186    vg_assert(sizeof(Addr) == sizeof(void*));
2187    vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
2188    /* check fast cache entries are packed back-to-back with no spaces */
2189    vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
2190    /* check fast cache is aligned as we requested.  Not fatal if it
2191       isn't, but we might as well make sure. */
2192    vg_assert(VG_IS_16_ALIGNED( (Addr)&VG_(tt_fast)[0] ));
2193 
2194    if (VG_(clo_verbosity) > 2)
2195       VG_(message)(Vg_DebugMsg,
2196                    "TT/TC: VG_(init_tt_tc) "
2197                    "(startup of code management)\n");
2198 
2199    /* Figure out how big each tc area should be.  */
2200    avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
2201    tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);
2202 
2203    /* Ensure the calculated value is not way crazy. */
2204    vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
2205    vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);
2206 
2207    n_sectors = VG_(clo_num_transtab_sectors);
2208    vg_assert(n_sectors >= MIN_N_SECTORS);
2209    vg_assert(n_sectors <= MAX_N_SECTORS);
2210 
2211    /* Initialise the sectors, even the ones we aren't going to use.
2212       Set all fields to zero. */
2213    youngest_sector = 0;
2214    for (i = 0; i < MAX_N_SECTORS; i++)
2215       VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));
2216 
2217    /* Initialise the sector_search_order hint table, including the
2218       entries we aren't going to use. */
2219    for (i = 0; i < MAX_N_SECTORS; i++)
2220       sector_search_order[i] = -1;
2221 
2222    /* Initialise the fast cache. */
2223    invalidateFastCache();
2224 
2225    /* and the unredir tt/tc */
2226    init_unredir_tt_tc();
2227 
2228    if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
2229        || VG_(debugLog_getLevel) () >= 2) {
2230       VG_(message)(Vg_DebugMsg,
2231          "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
2232           n_sectors, 8 * tc_sector_szQ,
2233           n_sectors * 8 * tc_sector_szQ );
2234       VG_(message)(Vg_DebugMsg,
2235          "TT/TC: table: %d tables  of %d bytes each = %d total\n",
2236           n_sectors, (int)(N_TTES_PER_SECTOR * sizeof(TTEntry)),
2237           (int)(n_sectors * N_TTES_PER_SECTOR * sizeof(TTEntry)));
2238       VG_(message)(Vg_DebugMsg,
2239          "TT/TC: table: %d entries each = %d total entries"
2240                        " max occupancy %d (%d%%)\n",
2241          N_TTES_PER_SECTOR,
2242          n_sectors * N_TTES_PER_SECTOR,
2243          n_sectors * N_TTES_PER_SECTOR_USABLE,
2244          SECTOR_TT_LIMIT_PERCENT );
2245    }
2246 }
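
/* Worked example (with a purely illustrative average translation size
   of 72 bytes): avg_codeszQ = (72+7)/8 = 9, so
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1+9)
                 = 42588 * 10 = 425880 ULongs,
   i.e. roughly 3.25 MiB of code cache per sector, before counting the
   TT array and eclass overheads. */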
2247 
2248 
2249 /*------------------------------------------------------------*/
2250 /*--- Printing out statistics.                             ---*/
2251 /*------------------------------------------------------------*/
2252 
2253 static ULong safe_idiv( ULong a, ULong b )
2254 {
2255    return (b == 0 ? 0 : a / b);
2256 }
2257 
2258 UInt VG_(get_bbs_translated) ( void )
2259 {
2260    return n_in_count;
2261 }
2262 
2263 void VG_(print_tt_tc_stats) ( void )
2264 {
2265    VG_(message)(Vg_DebugMsg,
2266       "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
2267       n_full_lookups, n_lookup_probes );
2268    VG_(message)(Vg_DebugMsg,
2269       "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
2270       n_fast_updates, n_fast_flushes );
2271 
2272    VG_(message)(Vg_DebugMsg,
2273                 " transtab: new        %'lld "
2274                 "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
2275                 n_in_count, n_in_osize, n_in_tsize,
2276                 safe_idiv(10*n_in_tsize, n_in_osize),
2277                 n_in_sc_count);
2278    VG_(message)(Vg_DebugMsg,
2279                 " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
2280                 n_dump_count, n_dump_osize );
2281    VG_(message)(Vg_DebugMsg,
2282                 " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
2283                 n_disc_count, n_disc_osize );
2284 
2285    if (DEBUG_TRANSTAB) {
2286       Int i;
2287       VG_(printf)("\n");
2288       for (i = 0; i < ECLASS_N; i++) {
2289          VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
2290          if (i % 16 == 15)
2291             VG_(printf)("\n");
2292       }
2293       VG_(printf)("\n\n");
2294    }
2295 }
2296 
2297 /*------------------------------------------------------------*/
2298 /*--- Printing out of profiling results.                   ---*/
2299 /*------------------------------------------------------------*/
2300 
2301 static ULong score ( TTEntry* tte )
2302 {
2303    return ((ULong)tte->weight) * ((ULong)tte->count);
2304 }
2305 
2306 ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
2307 {
2308    Int   sno, i, r, s;
2309    ULong score_total;
2310 
2311    /* First, compute the total weighted count, and find the top N
2312       ttes.  tops contains pointers to the most-used n_tops blocks, in
2313       descending order (viz, tops[0] is the highest scorer). */
2314    for (i = 0; i < n_tops; i++) {
2315       tops[i].addr  = 0;
2316       tops[i].score = 0;
2317    }
2318 
2319    score_total = 0;
2320 
2321    for (sno = 0; sno < n_sectors; sno++) {
2322       if (sectors[sno].tc == NULL)
2323          continue;
2324       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2325          if (sectors[sno].tt[i].status != InUse)
2326             continue;
2327          score_total += score(&sectors[sno].tt[i]);
2328          /* Find the rank for sectors[sno].tt[i]. */
2329          r = n_tops-1;
2330          while (True) {
2331             if (r == -1)
2332                break;
2333             if (tops[r].addr == 0) {
2334                r--;
2335                continue;
2336             }
2337             if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
2338                r--;
2339                continue;
2340             }
2341             break;
2342          }
2343          r++;
2344          vg_assert(r >= 0 && r <= n_tops);
2345          /* This bb should be placed at r, and bbs above it shifted
2346             upwards one slot. */
2347          if (r < n_tops) {
2348             for (s = n_tops-1; s > r; s--)
2349                tops[s] = tops[s-1];
2350             tops[r].addr  = sectors[sno].tt[i].entry;
2351             tops[r].score = score( &sectors[sno].tt[i] );
2352          }
2353       }
2354    }
2355 
2356    /* Now zero out all the counter fields, so that we can make
2357       multiple calls here and just get the values since the last call,
2358       each time, rather than values accumulated for the whole run. */
2359    for (sno = 0; sno < n_sectors; sno++) {
2360       if (sectors[sno].tc == NULL)
2361          continue;
2362       for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2363          if (sectors[sno].tt[i].status != InUse)
2364             continue;
2365          sectors[sno].tt[i].count = 0;
2366       }
2367    }
2368 
2369    return score_total;
2370 }
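
/* Illustrative sketch (hypothetical names): the rank-and-shift logic
   above is one step of an insertion sort into a descending top-N
   array; empty slots are represented here by score 0. */
#if 0
static void insert_topN ( unsigned long long scores[], int n,
                          unsigned long long s )
{
   int r = n - 1;
   while (r >= 0 && s > scores[r])   /* walk past every slot s beats */
      r--;
   r++;                              /* s belongs at index r */
   if (r < n) {
      int q;
      for (q = n-1; q > r; q--)      /* shift lower scorers down one */
         scores[q] = scores[q-1];
      scores[r] = s;
   }
}
#endif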
2371 
2372 /*--------------------------------------------------------------------*/
2373 /*--- end                                                          ---*/
2374 /*--------------------------------------------------------------------*/
2375