/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                  m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2012 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libcproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


/* #define DEBUG_TRANSTAB */


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/*------------------ CONSTANTS ------------------*/

/* Number of sectors the TC is divided into.  If you need a larger
   overall translation cache, increase this value. */
#define N_SECTORS 8

/* Number of TC entries in each sector.  This needs to be a prime
   number to work properly; it must be <= 65535 (so that a TT index
   fits in a UShort, leaving room for 0xFFFF (EC2TTE_DELETED) to
   denote 'deleted'); and it is strongly recommended not to change
   this.  65521 is the largest prime <= 65535. */
#define N_TTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR_USABLE \
           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
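/* Illustrative arithmetic (editorial, not used by the code): with
   N_TTES_PER_SECTOR = 65521 and SECTOR_TT_LIMIT_PERCENT = 65, this
   works out to (65521 * 65) / 100 = 42588 usable entries per
   sector. */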

/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)
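/* Editorial illustration: with ECLASS_SHIFT = 11 and ECLASS_WIDTH = 8,
   a range is binned by bits [18:11] of its start and end addresses.
   For the short ranges used here (guest extents are at most 64KB),
   both endpoints agree in those bits exactly when the range lies
   within a single 2KB-aligned block, in which case its bin is that
   block's number modulo 256; otherwise it goes to ECLASS_MISC.  See
   range_to_eclass() below. */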

#define EC2TTE_DELETED 0xFFFF /* 16-bit special value */


/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      UInt from_sNo;   /* sector number */
      UInt from_tteNo; /* TTE number in given sector */
      UInt from_offs;  /* code offset from TCEntry::tcptr where the patch is */
      Bool to_fastEP;  /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      UInt to_sNo;    /* sector number */
      UInt to_tteNo;  /* TTE number in given sector */
      UInt from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;


#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      UInt    n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR */
      InEdge  fixed[N_FIXED_IN_EDGE_ARR];
      XArray* var;     /* XArray* of InEdge */
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      UInt    n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      OutEdge fixed[N_FIXED_OUT_EDGE_ARR];
      XArray* var;     /* XArray* of OutEdge */
   }
   OutEdgeArr;
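/* Editorial note on the two types above: each edge array is in
   exactly one of two states.  Either 'var' is NULL and the edges
   live in fixed[0 .. n_fixed-1], or 'var' is non-NULL, n_fixed is
   zero, and all edges live in the XArray.  The accessor functions
   below (InEdgeArr__* / OutEdgeArr__*) assert this invariant at
   every use. */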


/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too. */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      ULong    count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
          1. erased (as part of the oldest sector cleanup) when the
             youngest sector is full.
          2. discarded due to calls to VG_(discard_translations).
             VG_(discard_translations) sets the status of the
             translation to 'Deleted'.
             Among others, the gdbserver discards one or more
             translations when a breakpoint is inserted or removed
             at an Addr, or when single stepping mode is
             enabled/disabled, or when a translation is instrumented
             for gdbserver (all the target jumps of this translation
             are invalidated).

         So it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         If a translation has been erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and
         find_TTEntry_from_hcode therefore check that the 'from'
         translation still exists before doing the patching.

         Is it safe to erase or discard the current translation E
         being executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being made.  A new translation is made only
         when execution reaches a not-yet-patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not chain to a translation E that can no longer be found.
         The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased if a new translation is made.
         A new translation will only be made if a not-yet-translated
         jump is to be executed.  The execution of the Deleted
         translation E will continue until a non-patched jump is
         encountered.  This situation is then similar to the 'erasing'
         case above: the current translation E can be erased or
         overwritten, as the execution will continue at the new
         translation N.
      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations).  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if  A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntry;


/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      UInt   tteNo;
   }
   HostExtent;

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;


/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest_sector should be >= 0 and < N_SECTORS.
   The initial value of -1 indicates the TT/TC system is not yet
   initialised.
*/
static Sector sectors[N_SECTORS];
static Int    youngest_sector = -1;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int tc_sector_szQ;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  -1 denotes "no entry". */
static Int sector_search_order[N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- is used
   to mark an entry as invalid, causing a guaranteed cache miss.
   This relies on the assumption that no guest code actually has that
   address, hence a value of 0x1 seems good.  m_translate gives the
   client a synthetic segfault if it tries to execute at this
   address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
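/* For orientation, here is a minimal C sketch of the fast-cache probe
   that the assembly dispatcher performs.  This is illustration only
   (the real lookup lives in m_dispatch/dispatch-<platform>.S), so it
   is compiled out. */
#if 0
static Addr fast_lookup_sketch ( Addr guest )
{
   FastCacheEntry* ent = &VG_(tt_fast)[VG_TT_FAST_HASH(guest)];
   if (LIKELY(ent->guest == guest))
      return ent->host; /* hit: continue executing in the TC */
   return 0;            /* miss: caller falls back to
                           VG_(search_transtab) and, if needed,
                           makes a new translation */
}
#endif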

/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}


/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntry* index_tte ( UInt sNo, UInt tteNo )
{
   vg_assert(sNo < N_SECTORS);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->tt);
   TTEntry* tte = &s->tt[tteNo];
   vg_assert(tte->status == InUse);
   return tte;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = -1; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = -1; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntry__init ( TTEntry* tte )
{
   VG_(memset)(tte, 0, sizeof(*tte));
}

static UWord InEdgeArr__size ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->var);
      iea->var = NULL;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->fixed[i] = iea->fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                               ttaux_free,
                               sizeof(InEdge));
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(iea->var, &iea->fixed[i]);
         }
         VG_(addToXA)(iea->var, ie);
         iea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         iea->fixed[iea->n_fixed++] = *ie;
      }
   }
}

static UWord OutEdgeArr__size ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->var);
      oea->var = NULL;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->fixed[i] = oea->fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                               ttaux_free,
                               sizeof(OutEdge));
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(oea->var, &oea->fixed[i]);
         }
         VG_(addToXA)(oea->var, oe);
         oea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         oea->fixed[oea->n_fixed++] = *oe;
      }
   }
}

static
Int HostExtent__cmpOrd ( void* v1, void* v2 )
{
   HostExtent* hx1 = (HostExtent*)v1;
   HostExtent* hx2 = (HostExtent*)v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}
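/* Note on the comparator above: two extents compare "equal" exactly
   when they overlap.  Since the host_extents array is kept sorted
   and non-overlapping, probing it with a key of length 1 (as
   find_TTEntry_from_hcode does below) finds the unique extent
   containing that single address, if any. */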

static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/UInt* from_sNo,
                              /*OUT*/UInt* from_tteNo,
                              void* hcode )
{
   Int i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < N_SECTORS; i++) {
      Int sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      Sector* sec = &sectors[sno];
      XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len   = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       (Int(*)(void*,void*))HostExtent__cmpOrd
                    );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         UInt tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);
         /* Entry might have been invalidated.  Consider this
            as not found. */
         if (sec->tt[tteNo].status == Deleted)
            return False;
         vg_assert(sec->tt[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = (UInt)tteNo;
         return True;
      }
   }
   return False;
}


/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( void* hcode )
{
   Int i, sno;
   for (i = 0; i < N_SECTORS; i++) {
      sno = sector_search_order[i];
      if (sno == -1)
         break; /* run out of sectors to search */
      if ((UChar*)hcode >= (UChar*)sectors[sno].tc
          && (UChar*)hcode <= (UChar*)sectors[sno].tc_next
                              + sizeof(ULong) - 1)
         return True;
   }
   return False;
}


/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              UInt  to_sNo,
                              UInt  to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch vex_arch = VexArch_INVALID;
   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

   // host_code is where we're patching to.  So it needs to
   // take into account, whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte    = index_tte(to_sNo, to_tteNo);
   void*    host_code = ((UChar*)to_tte->tcptr)
                        + (to_fastEP ? LibVEX_evCheckSzB(vex_arch) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   UInt from_sNo   = (UInt)-1;
   UInt from_tteNo = (UInt)-1;
   Bool from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           vex_arch,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}
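/* Editorial summary of the bookkeeping above: after chaining block A
   (at offset 'offs' within A's host code) to block B's fast or slow
   entry point, we have added
      (A, offs, fastEP?)  to  B.in_edges    and
      (B, offs)           to  A.out_edges
   which is exactly the information unchain_one() and
   unchain_in_preparation_for_deletion() below need in order to undo
   the patch later. */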


/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch vex_arch,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntry* tte
      = index_tte(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((HChar*)tte->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
      = LibVEX_UnChain( vex_arch, place_to_patch,
                        place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}


/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch vex_arch,
                                           UInt here_sNo, UInt here_tteNo )
{
   if (0)
      VG_(printf)("QQQ unchain_in_prep %u.%u\n", here_sNo, here_tteNo);
   UWord i, j, n, m;
   Int evCheckSzB = LibVEX_evCheckSzB(vex_arch);
   TTEntry* here_tte = index_tte(here_sNo, here_tteNo);
   vg_assert(here_tte->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tte->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(vex_arch, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tte->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
            break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tte->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tte->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
            break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tte->in_edges);
   OutEdgeArr__makeEmpty(&here_tte->out_edges);
}


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static Int range_to_eclass ( Addr64 start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}
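/* Worked example (editorial): with ECLASS_SHIFT = 11 and
   ECLASS_WIDTH = 8, the range [0x12345, 0x12354] has
   loBits = hiBits = (0x12345 >> 11) & 0xFF = 0x24, so it lands in
   eclass 0x24 (36).  The range [0x127FF, 0x12800], by contrast,
   straddles a 2KB boundary (loBits = 0x24, hiBits = 0x25) and so
   goes to ECLASS_MISC. */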


/* Calculates the equivalence class numbers for any VexGuestExtent.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/

static
Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
                                  VexGuestExtents* vge )
{
#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   Int i, j, n_ec, r;

   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   n_ec = 0;
   for (i = 0; i < vge->n_used; i++) {
      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}


/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
{
   Int    old_sz, new_sz, i, r;
   UShort *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(UShort));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}
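/* Editorial note on the growth rule above: starting from an empty
   array, successive expansions give capacities
      8, 16, 32, 64, 96, 144, 216, 324, ...
   i.e. doubling while small, then growing by 1.5x to limit waste
   once the array gets larger. */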


/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
{
   Int i, r, eclasses[3];
   TTEntry* tte;
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   tte = &sec->tt[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );

   vg_assert(r >= 1 && r <= 3);
   tte->n_tte2ec = r;

   for (i = 0; i < r; i++) {
      tte->tte2ec_ec[i] = eclasses[i];
      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
   }
}


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   HChar*   whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1] must
            equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   Int i, j, nListed;
   /* assert the array is the right size */
   vg_assert(N_SECTORS == (sizeof(sector_search_order)
                           / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
   for (i = 0; i < N_SECTORS; i++) {
      if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS)
         break;
   }
   nListed = i;
   for (/* */; i < N_SECTORS; i++) {
      if (sector_search_order[i] != -1)
         break;
   }
   if (i != N_SECTORS)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < N_SECTORS; i++) {
      if (sector_search_order[i] == -1)
         continue;
      for (j = i+1; j < N_SECTORS; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < N_SECTORS; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}

static Bool sanity_check_all_sectors ( void )
{
   Int     sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < N_SECTORS; sno++) {
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
   }
   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}



/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static Bool isValidSector ( Int sector )
{
   if (sector < 0 || sector >= N_SECTORS)
      return False;
   return True;
}

static inline UInt HASH_TT ( Addr64 key )
{
   UInt kHi = (UInt)(key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return k32 % N_TTES_PER_SECTOR;
}
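/* Worked example (editorial): HASH_TT(0x401000) computes
   k32 = 0 ^ 0x401000 = 0x401000; rotating right by 7 gives 0x8020
   (= 32800); and 32800 % 65521 = 32800, so guest address 0x401000
   initially probes slot 32800 of the sector's tt array. */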

static void setFastCacheEntry ( Addr64 key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = (Addr)key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}

/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}

static void initialiseSector ( Int sno )
{
   Int     i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(AddrH)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch vex_arch = VexArch_INVALID;
      VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

      /* Visit each just-about-to-be-abandoned translation. */
      if (0) VG_(printf)("QQQ unlink-entire-sector: %d START\n", sno);
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
            unchain_in_preparation_for_deletion(vex_arch, sno, i);
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }
      if (0) VG_(printf)("QQQ unlink-entire-sector: %d END\n", sno);

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            ttaux_free(sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}


/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking,
                           Int              offs_profInc,
                           UInt             n_guest_instrs,
                           VexArch          arch_host )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, tho */

   if (0)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      VG_(debugLog)(1,"transtab",
                      "declare sector %d full "
                      "(TT loading %2d%%, TC loading %2d%%)\n",
                      y,
                      (100 * sectors[y].tt_n_inuse)
                         / N_TTES_PER_SECTOR,
                      (100 * (tc_sector_szQ - tcAvailQ))
                         / tc_sector_szQ);
      youngest_sector++;
      if (youngest_sector >= N_SECTORS)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   TTEntry__init(&sectors[y].tt[i]);
   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr  = tcptr;
   sectors[y].tt[i].count  = 0;
   sectors[y].tt[i].weight = n_guest_instrs == 0 ? 1 : n_guest_instrs;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Patch in the profile counter location, if necessary. */
   if (offs_profInc != -1) {
      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
      VexInvalRange vir
         = LibVEX_PatchProfInc( arch_host,
                                dstP + offs_profInc,
                                &sectors[y].tt[i].count );
      VG_(invalidate_icache)( (void*)vir.start, vir.len );
   }

   VG_(invalidate_icache)( dstP, code_len );

   /* Add this entry to the host_extents map, checking that we're
      adding in order. */
   { HostExtent hx;
     hx.start = (UChar*)tcptr;
     hx.len   = code_len;
     hx.tteNo = i;
     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
     XArray* hx_array = sectors[y].host_extents;
     vg_assert(hx_array);
     Word n = VG_(sizeXA)(hx_array);
     if (n > 0) {
        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
     }
     VG_(addToXA)(hx_array, &hx);
   }

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}


/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/AddrH* res_hcode,
                            /*OUT*/UInt*  res_sNo,
                            /*OUT*/UInt*  res_tteNo,
                            Addr64        guest_addr,
                            Bool          upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   k      = -1;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < N_SECTORS; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tcptr );
            if (res_hcode)
               *res_hcode = (AddrH)sectors[sno].tt[k].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = k;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}


/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr64, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex. */

static inline
Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
{
   Addr64 e1 = s1 + r1 - 1ULL;
   Addr64 e2 = s2 + r2 - 1ULL;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}

static inline
Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
{
   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
      return True;
   if (vge->n_used < 2)
      return False;
   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
      return True;
   if (vge->n_used < 3)
      return False;
   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
      return True;
   return False;
}
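/* Example (editorial): overlap1(0x1000, 0x100, 0x10FF, 0x100) is
   True, since the ranges [0x1000, 0x10FF] and [0x10FF, 0x11FE]
   share exactly one byte; moving the second range up by one to
   start at 0x1100 would make it False. */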


/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*MOD*/Sector* sec, UInt secNo, Int tteno,
                         VexArch vex_arch )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Unchain .. */
   unchain_in_preparation_for_deletion(vex_arch, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}
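
/* Illustrative sketch (not part of the build): delete_tte relies on
   the mutual links between a TTEntry and the per-sector ec2tte lists
   -- the entry records (class, index) pairs, and the list slot at
   that index must name the entry back.  A hypothetical miniature of
   the invariant check and the tombstoning step: */
#if 0
#include <assert.h>

typedef struct { int ec; int ix; } DemoFwdLink;  /* stored in the entry */

static void demo_unlink ( DemoFwdLink* fwd,
                          unsigned short** ec2tte, int tteno )
{
   /* The two links must point at each other ... */
   assert( ec2tte[fwd->ec][fwd->ix] == (unsigned short)tteno );
   /* ... then the back-pointer is tombstoned, not compacted. */
   ec2tte[fwd->ec][fwd->ix] = 0xFFFF;  /* cf. EC2TTE_DELETED */
}
#endif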


/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, UInt secNo,
                                            Addr64 guest_start, ULong range,
                                            Int ec,
                                            VexArch vex_arch )
{
   Int      i;
   UShort   tteno;
   Bool     anyDeld = False;
   TTEntry* tte;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      tte = &sec->tt[tteno];
      vg_assert(tte->status == InUse);

      if (overlaps( guest_start, range, &tte->vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, (Int)tteno, vex_arch );
      }

   }

   return anyDeld;
}


/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec. */

static
Bool delete_translations_in_sector ( /*MOD*/Sector* sec, UInt secNo,
                                     Addr64 guest_start, ULong range,
                                     VexArch vex_arch )
{
   Int  i;
   Bool anyDeld = False;

   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
      if (sec->tt[i].status == InUse
          && overlaps( guest_start, range, &sec->tt[i].vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, i, vex_arch );
      }
   }

   return anyDeld;
}


void VG_(discard_translations) ( Addr64 guest_start, ULong range,
                                 HChar* who )
{
   Sector* sec;
   Int     sno, ec;
   Bool    anyDeleted = False;

   vg_assert(init_done);

   VG_(debugLog)(2, "transtab",
                    "discard_translations(0x%llx, %lld) req by %s\n",
                    guest_start, range, who );

   /* Pre-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Bool sane = sanity_check_all_sectors();
      vg_assert(sane);
   }

   if (range == 0)
      return;

   VexArch vex_arch = VexArch_INVALID;
   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

   /* There are two different ways to do this.

      If the range fits within a single address-range equivalence
      class, as will be the case for a cache line sized invalidation,
      then we only have to inspect the set of translations listed in
      that equivalence class, and also in the "sin-bin" equivalence
      class ECLASS_MISC.

      Otherwise, the invalidation is of a larger range and probably
      results from munmap.  In this case it's (probably!) faster just
      to inspect all translations, dump those we don't want, and
      regenerate the equivalence class information (since modifying it
      in-situ is even more expensive).
   */
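   /* Illustrative sketch (not part of the build): a plausible reading
      of the binning, from the ECLASS_SHIFT/ECLASS_WIDTH constants at
      the top of this file.  A bin is one value of address bits
      [ECLASS_SHIFT .. ECLASS_SHIFT+ECLASS_WIDTH-1]; a range whose
      first and last byte fall in the same bin is "small", anything
      else goes to the catch-all.  demo_range_to_eclass below is
      hypothetical -- the real range_to_eclass is defined elsewhere in
      this file.  Aliasing between widely separated addresses that
      share those bits is harmless: genuinely overlapping ranges
      always share them, and bin membership only selects candidates
      for the precise overlaps() test. */
#if 0
#include <stdint.h>

#define DEMO_SHIFT 11
#define DEMO_WIDTH 8
#define DEMO_MISC  (1 << DEMO_WIDTH)

static int demo_range_to_eclass ( uint64_t start, uint32_t len )
{
   uint32_t mask   = (1u << DEMO_WIDTH) - 1u;
   uint32_t loBits = (uint32_t)( start            >> DEMO_SHIFT) & mask;
   uint32_t hiBits = (uint32_t)((start + len - 1) >> DEMO_SHIFT) & mask;
   return loBits == hiBits ? (int)loBits : DEMO_MISC;
}
#endif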

   /* First off, figure out if the range falls within a single class,
      and if so which one. */

   ec = ECLASS_MISC;
   if (range < (1ULL << ECLASS_SHIFT))
      ec = range_to_eclass( guest_start, (UInt)range );

   /* if ec is ECLASS_MISC then we aren't looking at just a single
      class, so use the slow scheme.  Else use the fast scheme,
      examining 'ec' and ECLASS_MISC. */

   if (ec != ECLASS_MISC) {

      VG_(debugLog)(2, "transtab",
                       "  FAST, ec = %d\n", ec);

      /* Fast scheme */
      vg_assert(ec >= 0 && ec < ECLASS_MISC);

      for (sno = 0; sno < N_SECTORS; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ec,
                          vex_arch
                       );
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ECLASS_MISC,
                          vex_arch
                       );
      }

   } else {

      /* slow scheme */

      VG_(debugLog)(2, "transtab",
                       "  SLOW, ec = %d\n", ec);

      for (sno = 0; sno < N_SECTORS; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector(
                          sec, sno, guest_start, range, vex_arch );
      }

   }

   if (anyDeleted)
      invalidateFastCache();

   /* don't forget the no-redir cache */
   unredir_discard_translations( guest_start, range );

   /* Post-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Int      i;
      TTEntry* tte;
      Bool     sane = sanity_check_all_sectors();
      vg_assert(sane);
      /* But now, also check the requested address range isn't
         present anywhere. */
      for (sno = 0; sno < N_SECTORS; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
            tte = &sec->tt[i];
            if (tte->status != InUse)
               continue;
            vg_assert(!overlaps( guest_start, range, &tte->vge ));
         }
      }
   }
}


/*------------------------------------------------------------*/
/*--- AUXILIARY: the unredirected TT/TC                    ---*/
/*------------------------------------------------------------*/

/* A very simple translation cache which holds a small number of
   unredirected translations.  This is completely independent of the
   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */
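
/* Illustrative sketch (not part of the build): the scheme above is a
   bump allocator over a fixed arena -- allocate by advancing a
   cursor, never free individual entries, and when a request does not
   fit, dump the whole arena and start again.  In miniature (the real
   code additionally resets the unredir_tt slots and highwater mark;
   callers must request at most the arena size): */
#if 0
#include <stddef.h>
#include <stdint.h>

#define DEMO_ARENA_QWORDS 1024

static uint64_t demo_arena[DEMO_ARENA_QWORDS];
static size_t   demo_used = 0;

static uint64_t* demo_alloc_qwords ( size_t nq )
{
   if (nq > DEMO_ARENA_QWORDS - demo_used)
      demo_used = 0;                     /* full: dump everything */
   {
      uint64_t* p = &demo_arena[demo_used];
      demo_used += nq;                   /* bump the cursor */
      return p;
   }
}
#endif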

/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))

typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong *unredir_tc;
static Int    unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;


static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB);
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)(AddrH)sr_Res(sres);
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}

/* Do a sanity check; return False on failure. */
static Bool sanity_check_redir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}


/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
*/
void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
                                   Addr64           entry,
                                   AddrH            code,
                                   UInt             code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many 8-byte (ULong) units of unredir_tc are needed */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tt slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   VG_(invalidate_icache)( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}
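
/* Illustrative note (not part of the build): code_szQ above rounds
   the byte length up to whole ULong units, the granularity in which
   unredir_tc is parcelled out.  A few checks of that formula: */
#if 0
#include <assert.h>

static void demo_round_to_qwords ( void )
{
   assert( (1  + 7) / 8 == 1 );   /* 1..8 bytes  -> 1 qword  */
   assert( (8  + 7) / 8 == 1 );
   assert( (9  + 7) / 8 == 2 );   /* 9..16 bytes -> 2 qwords */
   assert( (16 + 7) / 8 == 2 );
}
#endif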

Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
                                    Addr64 guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = (AddrH)unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}

static void unredir_discard_translations( Addr64 guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_redir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse
          && overlaps( guest_start, range, &unredir_tt[i].vge))
         unredir_tt[i].inUse = False;
   }
}


/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

void VG_(init_tt_tc) ( void )
{
   Int i, j, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be.  */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);

   /* Initialise the sectors */
   youngest_sector = 0;
   for (i = 0; i < N_SECTORS; i++) {
      sectors[i].tc = NULL;
      sectors[i].tt = NULL;
      sectors[i].tc_next = NULL;
      sectors[i].tt_n_inuse = 0;
      for (j = 0; j < ECLASS_N; j++) {
         sectors[i].ec2tte_size[j] = 0;
         sectors[i].ec2tte_used[j] = 0;
         sectors[i].ec2tte[j] = NULL;
      }
      sectors[i].host_extents = NULL;
   }

   /* Initialise the sector_search_order hint table. */
   for (i = 0; i < N_SECTORS; i++)
      sector_search_order[i] = -1;

   /* Initialise the fast cache. */
   invalidateFastCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
          N_SECTORS, 8 * tc_sector_szQ,
          N_SECTORS * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d total entries, max occupancy %d (%d%%)\n",
          N_SECTORS * N_TTES_PER_SECTOR,
          N_SECTORS * N_TTES_PER_SECTOR_USABLE,
          SECTOR_TT_LIMIT_PERCENT );
   }

   VG_(debugLog)(2, "transtab",
      "cache: %d sectors of %d bytes each = %d total\n",
       N_SECTORS, 8 * tc_sector_szQ,
       N_SECTORS * 8 * tc_sector_szQ );
   VG_(debugLog)(2, "transtab",
      "table: %d total entries, max occupancy %d (%d%%)\n",
       N_SECTORS * N_TTES_PER_SECTOR,
       N_SECTORS * N_TTES_PER_SECTOR_USABLE,
       SECTOR_TT_LIMIT_PERCENT );
}
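
/* Illustrative arithmetic (not part of the build): a worked sizing
   example, using the constants from the top of this file and a
   hypothetical tool average of 172 bytes per translation: */
#if 0
#include <assert.h>

static void demo_sector_sizing ( void )
{
   int usable = (65521 * 65) / 100;   /* N_TTES_PER_SECTOR_USABLE = 42588 */
   int avgQ   = (172 + 7) / 8;        /* avg_codeszQ = 22 qwords */
   int szQ    = usable * (1 + avgQ);  /* tc_sector_szQ = 979524 qwords */
   assert( szQ * 8 == 7836192 );      /* ~7.5 MiB of code cache per sector */
}
#endif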


/*------------------------------------------------------------*/
/*--- Printing out statistics.                             ---*/
/*------------------------------------------------------------*/

static ULong safe_idiv( ULong a, ULong b )
{
   return (b == 0 ? 0 : a / b);
}

UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}

void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
                "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
                n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
                "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
                n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'lld "
                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(10*n_in_tsize, n_in_osize),
                n_in_sc_count);
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
                n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );

   if (0) {
      Int i;
      VG_(printf)("\n");
      for (i = 0; i < ECLASS_N; i++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
         if (i % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}

/*------------------------------------------------------------*/
/*--- Printing out of profiling results.                   ---*/
/*------------------------------------------------------------*/

static ULong score ( TTEntry* tte )
{
   return ((ULong)tte->weight) * ((ULong)tte->count);
}

ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
{
   Int   sno, i, r, s;
   ULong score_total;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (i = 0; i < n_tops; i++) {
      tops[i].addr  = 0;
      tops[i].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < N_SECTORS; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i]. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
            if (tops[r].addr == 0) {
               r--;
               continue;
            }
            if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
               r--;
               continue;
            }
            break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   return score_total;
}
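
/* Illustrative sketch (not part of the build): the ranking above is a
   one-pass partial insertion sort -- for each candidate, walk up from
   the bottom of the top-N array to find its rank, then shift the
   lower scorers down one slot.  The same idea on a bare array of
   scores (empty slots hold 0 and always lose): */
#if 0
#include <stdint.h>

static void demo_top_n_insert ( uint64_t tops[], int n_tops,
                                uint64_t cand )
{
   int r = n_tops - 1;
   while (r >= 0 && cand > tops[r])
      r--;                              /* candidate outranks slot r */
   r++;                                 /* final rank for cand */
   if (r < n_tops) {
      int s;
      for (s = n_tops - 1; s > r; s--)
         tops[s] = tops[s-1];           /* shift lower scorers down */
      tops[r] = cand;
   }
}
#endif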

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/