/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libcproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


#define DEBUG_TRANSTAB 0


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/* Nr of sectors provided via command line parameter. */
UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
/* Nr of sectors.
   Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
static UInt n_sectors = 0;

/*------------------ CONSTANTS ------------------*/
/* Number of TT entries in each sector.  This needs to be a prime
   number to work properly; it must be <= 65535, so that a TT index
   fits in a UShort while leaving room for 0xFFFF (EC2TTE_DELETED) to
   denote 'deleted'; and it is strongly recommended not to change it.
   65521 is the largest prime <= 65535. */
#define N_TTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR_USABLE \
   ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
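
/* With the constants above that is (65521 * 65) / 100 == 42588
   usable entries per sector (integer division). */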

/* Equivalence classes for fast address range deletion.  There are
   1 + 2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes
   an address range which does not fall cleanly within any specific
   bin.  Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)

#define EC2TTE_DELETED 0xFFFF /* 16-bit special value */
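
/* Geometry implied by the constants above: each bin covers
   2^ECLASS_SHIFT == 2048 bytes of guest address space, and the
   2^ECLASS_WIDTH == 256 bins recur with a period of 2^19 == 512KB;
   ECLASS_N is therefore 257 (256 bins plus ECLASS_MISC). */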


/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      UInt from_sNo;   /* sector number */
      UInt from_tteNo; /* TTE number in given sector */
      UInt from_offs;  /* code offset from TTEntry::tcptr where the patch is */
      Bool to_fastEP;  /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      UInt to_sNo;    /* sector number */
      UInt to_tteNo;  /* TTE number in given sector */
      UInt from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;


#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      UInt   n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR */
      InEdge fixed[N_FIXED_IN_EDGE_ARR];
      XArray* var; /* XArray* of InEdge */
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      UInt    n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      OutEdge fixed[N_FIXED_OUT_EDGE_ARR];
      XArray* var; /* XArray* of OutEdge */
   }
   OutEdgeArr;
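
/* Design note: most translations have only a handful of in and out
   edges, so each set starts life in the small fixed[] array and only
   spills into a heap-allocated XArray when that overflows.  Whenever
   .var is non-NULL, .n_fixed must be zero; see InEdgeArr__add and
   OutEdgeArr__add below. */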


/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too. */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      ULong  count;
      UShort weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
         1. erased (as part of the oldest sector cleanup) when the
            youngest sector is full.
         2. discarded due to calls to VG_(discard_translations).
            VG_(discard_translations) sets the status of the
            translation to 'Deleted'.
            Among others, the gdbserver discards one or more
            translations when a breakpoint is inserted or removed
            at an Addr, when single stepping mode is enabled or
            disabled, or when a translation is instrumented for
            gdbserver (all the target jumps of this translation
            are invalidated).

         So it is possible that a translation A which is to be patched
         (to obtain a patched jump from A to B) is invalidated after B
         is translated and before A is patched.  If a translation has
         been erased or discarded, the patching cannot be done.
         VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode therefore
         check that the 'from' translation still exists before doing
         the patching.

         Is it safe to erase or discard the current translation E
         being executed?  Amazingly, yes, it is safe.  Here is the
         explanation:

         The translation E being executed can only be erased if a new
         translation N is being made.  A new translation is made only
         when execution reaches a not-yet-patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state and control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not chain to a translation E which can no longer be
         found.  Execution will simply continue at the current guest
         PC (i.e. at the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be
         discarded (e.g. by gdbserver).  VG_(discard_translations)
         will mark this translation E as Deleted, but the translation
         itself is not erased.  In particular, its host code can only
         be overwritten or erased when a new translation is made.  A
         new translation is made only when a not-yet-translated jump
         target is about to be executed.  Execution of the Deleted
         translation E will continue until a not-yet-patched jump is
         encountered.  This situation is then similar to the 'erasing'
         case above: the current translation E can be erased or
         overwritten, as execution will continue at the new
         translation N.
      */
248
249 /* It is possible, although very unlikely, that a block A has
250 more than one patched jump to block B. This could happen if
251 (eg) A finishes "jcond B; jmp B".
252
253 This means in turn that B's in_edges set can list A more than
254 once (twice in this example). However, each such entry must
255 have a different from_offs, since a patched jump can only
256 jump to one place at once (it's meaningless for it to have
257 multiple destinations.) IOW, the successor and predecessor
258 edges in the graph are not uniquely determined by a
259 TTEntry --> TTEntry pair, but rather by a
260 (TTEntry,offset) --> TTEntry triple.
261
262 If A has multiple edges to B then B will mention A multiple
263 times in its in_edges. To make things simpler, we then
264 require that A mentions B exactly the same number of times in
265 its out_edges. Furthermore, a matching out-in pair must have
266 the same offset (from_offs). This facilitates sanity
267 checking, and it facilitates establishing the invariant that
268 a out_edges set may not have duplicates when using the
269 equality defined by (TTEntry,offset). Hence the out_edges
270 and in_edges sets really do have both have set semantics.
271
272 eg if A has been patched to B at offsets 42 and 87 (in A)
273 then A.out_edges = { (B,42), (B,87) } (in any order)
274 and B.in_edges = { (A,42), (A,87) } (in any order)
275
276 Hence for each node pair P->Q in the graph, there's a 1:1
277 mapping between P.out_edges and Q.in_edges.
278 */
279 InEdgeArr in_edges;
280 OutEdgeArr out_edges;
281 }
282 TTEntry;
283


/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      UInt   tteNo;
   }
   HostExtent;

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try to size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;


/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all n_sectors have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest_sector should be >= 0 and < n_sectors.  The
   initial -1 value indicates the TT/TC system is not yet initialised.
*/
static Sector sectors[MAX_N_SECTORS];
static Int    youngest_sector = -1;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int tc_sector_szQ = 0;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  -1 denotes "no entry". */
static Int sector_search_order[MAX_N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   stored in an entry to force it to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
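
#if 0
/* A minimal sketch (not the real dispatcher -- that is hand-written
   assembly in m_dispatch/dispatch-<platform>.S) of how this cache is
   probed, using the same VG_TT_FAST_HASH that setFastCacheEntry
   below uses to fill it. */
static Addr sketch_fast_lookup ( Addr guest_addr )
{
   UWord cno = (UWord)VG_TT_FAST_HASH(guest_addr);
   if (LIKELY(VG_(tt_fast)[cno].guest == guest_addr))
      return VG_(tt_fast)[cno].host; /* hit: address of host code */
   return 0; /* miss: caller falls back to VG_(search_transtab) */
}
#endif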

/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups  = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( const HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}


/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntry* index_tte ( UInt sNo, UInt tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->tt);
   TTEntry* tte = &s->tt[tteNo];
   vg_assert(tte->status == InUse);
   return tte;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = -1; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = -1; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntry__init ( TTEntry* tte )
{
   VG_(memset)(tte, 0, sizeof(*tte));
}

static UWord InEdgeArr__size ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->var);
      iea->var = NULL;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->fixed[i] = iea->fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                               ttaux_free,
                               sizeof(InEdge));
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(iea->var, &iea->fixed[i]);
         }
         VG_(addToXA)(iea->var, ie);
         iea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         iea->fixed[iea->n_fixed++] = *ie;
      }
   }
}

static UWord OutEdgeArr__size ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->var);
      oea->var = NULL;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->fixed[i] = oea->fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                               ttaux_free,
                               sizeof(OutEdge));
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(oea->var, &oea->fixed[i]);
         }
         VG_(addToXA)(oea->var, oe);
         oea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         oea->fixed[oea->n_fixed++] = *oe;
      }
   }
}

static
Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
{
   const HostExtent* hx1 = v1;
   const HostExtent* hx2 = v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return  1;
   return 0; /* partial overlap */
}
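
/* Note that any two overlapping extents compare as "equal", so a
   VG_(lookupXA_UNSAFE) with a length-1 key finds the (unique, since
   live extents never overlap) extent containing a given host
   address: a key at hcode matches [start, start+len) exactly when
   start <= hcode < start+len. */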

/* True if hx is a dead host extent, i.e. corresponds to host code
   of an entry that was invalidated. */
static
Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
{
   const UInt tteNo = hx->tteNo;
#define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
      VG_(printf) (m                                            \
                   " start 0x%p len %u sector %d ttslot %u"     \
                   " tt.entry 0x%llx tt.tcptr 0x%p\n",          \
                   hx->start, hx->len, (int)(sec - sectors),    \
                   hx->tteNo,                                   \
                   sec->tt[tteNo].entry, sec->tt[tteNo].tcptr)

   /* Entry might have been invalidated and not re-used yet. */
   if (sec->tt[tteNo].status == Deleted) {
      LDEBUG("found deleted entry");
      return True;
   }
   /* Maybe we found this entry via a host_extents record that was
      inserted for an entry which was changed to Deleted and then
      re-used afterwards.  If this entry was re-used, then its tcptr
      is >= the host_extents start (i.e. the previous tcptr) + len.
      This holds because host code is never re-used in place: a new
      or re-used entry always gets "higher value" host code. */
   if ((UChar*) sec->tt[tteNo].tcptr >= hx->start + hx->len) {
      LDEBUG("found re-used entry");
      return True;
   }

   return False;
#undef LDEBUG
}


static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/UInt* from_sNo,
                              /*OUT*/UInt* from_tteNo,
                              void* hcode )
{
   Int i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < n_sectors; i++) {
      Int sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      Sector* sec = &sectors[sno];
      XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       HostExtent__cmpOrd );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         UInt tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);

         /* If this hx entry corresponds to dead host code, we must
            report that this code has not been found, as it cannot
            be patched. */
         if (HostExtent__is_dead (hx, sec))
            return False;

         vg_assert(sec->tt[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = (UInt)tteNo;
         return True;
      }
   }
   return False;
}


/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( void* hcode )
{
   Int i, sno;
   for (i = 0; i < n_sectors; i++) {
      sno = sector_search_order[i];
      if (sno == -1)
         break; /* run out of sectors to search */
      if ((UChar*)hcode >= (UChar*)sectors[sno].tc
          && (UChar*)hcode <= (UChar*)sectors[sno].tc_next
                              + sizeof(ULong) - 1)
         return True;
   }
   return False;
}


/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              UInt  to_sNo,
                              UInt  to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch vex_arch = VexArch_INVALID;
   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

   // host_code is where we're patching to.  So it needs to
   // take into account whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte = index_tte(to_sNo, to_tteNo);
   void* host_code = ((UChar*)to_tte->tcptr)
                     + (to_fastEP ? LibVEX_evCheckSzB(vex_arch) : 0);

   // stay sane -- the destination of the jump (host_code) is in
   // this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   UInt from_sNo   = (UInt)-1;
   UInt from_tteNo = (UInt)-1;
   Bool from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           vex_arch,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the in_edges and out_edges info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}


/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch vex_arch,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntry* tte
      = index_tte(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tte->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
      = LibVEX_UnChain( vex_arch, place_to_patch,
                        place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}


/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch vex_arch,
                                           UInt here_sNo, UInt here_tteNo )
{
   if (DEBUG_TRANSTAB)
      VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
   UWord i, j, n, m;
   Int evCheckSzB = LibVEX_evCheckSzB(vex_arch);
   TTEntry* here_tte = index_tte(here_sNo, here_tteNo);
   if (DEBUG_TRANSTAB)
      VG_(printf)("... QQQ tt.entry 0x%llx tt.tcptr 0x%p\n",
                  here_tte->entry, here_tte->tcptr);
   vg_assert(here_tte->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tte->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(vex_arch, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tte->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
            break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tte->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tte->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
            break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tte->in_edges);
   OutEdgeArr__makeEmpty(&here_tte->out_edges);
}


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static Int range_to_eclass ( Addr64 start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}
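
#if 0
/* Illustrative only: with ECLASS_SHIFT == 11 a bin spans 0x800
   bytes, so a 256-byte range at 0x4800 sits entirely in bin
   (0x4800 >> 11) & 0xFF == 9, whereas a 32-byte range at 0x47F0
   straddles the 0x4800 bin boundary and so lands in ECLASS_MISC. */
static void range_to_eclass__examples ( void )
{
   vg_assert( range_to_eclass(0x4800, 0x100) == 9 );
   vg_assert( range_to_eclass(0x47F0, 0x20)  == ECLASS_MISC );
}
#endif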


/* Calculates the equivalence class numbers for a VexGuestExtents.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/

static
Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
                                  VexGuestExtents* vge )
{
#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   Int i, j, n_ec, r;

   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   n_ec = 0;
   for (i = 0; i < vge->n_used; i++) {
      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}


/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
{
   Int    old_sz, new_sz, i, r;
   UShort *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (DEBUG_TRANSTAB) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
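      /* Growth sequence, for the record: 8, 16, 32, 64, 96, 144,
         216, ... -- doubling while small, then growing by 1.5x to
         limit over-allocation once the array gets large. */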
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(UShort));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}


/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
{
   Int i, r, eclasses[3];
   TTEntry* tte;
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   tte = &sec->tt[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );

   vg_assert(r >= 1 && r <= 3);
   tte->n_tte2ec = r;

   for (i = 0; i < r; i++) {
      tte->tte2ec_ec[i] = eclasses[i];
      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
   }
}


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntries
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check that the TTEntries they point at
      point back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_tte2ec nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_tte2ec out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->tte2ec_ec[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->tte2ec_ix[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n");   break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n");   break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                     (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   Int i, j, nListed;
   /* assert the array is the right size */
   vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
                               / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] < 0 || sector_search_order[i] >= n_sectors)
         break;
   }
   nListed = i;
   for (/* */; i < n_sectors; i++) {
      if (sector_search_order[i] != -1)
         break;
   }
   if (i != n_sectors)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == -1)
         continue;
      for (j = i+1; j < n_sectors; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < n_sectors; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}

static Bool sanity_check_all_sectors ( void )
{
   Int     sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < n_sectors; sno++) {
      Int i;
      Int nr_not_dead_hx = 0;
      Int szhxa;
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
      szhxa = VG_(sizeXA)(sec->host_extents);
      for (i = 0; i < szhxa; i++) {
         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
         if (!HostExtent__is_dead (hx, sec))
            nr_not_dead_hx++;
      }
      if (nr_not_dead_hx != sec->tt_n_inuse) {
         VG_(debugLog)(0, "transtab",
                       "nr_not_dead_hx %d sanity fail "
                       "(expected == in use %d)\n",
                       nr_not_dead_hx, sec->tt_n_inuse);
         return False;
      }
   }

   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}


/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static Bool isValidSector ( Int sector )
{
   if (sector < 0 || sector >= n_sectors)
      return False;
   return True;
}

static inline UInt HASH_TT ( Addr64 key )
{
   UInt kHi = (UInt)(key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return k32 % N_TTES_PER_SECTOR;
}
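
/* Worked example: HASH_TT(0x100000ULL) gives kHi == 0 and kLo ==
   0x100000, so k32 == 0x100000; rotating right by 7 yields 0x2000,
   and 0x2000 % 65521 == 8192.  The rotate spreads the low-order
   bits, in which nearby code addresses differ, across the word
   before the mod by the prime table size. */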

static void setFastCacheEntry ( Addr64 key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = (Addr)key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}

/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}

static void initialiseSector ( Int sno )
{
   Int     i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
      if (VG_(clo_stats))
         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(AddrH)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      if (VG_(clo_stats))
         VG_(dmsg)("transtab: " "recycle sector %d\n", sno);

      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch vex_arch = VexArch_INVALID;
      VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

      /* Visit each just-about-to-be-abandoned translation. */
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
                                      sno);
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
            unchain_in_preparation_for_deletion(vex_arch, sno, i);
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
                                      sno);

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            ttaux_free(sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}


/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking,
                           Int              offs_profInc,
                           UInt             n_guest_instrs,
                           VexArch          arch_host )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, tho */

   if (DEBUG_TRANSTAB)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d) ...\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
                           / N_TTES_PER_SECTOR;
      Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
                           / tc_sector_szQ;
      VG_(debugLog)(1,"transtab",
                    "declare sector %d full "
                    "(TT loading %2d%%, TC loading %2d%%)\n",
                    y, tt_loading_pct, tc_loading_pct);
      if (VG_(clo_stats)) {
         VG_(dmsg)("transtab: "
                   "declare sector %d full "
                   "(TT loading %2d%%, TC loading %2d%%)\n",
                   y, tt_loading_pct, tc_loading_pct);
      }
      youngest_sector++;
      if (youngest_sector >= n_sectors)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   TTEntry__init(&sectors[y].tt[i]);
   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr  = tcptr;
   sectors[y].tt[i].count  = 0;
   sectors[y].tt[i].weight = n_guest_instrs == 0 ? 1 : n_guest_instrs;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Patch in the profile counter location, if necessary. */
   if (offs_profInc != -1) {
      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
      VexInvalRange vir
         = LibVEX_PatchProfInc( arch_host,
                                dstP + offs_profInc,
                                &sectors[y].tt[i].count );
      VG_(invalidate_icache)( (void*)vir.start, vir.len );
   }

   VG_(invalidate_icache)( dstP, code_len );

   /* Add this entry to the host_extents map, checking that we're
      adding in order. */
   { HostExtent hx;
     hx.start = (UChar*)tcptr;
     hx.len   = code_len;
     hx.tteNo = i;
     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
     XArray* hx_array = sectors[y].host_extents;
     vg_assert(hx_array);
     Word n = VG_(sizeXA)(hx_array);
     if (n > 0) {
        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
     }
     VG_(addToXA)(hx_array, &hx);
     if (DEBUG_TRANSTAB)
        VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
                    hx.start, hx.len, y, i);
   }

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}
1665
1666
1667 /* Search for the translation of the given guest address. If
1668 requested, a successful search can also cause the fast-caches to be
1669 updated.
1670 */
VG_(search_transtab)1671 Bool VG_(search_transtab) ( /*OUT*/AddrH* res_hcode,
1672 /*OUT*/UInt* res_sNo,
1673 /*OUT*/UInt* res_tteNo,
1674 Addr64 guest_addr,
1675 Bool upd_cache )
1676 {
1677 Int i, j, k, kstart, sno;
1678
1679 vg_assert(init_done);
1680 /* Find the initial probe point just once. It will be the same in
1681 all sectors and avoids multiple expensive % operations. */
1682 n_full_lookups++;
1683 k = -1;
1684 kstart = HASH_TT(guest_addr);
1685 vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);
1686
1687 /* Search in all the sectors,using sector_search_order[] as a
1688 heuristic guide as to what order to visit the sectors. */
1689 for (i = 0; i < n_sectors; i++) {
1690
1691 sno = sector_search_order[i];
1692 if (UNLIKELY(sno == -1))
1693 return False; /* run out of sectors to search */
1694
1695 k = kstart;
1696 for (j = 0; j < N_TTES_PER_SECTOR; j++) {
1697 n_lookup_probes++;
1698 if (sectors[sno].tt[k].status == InUse
1699 && sectors[sno].tt[k].entry == guest_addr) {
1700 /* found it */
1701 if (upd_cache)
1702 setFastCacheEntry(
1703 guest_addr, sectors[sno].tt[k].tcptr );
1704 if (res_hcode)
1705 *res_hcode = (AddrH)sectors[sno].tt[k].tcptr;
1706 if (res_sNo)
1707 *res_sNo = sno;
1708 if (res_tteNo)
1709 *res_tteNo = k;
1710 /* pull this one one step closer to the front. For large
1711 apps this more or less halves the number of required
1712 probes. */
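/* e.g. a search order of [3,0,2,1] with a hit in sector 2 becomes
   [3,2,0,1]; repeated hits migrate hot sectors towards the front. */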
1713 if (i > 0) {
1714 Int tmp = sector_search_order[i-1];
1715 sector_search_order[i-1] = sector_search_order[i];
1716 sector_search_order[i] = tmp;
1717 }
1718 return True;
1719 }
1720 if (sectors[sno].tt[k].status == Empty)
1721 break; /* not found in this sector */
1722 k++;
1723 if (k == N_TTES_PER_SECTOR)
1724 k = 0;
1725 }
1726
1727 /* If we fall off the end, all entries are InUse and not
1728 matching, or Deleted. In any case we did not find it in this
1729 sector. */
1730 }
1731
1732 /* Not found in any sector. */
1733 return False;
1734 }
1735
1736
1737 /*-------------------------------------------------------------*/
1738 /*--- Delete translations. ---*/
1739 /*-------------------------------------------------------------*/
1740
1741 /* forward */
1742 static void unredir_discard_translations( Addr64, ULong );
1743
1744 /* Stuff for deleting translations which intersect with a given
1745 address range. Unfortunately, to make this run at a reasonable
1746 speed, it is complex. */
1747
1748 static inline
1749 Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
1750 {
1751 Addr64 e1 = s1 + r1 - 1ULL;
1752 Addr64 e2 = s2 + r2 - 1ULL;
1753 if (e1 < s2 || e2 < s1)
1754 return False;
1755 return True;
1756 }
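/* Endpoints are inclusive: overlap1(0x1000, 0x100, 0x10FF, 0x10) is
   True, since the first range ends at 0x10FF exactly where the second
   begins, whereas overlap1(0x1000, 0x100, 0x1100, 0x10) is False.
   (Zero-length ranges would wrap the e1/e2 arithmetic and are not
   expected here.) */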
1757
1758 static inline
1759 Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
1760 {
1761 if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
1762 return True;
1763 if (vge->n_used < 2)
1764 return False;
1765 if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
1766 return True;
1767 if (vge->n_used < 3)
1768 return False;
1769 if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
1770 return True;
1771 return False;
1772 }
1773
1774
1775 /* Delete a tt entry, and update all the eclass data accordingly. */
1776
1777 static void delete_tte ( /*MOD*/Sector* sec, UInt secNo, Int tteno,
1778 VexArch vex_arch )
1779 {
1780 Int i, ec_num, ec_idx;
1781 TTEntry* tte;
1782
1783 /* sec and secNo are mutually redundant; cross-check. */
1784 vg_assert(sec == &sectors[secNo]);
1785
1786 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1787 tte = &sec->tt[tteno];
1788 vg_assert(tte->status == InUse);
1789 vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);
1790
1791 /* Unchain .. */
1792 unchain_in_preparation_for_deletion(vex_arch, secNo, tteno);
1793
1794 /* Deal with the ec-to-tte links first. */
1795 for (i = 0; i < tte->n_tte2ec; i++) {
1796 ec_num = (Int)tte->tte2ec_ec[i];
1797 ec_idx = tte->tte2ec_ix[i];
1798 vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
1799 vg_assert(ec_idx >= 0);
1800 vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
1801 /* Assert that the two links point at each other. */
1802 vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
1803 /* "delete" the pointer back to here. */
1804 sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
1805 }
1806
1807 /* Now fix up this TTEntry. */
1808 tte->status = Deleted;
1809 tte->n_tte2ec = 0;
1810
1811 /* Stats .. */
1812 sec->tt_n_inuse--;
1813 n_disc_count++;
1814 n_disc_osize += vge_osize(&tte->vge);
1815
1816 /* Tell the tool too. */
1817 if (VG_(needs).superblock_discards) {
1818 VG_TDICT_CALL( tool_discard_superblock_info,
1819 tte->entry,
1820 tte->vge );
1821 }
1822 }
1823
1824
1825 /* Delete translations from sec which intersect specified range, but
1826 only consider translations in the specified eclass. */
1827
1828 static
1829 Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, UInt secNo,
1830 Addr64 guest_start, ULong range,
1831 Int ec,
1832 VexArch vex_arch )
1833 {
1834 Int i;
1835 UShort tteno;
1836 Bool anyDeld = False;
1837 TTEntry* tte;
1838
1839 vg_assert(ec >= 0 && ec < ECLASS_N);
1840
1841 for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1842
1843 tteno = sec->ec2tte[ec][i];
1844 if (tteno == EC2TTE_DELETED) {
1845 /* already deleted */
1846 continue;
1847 }
1848
1849 vg_assert(tteno < N_TTES_PER_SECTOR);
1850
1851 tte = &sec->tt[tteno];
1852 vg_assert(tte->status == InUse);
1853
1854 if (overlaps( guest_start, range, &tte->vge )) {
1855 anyDeld = True;
1856 delete_tte( sec, secNo, (Int)tteno, vex_arch );
1857 }
1858
1859 }
1860
1861 return anyDeld;
1862 }
1863
1864
1865 /* Delete translations from sec which intersect specified range, the
1866 slow way, by inspecting all translations in sec. */
1867
1868 static
1869 Bool delete_translations_in_sector ( /*MOD*/Sector* sec, UInt secNo,
1870 Addr64 guest_start, ULong range,
1871 VexArch vex_arch )
1872 {
1873 Int i;
1874 Bool anyDeld = False;
1875
1876 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1877 if (sec->tt[i].status == InUse
1878 && overlaps( guest_start, range, &sec->tt[i].vge )) {
1879 anyDeld = True;
1880 delete_tte( sec, secNo, i, vex_arch );
1881 }
1882 }
1883
1884 return anyDeld;
1885 }
1886
1887
1888 void VG_(discard_translations) ( Addr64 guest_start, ULong range,
1889 const HChar* who )
1890 {
1891 Sector* sec;
1892 Int sno, ec;
1893 Bool anyDeleted = False;
1894
1895 vg_assert(init_done);
1896
1897 VG_(debugLog)(2, "transtab",
1898 "discard_translations(0x%llx, %lld) req by %s\n",
1899 guest_start, range, who );
1900
1901 /* Pre-deletion sanity check */
1902 if (VG_(clo_sanity_level) >= 4) {
1903 Bool sane = sanity_check_all_sectors();
1904 vg_assert(sane);
1905 }
1906
1907 if (range == 0)
1908 return;
1909
1910 VexArch vex_arch = VexArch_INVALID;
1911 VG_(machine_get_VexArchInfo)( &vex_arch, NULL );
1912
1913 /* There are two different ways to do this.
1914
1915 If the range fits within a single address-range equivalence
1916 class, as will be the case for a cache line sized invalidation,
1917 then we only have to inspect the set of translations listed in
1918 that equivalence class, and also in the "sin-bin" equivalence
1919 class ECLASS_MISC.
1920
1921 Otherwise, the invalidation is of a larger range and probably
1922 results from munmap. In this case it's (probably!) faster just
1923 to inspect all translations, dump those we don't want, and
1924 regenerate the equivalence class information (since modifying it
1925 in-situ is even more expensive).
1926 */
1927
1928 /* First off, figure out if the range falls within a single class,
1929 and if so which one. */
1930
1931 ec = ECLASS_MISC;
1932 if (range < (1ULL << ECLASS_SHIFT))
1933 ec = range_to_eclass( guest_start, (UInt)range );
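/* Worked example, assuming bins cover 2^ECLASS_SHIFT == 2048-byte
   aligned chunks: a 64-byte icache-line invalidation has
   range < 2048, so if it sits inside a single chunk, range_to_eclass
   can name one bin and we use the fast path below; a range straddling
   a chunk boundary, or a multi-page munmap, yields ECLASS_MISC and
   takes the slow path. */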
1934
1935 /* if ec is ECLASS_MISC then we aren't looking at just a single
1936 class, so use the slow scheme. Else use the fast scheme,
1937 examining 'ec' and ECLASS_MISC. */
1938
1939 if (ec != ECLASS_MISC) {
1940
1941 VG_(debugLog)(2, "transtab",
1942 " FAST, ec = %d\n", ec);
1943
1944 /* Fast scheme */
1945 vg_assert(ec >= 0 && ec < ECLASS_MISC);
1946
1947 for (sno = 0; sno < n_sectors; sno++) {
1948 sec = &sectors[sno];
1949 if (sec->tc == NULL)
1950 continue;
1951 anyDeleted |= delete_translations_in_sector_eclass(
1952 sec, sno, guest_start, range, ec,
1953 vex_arch
1954 );
1955 anyDeleted |= delete_translations_in_sector_eclass(
1956 sec, sno, guest_start, range, ECLASS_MISC,
1957 vex_arch
1958 );
1959 }
1960
1961 } else {
1962
1963 /* slow scheme */
1964
1965 VG_(debugLog)(2, "transtab",
1966 " SLOW, ec = %d\n", ec);
1967
1968 for (sno = 0; sno < n_sectors; sno++) {
1969 sec = &sectors[sno];
1970 if (sec->tc == NULL)
1971 continue;
1972 anyDeleted |= delete_translations_in_sector(
1973 sec, sno, guest_start, range, vex_arch );
1974 }
1975
1976 }
1977
1978 if (anyDeleted)
1979 invalidateFastCache();
1980
1981 /* don't forget the no-redir cache */
1982 unredir_discard_translations( guest_start, range );
1983
1984 /* Post-deletion sanity check */
1985 if (VG_(clo_sanity_level) >= 4) {
1986 Int i;
1987 TTEntry* tte;
1988 Bool sane = sanity_check_all_sectors();
1989 vg_assert(sane);
1990 /* But now, also check the requested address range isn't
1991 present anywhere. */
1992 for (sno = 0; sno < n_sectors; sno++) {
1993 sec = &sectors[sno];
1994 if (sec->tc == NULL)
1995 continue;
1996 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1997 tte = &sec->tt[i];
1998 if (tte->status != InUse)
1999 continue;
2000 vg_assert(!overlaps( guest_start, range, &tte->vge ));
2001 }
2002 }
2003 }
2004 }
2005
2006
2007 /*------------------------------------------------------------*/
2008 /*--- AUXILIARY: the unredirected TT/TC ---*/
2009 /*------------------------------------------------------------*/
2010
2011 /* A very simple translation cache which holds a small number of
2012 unredirected translations. This is completely independent of the
2013 main tt/tc structures. When unredir_tc or unredir_tt becomes full,
2014 both structures are simply dumped and we start over.
2015
2016 Since these translations are unredirected, the search key is (by
2017 definition) the first address entry in the .vge field. */
2018
2019 /* Sized to hold 500 translations of average size 1000 bytes. */
2020
2021 #define UNREDIR_SZB 1000
2022
2023 #define N_UNREDIR_TT 500
2024 #define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
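/* Sanity arithmetic: N_UNREDIR_TCQ is 500 * 1000 / 8 = 62500 ULongs,
   the same 500,000 bytes that init_unredir_tt_tc mmaps
   (N_UNREDIR_TT * UNREDIR_SZB). */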
2025
2026 typedef
2027 struct {
2028 VexGuestExtents vge;
2029 Addr hcode;
2030 Bool inUse;
2031 }
2032 UTCEntry;
2033
2034 /* We just allocate forwards in _tc, never deleting. */
2035 static ULong *unredir_tc;
2036 static Int unredir_tc_used = N_UNREDIR_TCQ;
2037
2038 /* Slots in _tt can come into use and out again (.inUse).
2039 Nevertheless _tt_highwater is maintained so that invalidations
2040 don't have to scan all the slots when only a few are in use.
2041 _tt_highwater holds the index of the highest ever allocated
2042 slot. */
2043 static UTCEntry unredir_tt[N_UNREDIR_TT];
2044 static Int unredir_tt_highwater;
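/* Example: if slots 0..9 have ever been occupied but only slot 3 is
   currently inUse, _tt_highwater stays at 9 and
   unredir_discard_translations scans just slots 0..9 rather than all
   N_UNREDIR_TT of them. */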
2045
2046
2047 static void init_unredir_tt_tc ( void )
2048 {
2049 Int i;
2050 if (unredir_tc == NULL) {
2051 SysRes sres = VG_(am_mmap_anon_float_valgrind)
2052 ( N_UNREDIR_TT * UNREDIR_SZB );
2053 if (sr_isError(sres)) {
2054 VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
2055 N_UNREDIR_TT * UNREDIR_SZB);
2056 /*NOTREACHED*/
2057 }
2058 unredir_tc = (ULong *)(AddrH)sr_Res(sres);
2059 }
2060 unredir_tc_used = 0;
2061 for (i = 0; i < N_UNREDIR_TT; i++)
2062 unredir_tt[i].inUse = False;
2063 unredir_tt_highwater = -1;
2064 }
2065
2066 /* Do a sanity check; return False on failure. */
2067 static Bool sanity_check_redir_tt_tc ( void )
2068 {
2069 Int i;
2070 if (unredir_tt_highwater < -1) return False;
2071 if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
2072
2073 for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
2074 if (unredir_tt[i].inUse)
2075 return False;
2076
2077 if (unredir_tc_used < 0) return False;
2078 if (unredir_tc_used > N_UNREDIR_TCQ) return False;
2079
2080 return True;
2081 }
2082
2083
2084 /* Add an UNREDIRECTED translation of vge to TT/TC. The translation
2085 is temporarily in code[0 .. code_len-1].
2086 */
2087 void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
2088 Addr64 entry,
2089 AddrH code,
2090 UInt code_len )
2091 {
2092 Int i, j, code_szQ;
2093 HChar *srcP, *dstP;
2094
2095 vg_assert(sanity_check_redir_tt_tc());
2096
2097 /* This is the whole point: it's not redirected! */
2098 vg_assert(entry == vge->base[0]);
2099
2100 /* How many ULong-sized (8-byte) chunks of unredir_tc are needed */
2101 code_szQ = (code_len + 7) / 8;
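/* e.g. code_len == 25 rounds up to code_szQ == 4, i.e. 32 bytes
   (4 ULongs) reserved in unredir_tc. */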
2102
2103 /* Look for an empty unredir_tc slot */
2104 for (i = 0; i < N_UNREDIR_TT; i++)
2105 if (!unredir_tt[i].inUse)
2106 break;
2107
2108 if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
2109 /* It's full; dump everything we currently have */
2110 init_unredir_tt_tc();
2111 i = 0;
2112 }
2113
2114 vg_assert(unredir_tc_used >= 0);
2115 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2116 vg_assert(code_szQ > 0);
2117 vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
2118 vg_assert(i >= 0 && i < N_UNREDIR_TT);
2119 vg_assert(unredir_tt[i].inUse == False);
2120
2121 if (i > unredir_tt_highwater)
2122 unredir_tt_highwater = i;
2123
2124 dstP = (HChar*)&unredir_tc[unredir_tc_used];
2125 srcP = (HChar*)code;
2126 for (j = 0; j < code_len; j++)
2127 dstP[j] = srcP[j];
2128
2129 VG_(invalidate_icache)( dstP, code_len );
2130
2131 unredir_tt[i].inUse = True;
2132 unredir_tt[i].vge = *vge;
2133 unredir_tt[i].hcode = (Addr)dstP;
2134
2135 unredir_tc_used += code_szQ;
2136 vg_assert(unredir_tc_used >= 0);
2137 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2138
2139 vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
2140 }
2141
2142 Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
2143 Addr64 guest_addr )
2144 {
2145 Int i;
2146 for (i = 0; i < N_UNREDIR_TT; i++) {
2147 if (!unredir_tt[i].inUse)
2148 continue;
2149 if (unredir_tt[i].vge.base[0] == guest_addr) {
2150 *result = (AddrH)unredir_tt[i].hcode;
2151 return True;
2152 }
2153 }
2154 return False;
2155 }
2156
2157 static void unredir_discard_translations( Addr64 guest_start, ULong range )
2158 {
2159 Int i;
2160
2161 vg_assert(sanity_check_redir_tt_tc());
2162
2163 for (i = 0; i <= unredir_tt_highwater; i++) {
2164 if (unredir_tt[i].inUse
2165 && overlaps( guest_start, range, &unredir_tt[i].vge))
2166 unredir_tt[i].inUse = False;
2167 }
2168 }
2169
2170
2171 /*------------------------------------------------------------*/
2172 /*--- Initialisation. ---*/
2173 /*------------------------------------------------------------*/
2174
2175 void VG_(init_tt_tc) ( void )
2176 {
2177 Int i, avg_codeszQ;
2178
2179 vg_assert(!init_done);
2180 init_done = True;
2181
2182 /* Otherwise lots of things go wrong... */
2183 vg_assert(sizeof(ULong) == 8);
2184 vg_assert(sizeof(Addr64) == 8);
2185 /* check fast cache entries really are 2 words long */
2186 vg_assert(sizeof(Addr) == sizeof(void*));
2187 vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
2188 /* check fast cache entries are packed back-to-back with no spaces */
2189 vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
2190 /* check fast cache is aligned as we requested. Not fatal if it
2191 isn't, but we might as well make sure. */
2192 vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));
2193
2194 if (VG_(clo_verbosity) > 2)
2195 VG_(message)(Vg_DebugMsg,
2196 "TT/TC: VG_(init_tt_tc) "
2197 "(startup of code management)\n");
2198
2199 /* Figure out how big each tc area should be. */
2200 avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8;
2201 tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);
2202
2203 /* Ensure the calculated value is not way crazy. */
2204 vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
2205 vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);
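/* For instance, a tool reporting avg_translation_sizeB == 100 gives
   avg_codeszQ == 13, hence tc_sector_szQ == 14 *
   N_TTES_PER_SECTOR_USABLE == 596232 ULongs, roughly 4.8MB of code
   cache per sector -- comfortably inside the bounds asserted above. */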
2206
2207 n_sectors = VG_(clo_num_transtab_sectors);
2208 vg_assert(n_sectors >= MIN_N_SECTORS);
2209 vg_assert(n_sectors <= MAX_N_SECTORS);
2210
2211 /* Initialise the sectors, even the ones we aren't going to use.
2212 Set all fields to zero. */
2213 youngest_sector = 0;
2214 for (i = 0; i < MAX_N_SECTORS; i++)
2215 VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));
2216
2217 /* Initialise the sector_search_order hint table, including the
2218 entries we aren't going to use. */
2219 for (i = 0; i < MAX_N_SECTORS; i++)
2220 sector_search_order[i] = -1;
2221
2222 /* Initialise the fast cache. */
2223 invalidateFastCache();
2224
2225 /* and the unredir tt/tc */
2226 init_unredir_tt_tc();
2227
2228 if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
2229 || VG_(debugLog_getLevel) () >= 2) {
2230 VG_(message)(Vg_DebugMsg,
2231 "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
2232 n_sectors, 8 * tc_sector_szQ,
2233 n_sectors * 8 * tc_sector_szQ );
2234 VG_(message)(Vg_DebugMsg,
2235 "TT/TC: table: %d tables of %d bytes each = %d total\n",
2236 n_sectors, (int)(N_TTES_PER_SECTOR * sizeof(TTEntry)),
2237 (int)(n_sectors * N_TTES_PER_SECTOR * sizeof(TTEntry)));
2238 VG_(message)(Vg_DebugMsg,
2239 "TT/TC: table: %d entries each = %d total entries"
2240 " max occupancy %d (%d%%)\n",
2241 N_TTES_PER_SECTOR,
2242 n_sectors * N_TTES_PER_SECTOR,
2243 n_sectors * N_TTES_PER_SECTOR_USABLE,
2244 SECTOR_TT_LIMIT_PERCENT );
2245 }
2246 }
2247
2248
2249 /*------------------------------------------------------------*/
2250 /*--- Printing out statistics. ---*/
2251 /*------------------------------------------------------------*/
2252
2253 static ULong safe_idiv( ULong a, ULong b )
2254 {
2255 return (b == 0 ? 0 : a / b);
2256 }
2257
2258 UInt VG_(get_bbs_translated) ( void )
2259 {
2260 return n_in_count;
2261 }
2262
2263 void VG_(print_tt_tc_stats) ( void )
2264 {
2265 VG_(message)(Vg_DebugMsg,
2266 " tt/tc: %'llu tt lookups requiring %'llu probes\n",
2267 n_full_lookups, n_lookup_probes );
2268 VG_(message)(Vg_DebugMsg,
2269 " tt/tc: %'llu fast-cache updates, %'llu flushes\n",
2270 n_fast_updates, n_fast_flushes );
2271
2272 VG_(message)(Vg_DebugMsg,
2273 " transtab: new %'lld "
2274 "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
2275 n_in_count, n_in_osize, n_in_tsize,
2276 safe_idiv(10*n_in_tsize, n_in_osize),
2277 n_in_sc_count);
2278 VG_(message)(Vg_DebugMsg,
2279 " transtab: dumped %'llu (%'llu -> ?" "?)\n",
2280 n_dump_count, n_dump_osize );
2281 VG_(message)(Vg_DebugMsg,
2282 " transtab: discarded %'llu (%'llu -> ?" "?)\n",
2283 n_disc_count, n_disc_osize );
2284
2285 if (DEBUG_TRANSTAB) {
2286 Int i;
2287 VG_(printf)("\n");
2288 for (i = 0; i < ECLASS_N; i++) {
2289 VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
2290 if (i % 16 == 15)
2291 VG_(printf)("\n");
2292 }
2293 VG_(printf)("\n\n");
2294 }
2295 }
2296
2297 /*------------------------------------------------------------*/
2298 /*--- Printing out of profiling results. ---*/
2299 /*------------------------------------------------------------*/
2300
2301 static ULong score ( TTEntry* tte )
2302 {
2303 return ((ULong)tte->weight) * ((ULong)tte->count);
2304 }
2305
2306 ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
2307 {
2308 Int sno, i, r, s;
2309 ULong score_total;
2310
2311 /* First, compute the total weighted count, and find the top N
2312 ttes. tops contains pointers to the most-used n_tops blocks, in
2313 descending order (viz, tops[0] is the highest scorer). */
2314 for (i = 0; i < n_tops; i++) {
2315 tops[i].addr = 0;
2316 tops[i].score = 0;
2317 }
2318
2319 score_total = 0;
2320
2321 for (sno = 0; sno < n_sectors; sno++) {
2322 if (sectors[sno].tc == NULL)
2323 continue;
2324 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2325 if (sectors[sno].tt[i].status != InUse)
2326 continue;
2327 score_total += score(&sectors[sno].tt[i]);
2328 /* Find the rank for sectors[sno].tt[i]. */
2329 r = n_tops-1;
2330 while (True) {
2331 if (r == -1)
2332 break;
2333 if (tops[r].addr == 0) {
2334 r--;
2335 continue;
2336 }
2337 if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
2338 r--;
2339 continue;
2340 }
2341 break;
2342 }
2343 r++;
2344 vg_assert(r >= 0 && r <= n_tops);
2345 /* This bb should be placed at r, and bbs above it shifted
2346 upwards one slot. */
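/* Illustration: with n_tops == 3 and current scores [90, 50, 10], a
   new block scoring 60 outscores slots 2 and 1 but not slot 0, so it
   lands at r == 1 and the shift below yields scores [90, 60, 50]. */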
2347 if (r < n_tops) {
2348 for (s = n_tops-1; s > r; s--)
2349 tops[s] = tops[s-1];
2350 tops[r].addr = sectors[sno].tt[i].entry;
2351 tops[r].score = score( &sectors[sno].tt[i] );
2352 }
2353 }
2354 }
2355
2356 /* Now zero out all the counter fields, so that we can make
2357 multiple calls here and just get the values since the last call,
2358 each time, rather than values accumulated for the whole run. */
2359 for (sno = 0; sno < n_sectors; sno++) {
2360 if (sectors[sno].tc == NULL)
2361 continue;
2362 for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2363 if (sectors[sno].tt[i].status != InUse)
2364 continue;
2365 sectors[sno].tt[i].count = 0;
2366 }
2367 }
2368
2369 return score_total;
2370 }
2371
2372 /*--------------------------------------------------------------------*/
2373 /*--- end ---*/
2374 /*--------------------------------------------------------------------*/
2375