
/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libcproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


#define DEBUG_TRANSTAB 0


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/* Nr of sectors provided via command line parameter. */
UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
/* Nr of sectors.
   Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
static SECno n_sectors = 0;

/* Average size of a transtab code entry. 0 means to use the tool
   provided default. */
UInt VG_(clo_avg_transtab_entry_size) = 0;
/*------------------ CONSTANTS ------------------*/
/* Number of entries in the hash table of each sector.  This needs to
   be a prime number to work properly; it must be <= 65535 so that a
   TTE index fits in a UShort, leaving room for 0xFFFF (EC2TTE_DELETED,
   HTT_DELETED) to denote 'deleted' and 0xFFFE (HTT_EMPTY) to denote
   'empty' in the hash table.
   It is strongly recommended not to change this.
   65521 is the largest prime <= 65535. */
#define N_HTTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
#define HTT_DELETED     EC2TTE_DELETED
#define HTT_EMPTY       0xFFFE

// HTTno is the Sector->htt hash table index. Must be the same type as TTEno.
typedef UShort HTTno;

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR \
           ((N_HTTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
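
/* For the record: with the defaults above, that evaluates to
   (65521 * 65) / 100 = 42588 usable TT entries per sector, against
   65521 hash table slots, so the table never exceeds roughly 65%
   occupancy. */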

/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32.
   ECLASS_N must fit in a EclassNo. */
#define ECLASS_SHIFT 13
#define ECLASS_WIDTH 9
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)
STATIC_ASSERT(ECLASS_SHIFT + ECLASS_WIDTH < 32);
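
/* The geometry this gives, spelled out: with ECLASS_SHIFT == 13 each
   bin covers a 2^13 == 8192-byte slice of the address space, and with
   ECLASS_WIDTH == 9 the bin number is taken from address bits [21:13],
   so the 512 bins repeat every 2^22 == 4MB.  A range whose start and
   end fall in different bins lands in ECLASS_MISC (see
   range_to_eclass below). */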

typedef UShort EClassNo;

/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      SECno from_sNo;   /* sector number */
      TTEno from_tteNo; /* TTE number in given sector */
      UInt  from_offs: (sizeof(UInt)*8)-1;  /* code offset from TCEntry::tcptr
                                               where the patch is */
      Bool  to_fastEP:1; /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      SECno to_sNo;    /* sector number */
      TTEno to_tteNo;  /* TTE number in given sector */
      UInt  from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;


#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      Bool     has_var:1; /* True if var is used (then n_fixed must be 0) */
      UInt     n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_IN_EDGE_ARR */
      union {
         InEdge  fixed[N_FIXED_IN_EDGE_ARR]; /* if !has_var */
         XArray* var; /* XArray* of InEdge */ /* if has_var */
      } edges;
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      Bool     has_var:1; /* True if var is used (then n_fixed must be 0) */
      UInt     n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      union {
         OutEdge  fixed[N_FIXED_OUT_EDGE_ARR]; /* if !has_var */
         XArray*  var; /* XArray* of OutEdge */ /* if has_var */
      } edges;
   }
   OutEdgeArr;
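
/* Both edge containers use the same small-array-then-heap scheme:
   edges live in the inline fixed[] array until it is full, at which
   point InEdgeArr__add/OutEdgeArr__add (below) migrate them into a
   heap-allocated XArray.  The small fixed sizes reflect the
   expectation that most blocks have few enough edges never to need a
   heap allocation. */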


/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  These are split into hot and cold parts,
   TTEntryH and TTEntryC, so as to be more cache-friendly
   (a.k.a. "faster") when searching for translations that intersect
   with a certain guest code address range, for deletion.  In the
   worst case this can involve a sequential scan through all the hot
   parts, so they are packed as compactly as possible -- 32 bytes on a
   64-bit platform, 20 bytes on a 32-bit platform.

   Each TTEntry is logically a matching cold-and-hot pair, and in fact
   it was originally one structure.  First, the cold part.
*/
typedef
   struct {
      union {
         struct {
            /* Profiling only: the count and weight (arbitrary meaning) for
               this translation.  Weight is a property of the translation
               itself and computed once when the translation is created.
               Count is an entry count for the translation and is
               incremented by 1 every time the translation is used, if we
               are profiling. */
            ULong    count;
            UShort   weight;
         } prof; // if status == InUse
         TTEno next_empty_tte; // if status != InUse
      } usage;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr entry;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort   n_tte2ec;      // # tte2ec pointers (1 to 3)
      EClassNo tte2ec_ec[3];  // for each, the eclass #
      UInt     tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
         1. erased (as part of the oldest sector cleanup) when the
            youngest sector is full.
         2. discarded due to calls to VG_(discard_translations).
            VG_(discard_translations) sets the status of the
            translation to 'Deleted'.
            Among others, the gdbserver discards one or more translations
            when a breakpoint is inserted or removed at an Addr,
            or when single stepping mode is enabled/disabled
            or when a translation is instrumented for gdbserver
            (all the target jumps of this translation are
            invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode
         check that the 'from' translation still exists before
         doing the patching.

         Is it safe to erase or discard the current translation E being
         executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not yet patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not do the chaining to a not-found translation E.
         The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased in case a new translation is done.
         A new translation will only be done if a not yet translated
         jump is to be executed.  The execution of the Deleted translation
         E will continue till a non patched jump is encountered.
         This situation is then similar to the 'erasing' case above:
         the current translation E can be erased or overwritten, as the
         execution will continue at the new translation N.
      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations).  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntryC;

/* And this is the hot part. */
typedef
   struct {
      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated.  Originally this was a VexGuestExtents, but that
         itself is 32 bytes on a 64-bit target, and we really want to
         squeeze an 8-bit |status| field into those same 32 bytes, so
         as to get 2 of them in a 64 byte LLC line.  Hence the
         VexGuestExtents fields are inlined, the _n_used field is
         changed to a UChar (it only ever has the values 1, 2 or 3)
         and the 8-bit status field is placed in byte 31 of the
         structure. */
      /* ORIGINALLY: VexGuestExtents vge; */
      Addr   vge_base[3];
      UShort vge_len[3];
      UChar  vge_n_used;  /* BEWARE: is UShort in VexGuestExtents */

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status : 8;
   }
   TTEntryH;
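
/* A quick sanity sketch of the packing claim above (an illustration,
   not part of the build): on a 64-bit target the fields add up to
   3*8 (vge_base) + 3*2 (vge_len) + 1 (vge_n_used) + 1 (status)
   = 32 bytes; on a 32-bit target, 3*4 + 3*2 + 1 + 1 = 20 bytes.
   If in doubt, a compile-time check along these lines would confirm
   it:

      STATIC_ASSERT(sizeof(TTEntryH) == (sizeof(Addr) == 8 ? 32 : 20));
*/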


/* Impedance matchers that convert between a VexGuestExtents and a
   TTEntryH, ignoring TTEntryH::status, which doesn't exist in a
   VexGuestExtents -- it is entirely unrelated. */

/* Takes a VexGuestExtents and pushes it into a TTEntryH.  The
   TTEntryH.status field is left unchanged. */
static
inline void TTEntryH__from_VexGuestExtents ( /*MOD*/TTEntryH* tteH,
                                             const VexGuestExtents* vge )
{
   tteH->vge_base[0] = vge->base[0];
   tteH->vge_base[1] = vge->base[1];
   tteH->vge_base[2] = vge->base[2];
   tteH->vge_len[0]  = vge->len[0];
   tteH->vge_len[1]  = vge->len[1];
   tteH->vge_len[2]  = vge->len[2];
   tteH->vge_n_used  = (UChar)vge->n_used; /* BEWARE: no range check. */
}

/* Takes a TTEntryH and pushes the vge_ components into a VexGuestExtents. */
static
inline void TTEntryH__to_VexGuestExtents ( /*MOD*/VexGuestExtents* vge,
                                           const TTEntryH* tteH )
{
   vge->base[0] = tteH->vge_base[0];
   vge->base[1] = tteH->vge_base[1];
   vge->base[2] = tteH->vge_base[2];
   vge->len[0]  = tteH->vge_len[0];
   vge->len[1]  = tteH->vge_len[1];
   vge->len[2]  = tteH->vge_len[2];
   vge->n_used  = (UShort)tteH->vge_n_used;
}


/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      TTEno  tteNo;
   }
   HostExtent;

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry usage.prof.count and usage.prof.weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* A hash table, mapping guest address to an index in the tt
         array.  htt is a fixed size, always containing
         exactly N_HTTES_PER_SECTOR entries. */
      TTEno* htt;

      /* The TTEntry{C,H} arrays.  These are a fixed size, always
         containing exactly N_TTES_PER_SECTOR entries. */
      TTEntryC* ttC;
      TTEntryH* ttH;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* A list of Empty/Deleted entries, chained by tte->next_empty_tte */
      TTEno empty_tt_list;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int    ec2tte_size[ECLASS_N];
      Int    ec2tte_used[ECLASS_N];
      TTEno* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;


/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_TC_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest sector should be >= 0 and < N_TC_SECTORS.
   The initial value indicates the TT/TC system is not yet
   initialised.
*/
static Sector sectors[MAX_N_SECTORS];
static Int    youngest_sector = INV_SNO;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int tc_sector_szQ = 0;
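
/* (Sketch of the sizing, as an aid to the comment above -- the real
   computation lives in VG_(init_tt_tc).  Roughly, it follows from the
   average translation size, so something like

      tc_sector_szQ ~= N_TTES_PER_SECTOR
                       * (avg_translation_size_in_bytes / 8 + 1)

   where the average size comes from VG_(clo_avg_transtab_entry_size)
   if set, else from VG_(details).avg_translation_sizeB.  The exact
   rounding here is illustrative, not authoritative.) */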


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  INV_SNO denotes "no entry". */
static SECno sector_search_order[MAX_N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   stored in an entry to force that cache entry to miss.  This relies
   on the assumption that no guest code actually has that address,
   hence a value of 0x1 seems good.  m_translate gives the client a
   synthetic segfault if it tries to execute at this address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
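
/* For orientation, the fast path in the assembly dispatcher amounts
   to something like the following C (a sketch only -- the real probe
   is hand-written per-platform assembly, and VG_TT_FAST_HASH is
   defined in the transtab headers):

      UWord cno = (UWord)VG_TT_FAST_HASH(guest_pc);
      if (LIKELY(VG_(tt_fast)[cno].guest == guest_pc))
         jump to VG_(tt_fast)[cno].host;   // hit: run the translation
      else
         fall back to the full VG_(search_transtab) lookup

   Because unused/invalidated entries hold TRANSTAB_BOGUS_GUEST_ADDR
   in .guest, they can never compare equal to a real guest PC. */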

/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;
static ULong n_sectors_recycled = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( const HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}


/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntryC* index_tteC ( SECno sNo, TTEno tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->ttC && s->ttH);
   TTEntryC* tteC = &s->ttC[tteNo];
   TTEntryH* tteH = &s->ttH[tteNo];
   vg_assert(tteH->status == InUse);
   return tteC;
}

static inline TTEntryH* index_tteH ( SECno sNo, TTEno tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->ttH);
   TTEntryH* tteH = &s->ttH[tteNo];
   vg_assert(tteH->status == InUse);
   return tteH;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = INV_SNO; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = INV_SNO; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntryC__init ( TTEntryC* tteC )
{
   VG_(bzero_inline)(tteC, sizeof(*tteC));
}

static void TTEntryH__init ( TTEntryH* tteH )
{
   VG_(bzero_inline)(tteH, sizeof(*tteH));
}

static UWord InEdgeArr__size ( const InEdgeArr* iea )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->edges.var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->edges.var);
      iea->edges.var = NULL;
      iea->has_var = False;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->edges.var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->edges.fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->edges.var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->edges.fixed[i] = iea->edges.fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->edges.var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         XArray *var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                                  ttaux_free,
                                  sizeof(InEdge));
         VG_(hintSizeXA) (var, iea->n_fixed + 1);
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(var, &iea->edges.fixed[i]);
         }
         VG_(addToXA)(var, ie);
         iea->n_fixed = 0;
         iea->has_var = True;
         iea->edges.var = var;
      } else {
         /* Just add to the fixed array. */
         iea->edges.fixed[iea->n_fixed++] = *ie;
      }
   }
}

static UWord OutEdgeArr__size ( const OutEdgeArr* oea )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->edges.var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->edges.var);
      oea->edges.var = NULL;
      oea->has_var = False;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->edges.var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->edges.fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->edges.var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->edges.fixed[i] = oea->edges.fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->edges.var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         XArray *var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                                  ttaux_free,
                                  sizeof(OutEdge));
         VG_(hintSizeXA) (var, oea->n_fixed+1);
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(var, &oea->edges.fixed[i]);
         }
         VG_(addToXA)(var, oe);
         oea->n_fixed = 0;
         oea->has_var = True;
         oea->edges.var = var;
      } else {
         /* Just add to the fixed array. */
         oea->edges.fixed[oea->n_fixed++] = *oe;
      }
   }
}

static
Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
{
   const HostExtent* hx1 = v1;
   const HostExtent* hx2 = v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}
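
/* Example of the ordering this induces (illustrative values only):
   for hx1 = [0x1000, +0x20) and hx2 = [0x1020, +0x10), we have
   0x1000 + 0x20 <= 0x1020, so hx1 compares less than hx2.  Any two
   extents that overlap at all compare equal, which is exactly what
   VG_(lookupXA_UNSAFE) needs in order to find the extent containing a
   probe address (see find_TTEntry_from_hcode below). */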

/* True if hx is a dead host extent, i.e. corresponds to host code
   of an entry that was invalidated. */
static
Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
{
   const TTEno tteNo = hx->tteNo;
#define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
      VG_(printf) (m                                            \
                   " start 0x%p len %u sector %d ttslot %u"     \
                   " tt.entry 0x%lx tt.tcptr 0x%p\n",           \
                   hx->start, hx->len, (int)(sec - sectors),    \
                   hx->tteNo,                                   \
                   sec->ttC[tteNo].entry, sec->ttC[tteNo].tcptr)

   /* Entry might have been invalidated and not re-used yet. */
   if (sec->ttH[tteNo].status == Deleted) {
      LDEBUG("found deleted entry");
      return True;
   }
   /* Maybe we found this entry via a host_extents entry which was
      inserted for an entry that was changed to Deleted and then
      re-used afterwards.  If this entry was re-used, then its tcptr
      is >= the host_extents start (i.e. the previous tcptr) + len.
      This is the case as there is no re-use of host code: a new
      entry or re-used entry always gets "higher value" host code. */
   if ((UChar*) sec->ttC[tteNo].tcptr >= hx->start + hx->len) {
      LDEBUG("found re-used entry");
      return True;
   }

   return False;
#undef LDEBUG
}


static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/SECno* from_sNo,
                              /*OUT*/TTEno* from_tteNo,
                              void* hcode )
{
   SECno i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < n_sectors; i++) {
      SECno sno = sector_search_order[i];
      if (UNLIKELY(sno == INV_SNO))
         return False; /* run out of sectors to search */

      const Sector* sec = &sectors[sno];
      const XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       HostExtent__cmpOrd );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         TTEno tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);

         /* If this hx entry corresponds to dead host code, we must
            report that the code has not been found, as it cannot be
            patched. */
         if (HostExtent__is_dead (hx, sec))
            return False;

         vg_assert(sec->ttH[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->ttC[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = tteNo;
         return True;
      }
   }
   return False;
}


/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( const void* hcode )
{
   SECno i, sno;
   for (i = 0; i < n_sectors; i++) {
      sno = sector_search_order[i];
      if (sno == INV_SNO)
         break; /* run out of sectors to search */
      if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc
          && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next
                                    + sizeof(ULong) - 1)
         return True;
   }
   return False;
}


/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              SECno to_sNo,
                              TTEno to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch     arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   // host_code is where we're patching to.  So it needs to take into
   // account whether we're jumping to the slow or fast entry point.
   // By definition, the fast entry point is exactly one event check's
   // worth of code along from the slow (tcptr) entry point.
   TTEntryC* to_tteC = index_tteC(to_sNo, to_tteNo);
   void* host_code = ((UChar*)to_tteC->tcptr)
                     + (to_fastEP ? LibVEX_evCheckSzB(arch_host) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   SECno from_sNo   = INV_SNO;
   TTEno from_tteNo = INV_TTE;
   Bool  from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntryC* from_tteC = index_tteC(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           arch_host, endness_host,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tteC->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tteC->in_edges, &ie);
   OutEdgeArr__add(&from_tteC->out_edges, &oe);
}


/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch arch_host, VexEndness endness_host,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntryC* tteC
      = index_tteC(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tteC->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
      = LibVEX_UnChain( arch_host, endness_host, place_to_patch,
                        place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}


/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch arch_host,
                                           VexEndness endness_host,
                                           SECno here_sNo, TTEno here_tteNo )
{
   if (DEBUG_TRANSTAB)
      VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
   UWord i, j, n, m;
   Int evCheckSzB = LibVEX_evCheckSzB(arch_host);
   TTEntryC* here_tteC = index_tteC(here_sNo, here_tteNo);
   TTEntryH* here_tteH = index_tteH(here_sNo, here_tteNo);
   if (DEBUG_TRANSTAB)
      VG_(printf)("... QQQ tt.entry 0x%lx tt.tcptr 0x%p\n",
                  here_tteC->entry, here_tteC->tcptr);
   vg_assert(here_tteH->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tteC->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tteC->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tteC->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntryC* from_tteC = index_tteC(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tteC->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tteC->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
            break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tteC->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tteC->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tteC->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntryC* to_tteC = index_tteC(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tteC->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tteC->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
            break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tteC->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tteC->in_edges);
   OutEdgeArr__makeEmpty(&here_tteC->out_edges);
}


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static EClassNo range_to_eclass ( Addr start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}
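
/* Worked example (illustrative numbers): for start = 0x4000 and
   len = 0x100, lo = 0x4000 and hi = 0x40FF.  With ECLASS_SHIFT == 13,
   both shift down to bin 2, so the range gets eclass 2.  For
   start = 0x5F00 and len = 0x200, lo >> 13 == 2 but hi = 0x60FF,
   and 0x60FF >> 13 == 3, so the range straddles a bin boundary and
   gets ECLASS_MISC. */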


/* Calculates the equivalence class numbers for any VexGuestExtents.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/

static
Int vexGuestExtents_to_eclasses ( /*OUT*/EClassNo* eclasses,
                                  const TTEntryH* tteH )
{

#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   UInt i, j, n_ec;
   EClassNo r;

   vg_assert(tteH->vge_n_used >= 1 && tteH->vge_n_used <= 3);

   n_ec = 0;
   for (i = 0; i < tteH->vge_n_used; i++) {
      r = range_to_eclass( tteH->vge_base[i], tteH->vge_len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}


/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, EClassNo ec, TTEno tteno )
{
   Int    old_sz, new_sz, i, r;
   TTEno  *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (DEBUG_TRANSTAB) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
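      /* So the per-eclass arrays grow along the schedule
         8, 16, 32, 64, 96, 144, 216, ...: doubling while small, then
         growing by 1.5x from 64 entries up, which keeps
         over-allocation in check for the few very popular classes. */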
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(TTEno));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}


/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, TTEno tteno )
{
   Int i, r;
   EClassNo eclasses[3];
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   TTEntryH* tteH = &sec->ttH[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, tteH );
   vg_assert(r >= 1 && r <= 3);

   TTEntryC* tteC = &sec->ttC[tteno];
   tteC->n_tte2ec = r;
   for (i = 0; i < r; i++) {
      tteC->tte2ec_ec[i] = eclasses[i];
      tteC->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], tteno );
   }
}


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( const Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int      j, k, n, ec_idx;
   EClassNo i;
   EClassNo ec_num;
   TTEno    tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         TTEntryC* tteC = &sec->ttC[tteno];
         TTEntryH* tteH = &sec->ttH[tteno];
         if (tteH->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3)
            BAD("tteC->n_tte2ec out of range");
         /* Exactly one of tte->eclasses[0 .. tte->n_eclasses-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tteC->n_tte2ec; k++) {
            if (k < tteC->n_tte2ec-1
                && tteC->tte2ec_ec[k] >= tteC->tte2ec_ec[k+1])
               BAD("tteC->tte2ec_ec[..] out of order");
            ec_num = tteC->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tteC->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tteC->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tteC->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check that the TTEntries they point at
      point back. */

   for (tteno = 0; tteno < N_TTES_PER_SECTOR; tteno++) {

      TTEntryC* tteC = &sec->ttC[tteno];
      TTEntryH* tteH = &sec->ttH[tteno];
      if (tteH->status == Empty || tteH->status == Deleted) {
         if (tteC->n_tte2ec != 0)
            BAD("tteC->n_tte2ec nonzero for unused tte");
         continue;
      }

      vg_assert(tteH->status == InUse);

      if (tteC->n_tte2ec < 1 || tteC->n_tte2ec > 3)
         BAD("tteC->n_tte2ec out of range(2)");

      for (j = 0; j < tteC->n_tte2ec; j++) {
         ec_num = tteC->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tteC->tte2ec_ec[..] out of range");
         ec_idx = tteC->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tteC->tte2ec_ix[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != tteno)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n");   break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n");   break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%lx %u\n", tte->vge.base[k], (UInt)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   SECno i, j, nListed;
   /* assert the array is the right size */
   vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
                               / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [INV_SNO, INV_SNO, ..] */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == INV_SNO
          || sector_search_order[i] >= n_sectors)
         break;
   }
   nListed = i;
   for (/* */; i < n_sectors; i++) {
      if (sector_search_order[i] != INV_SNO)
         break;
   }
   if (i != n_sectors)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == INV_SNO)
         continue;
      for (j = i+1; j < n_sectors; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < n_sectors; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}

static Bool sanity_check_all_sectors ( void )
{
   SECno   sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < n_sectors; sno++) {
      Int i;
      Int nr_not_dead_hx = 0;
      Int szhxa;
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
      szhxa = VG_(sizeXA)(sec->host_extents);
      for (i = 0; i < szhxa; i++) {
         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
         if (!HostExtent__is_dead (hx, sec))
            nr_not_dead_hx++;
      }
      if (nr_not_dead_hx != sec->tt_n_inuse) {
         VG_(debugLog)(0, "transtab",
                       "nr_not_dead_hx %d sanity fail "
                       "(expected == in use %d)\n",
                       nr_not_dead_hx, sec->tt_n_inuse);
         return False;
      }
   }

   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}



/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( const VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static UInt TTEntryH__osize ( const TTEntryH* tteH )
{
   UInt i, n = 0;
   for (i = 0; i < tteH->vge_n_used; i++)
      n += (UInt)tteH->vge_len[i];
   return n;
}

static Bool isValidSector ( SECno sector )
{
   if (sector == INV_SNO || sector >= n_sectors)
      return False;
   return True;
}

static inline HTTno HASH_TT ( Addr key )
{
   UInt kHi = sizeof(Addr) == 4 ? 0 : (key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return (HTTno)(k32 % N_HTTES_PER_SECTOR);
}
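
/* Worked example (illustrative): on a 64-bit host, for
   key = 0x0000000012345678, kHi = 0 and kLo = 0x12345678, so
   k32 = 0x12345678.  Rotating right by 7 gives
   (0x12345678 >> 7) | (0x12345678 << 25) = 0x002468AC | 0xF0000000
   = 0xF02468AC, and the bucket is 0xF02468AC % 65521.  The rotate
   mixes the low bits of the address (which vary the most) upwards
   before the modulo. */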

static void setFastCacheEntry ( Addr key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}

/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}


static TTEno get_empty_tt_slot(SECno sNo)
{
   TTEno i;

   i = sectors[sNo].empty_tt_list;
   sectors[sNo].empty_tt_list = sectors[sNo].ttC[i].usage.next_empty_tte;

   vg_assert (i >= 0 && i < N_TTES_PER_SECTOR);

   return i;
}

static void add_to_empty_tt_list (SECno sNo, TTEno tteno)
{
   sectors[sNo].ttC[tteno].usage.next_empty_tte = sectors[sNo].empty_tt_list;
   sectors[sNo].empty_tt_list = tteno;
}

static void initialiseSector ( SECno sno )
{
   UInt i;
   SysRes sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->ttC == NULL);
      vg_assert(sec->ttH == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         vg_assert(sec->ec2tte_size[e] == 0);
         vg_assert(sec->ec2tte_used[e] == 0);
         vg_assert(sec->ec2tte[e] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(Addr)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntryC) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TTC)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntryC) );
         /*NOTREACHED*/
      }
      sec->ttC = (TTEntryC*)(Addr)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntryH) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TTH)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntryH) );
         /*NOTREACHED*/
      }
      sec->ttH = (TTEntryH*)(Addr)sr_Res(sres);

      sec->empty_tt_list = HTT_EMPTY;
      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
         sec->ttH[ei].status   = Empty;
         sec->ttC[ei].n_tte2ec = 0;
         add_to_empty_tt_list(sno, ei);
      }

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_HTTES_PER_SECTOR * sizeof(TTEno) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(HTT)",
                                     N_HTTES_PER_SECTOR * sizeof(TTEno) );
         /*NOTREACHED*/
      }
      sec->htt = (TTEno*)(Addr)sr_Res(sres);
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == INV_SNO)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "recycle sector %d\n", sno);
      n_sectors_recycled++;

      vg_assert(sec->ttC != NULL);
      vg_assert(sec->ttH != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch     arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;

      /* Visit each just-about-to-be-abandoned translation. */
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
                                      sno);
      sec->empty_tt_list = HTT_EMPTY;
      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
         if (sec->ttH[ei].status == InUse) {
            vg_assert(sec->ttC[ei].n_tte2ec >= 1);
            vg_assert(sec->ttC[ei].n_tte2ec <= 3);
            n_dump_osize += TTEntryH__osize(&sec->ttH[ei]);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VexGuestExtents vge_tmp;
               TTEntryH__to_VexGuestExtents( &vge_tmp, &sec->ttH[ei] );
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->ttC[ei].entry, vge_tmp );
            }
            unchain_in_preparation_for_deletion(arch_host,
                                                endness_host, sno, ei);
         } else {
            vg_assert(sec->ttC[ei].n_tte2ec == 0);
         }
         sec->ttH[ei].status   = Empty;
         sec->ttC[ei].n_tte2ec = 0;
         add_to_empty_tt_list(sno, ei);
      }
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
                                      sno);

      /* Free up the eclass structures. */
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         if (sec->ec2tte_size[e] == 0) {
            vg_assert(sec->ec2tte_used[e] == 0);
            vg_assert(sec->ec2tte[e] == NULL);
         } else {
            vg_assert(sec->ec2tte[e] != NULL);
            ttaux_free(sec->ec2tte[e]);
            sec->ec2tte[e] = NULL;
            sec->ec2tte_size[e] = 0;
            sec->ec2tte_used[e] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      SECno ix;
      for (ix = 0; ix < n_sectors; ix++) {
         if (sector_search_order[ix] == sno)
            break;
      }
      vg_assert(ix >= 0 && ix < n_sectors);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}

/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( const VexGuestExtents* vge,
                           Addr entry,
                           Addr code,
                           UInt code_len,
                           Bool is_self_checking,
                           Int  offs_profInc,
                           UInt n_guest_instrs )
{
   Int    tcAvailQ, reqdQ, y;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, tho */

   if (DEBUG_TRANSTAB)
      VG_(printf)("add_to_transtab(entry = 0x%lx, len = %u) ...\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;
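   /* Worked example: a 100-byte translation rounds up to
      (100 + 7) >> 3 == 13 ULong (8-byte) slots in tc, so the rounding
      wastes at most 7 bytes per entry. */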

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
                           / N_HTTES_PER_SECTOR;
      Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
                           / tc_sector_szQ;
      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) {
         VG_(dmsg)("transtab: "
                   "declare sector %d full "
                   "(TT loading %2d%%, TC loading %2d%%, avg tce size %d)\n",
                   y, tt_loading_pct, tc_loading_pct,
                   8 * (tc_sector_szQ - tcAvailQ)/sectors[y].tt_n_inuse);
      }
      youngest_sector++;
      if (youngest_sector >= n_sectors)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   TTEno tteix = get_empty_tt_slot(y);
   TTEntryC__init(&sectors[y].ttC[tteix]);
   TTEntryH__init(&sectors[y].ttH[tteix]);
   sectors[y].ttC[tteix].tcptr = tcptr;
   sectors[y].ttC[tteix].usage.prof.count = 0;
   sectors[y].ttC[tteix].usage.prof.weight =
      n_guest_instrs == 0 ? 1 : n_guest_instrs;
   sectors[y].ttC[tteix].entry = entry;
   TTEntryH__from_VexGuestExtents( &sectors[y].ttH[tteix], vge );
   sectors[y].ttH[tteix].status = InUse;

   // Point an htt entry to the tt slot
   HTTno htti = HASH_TT(entry);
   vg_assert(htti >= 0 && htti < N_HTTES_PER_SECTOR);
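   /* Linear probing with wraparound.  The loop must terminate: tt
      (and hence htt) is never allowed to become completely full (see
      the comment above get_empty_tt_slot), so an HTT_EMPTY or
      HTT_DELETED slot is always reachable. */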
   while (True) {
      if (sectors[y].htt[htti] == HTT_EMPTY
          || sectors[y].htt[htti] == HTT_DELETED)
         break;
      htti++;
      if (htti >= N_HTTES_PER_SECTOR)
         htti = 0;
   }
   sectors[y].htt[htti] = tteix;

   /* Patch in the profile counter location, if necessary. */
   if (offs_profInc != -1) {
      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
      VexArch     arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;
      VexInvalRange vir
         = LibVEX_PatchProfInc( arch_host, endness_host,
                                dstP + offs_profInc,
                                &sectors[y].ttC[tteix].usage.prof.count );
      VG_(invalidate_icache)( (void*)vir.start, vir.len );
   }

   VG_(invalidate_icache)( dstP, code_len );

   /* Add this entry to the host_extents map, checking that we're
      adding in order. */
   { HostExtent hx;
     hx.start = (UChar*)tcptr;
     hx.len   = code_len;
     hx.tteNo = tteix;
     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
     XArray* hx_array = sectors[y].host_extents;
     vg_assert(hx_array);
     Word n = VG_(sizeXA)(hx_array);
     if (n > 0) {
        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
     }
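     /* Since tc_next only moves forwards within a sector, extents are
        appended in ascending host-address order (checked just above),
        which is what keeps the array binary-searchable. */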
     VG_(addToXA)(hx_array, &hx);
     if (DEBUG_TRANSTAB)
        VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
                    hx.start, hx.len, y, tteix);
   }

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], tteix );
}


/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/Addr*  res_hcode,
                            /*OUT*/SECno* res_sNo,
                            /*OUT*/TTEno* res_tteNo,
                            Addr          guest_addr,
                            Bool          upd_cache )
{
   SECno i, sno;
   HTTno j, k, kstart;
   TTEno tti;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_HTTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < n_sectors; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == INV_SNO))
         return False; /* run out of sectors to search */

      k = kstart;
      for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         tti = sectors[sno].htt[k];
         if (tti < N_TTES_PER_SECTOR
             && sectors[sno].ttC[tti].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].ttC[tti].tcptr );
            if (res_hcode)
               *res_hcode = (Addr)sectors[sno].ttC[tti].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = tti;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         // tti is HTT_EMPTY or HTT_DELETED or not the entry of guest_addr
         if (sectors[sno].htt[k] == HTT_EMPTY)
            break; /* not found in this sector */
         k++;
         if (k == N_HTTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}


/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex. */

static inline
Bool overlap1 ( Addr s1, ULong r1, Addr s2, ULong r2 )
{
   Addr e1 = s1 + r1 - 1;
   Addr e2 = s2 + r2 - 1;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}
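
/* Example: overlap1(0x1000, 0x100, 0x10FF, 0x10) is True: the first
   range ends at 0x10FF (inclusive), which is exactly where the second
   starts, so they share one byte.  The "- 1" makes both end addresses
   inclusive; a second range starting at 0x1100 instead would give
   False, since merely being adjacent does not count as overlapping. */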

static inline
Bool overlaps ( Addr start, ULong range, const TTEntryH* tteH )
{
   if (overlap1(start, range, tteH->vge_base[0], tteH->vge_len[0]))
      return True;
   if (tteH->vge_n_used < 2)
      return False;
   if (overlap1(start, range, tteH->vge_base[1], tteH->vge_len[1]))
      return True;
   if (tteH->vge_n_used < 3)
      return False;
   if (overlap1(start, range, tteH->vge_base[2], tteH->vge_len[2]))
      return True;
   return False;
}


/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*MOD*/Sector* sec, SECno secNo, TTEno tteno,
                         VexArch arch_host, VexEndness endness_host )
{
   Int      i, ec_idx;
   EClassNo ec_num;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   TTEntryC* tteC = &sec->ttC[tteno];
   TTEntryH* tteH = &sec->ttH[tteno];
   vg_assert(tteH->status == InUse);
   vg_assert(tteC->n_tte2ec >= 1 && tteC->n_tte2ec <= 3);

   /* Unchain .. */
   unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tteC->n_tte2ec; i++) {
      ec_num = tteC->tte2ec_ec[i];
      ec_idx = tteC->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   /* Mark the entry as deleted in htt.
      Note: we could avoid the below hash table search by
      adding a reference from tte to its hash position in tt. */
   HTTno j;
   HTTno k = HASH_TT(tteC->entry);
   vg_assert(k >= 0 && k < N_HTTES_PER_SECTOR);
   for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
      if (sec->htt[k] == tteno)
         break;
      k++;
      if (k == N_HTTES_PER_SECTOR)
         k = 0;
   }
   vg_assert(j < N_HTTES_PER_SECTOR);
   sec->htt[k] = HTT_DELETED;
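   /* HTT_DELETED is a tombstone, deliberately distinct from
      HTT_EMPTY: probe sequences in VG_(search_transtab) stop only at
      HTT_EMPTY, so writing HTT_EMPTY here could make entries further
      along the same probe chain unreachable. */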
   tteH->status = Deleted;
   tteC->n_tte2ec = 0;
   add_to_empty_tt_list(secNo, tteno);

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += TTEntryH__osize(tteH);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VexGuestExtents vge_tmp;
      TTEntryH__to_VexGuestExtents( &vge_tmp, tteH );
      VG_TDICT_CALL( tool_discard_superblock_info, tteC->entry, vge_tmp );
   }
}


/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, SECno secNo,
                                            Addr guest_start, ULong range,
                                            EClassNo ec,
                                            VexArch arch_host,
                                            VexEndness endness_host )
{
   Int   i;
   TTEno tteno;
   Bool  anyDeld = False;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      TTEntryH* tteH = &sec->ttH[tteno];
      vg_assert(tteH->status == InUse);

      if (overlaps( guest_start, range, tteH )) {
         anyDeld = True;
         delete_tte( sec, secNo, tteno, arch_host, endness_host );
      }

   }

   return anyDeld;
}


/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec. */

static
Bool delete_translations_in_sector ( /*MOD*/Sector* sec, SECno secNo,
                                     Addr guest_start, ULong range,
                                     VexArch arch_host,
                                     VexEndness endness_host )
{
   TTEno i;
   Bool  anyDeld = False;

   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
      /* The entire and only purpose of splitting TTEntry into cold
         and hot parts (TTEntryC and TTEntryH) is to make this search
         loop run as fast as possible, by avoiding having to pull all
         of the cold data up the memory hierarchy. */
      if (UNLIKELY(sec->ttH[i].status == InUse
                   && overlaps( guest_start, range, &sec->ttH[i] ))) {
         anyDeld = True;
         delete_tte( sec, secNo, i, arch_host, endness_host );
      }
   }

   return anyDeld;
}


void VG_(discard_translations) ( Addr guest_start, ULong range,
                                 const HChar* who )
{
   Sector*  sec;
   SECno    sno;
   EClassNo ec;
   Bool     anyDeleted = False;

   vg_assert(init_done);

   VG_(debugLog)(2, "transtab",
                 "discard_translations(0x%lx, %llu) req by %s\n",
                 guest_start, range, who );

   /* Pre-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Bool sane = sanity_check_all_sectors();
      vg_assert(sane);
   }

   if (range == 0)
      return;

   VexArch     arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   /* There are two different ways to do this.

      If the range fits within a single address-range equivalence
      class, as will be the case for a cache line sized invalidation,
      then we only have to inspect the set of translations listed in
      that equivalence class, and also in the "sin-bin" equivalence
      class ECLASS_MISC.

      Otherwise, the invalidation is of a larger range and probably
      results from munmap.  In this case it's (probably!) faster just
      to inspect all translations, dump those we don't want, and
      regenerate the equivalence class information (since modifying it
      in-situ is even more expensive).
   */

   /* First off, figure out if the range falls within a single class,
      and if so which one. */

   ec = ECLASS_MISC;
   if (range <= (1ULL << ECLASS_SHIFT))
      ec = range_to_eclass( guest_start, (UInt)range );
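   /* For instance, an icache-line-sized invalidation of a few tens of
      bytes will fit in one eclass and take the fast path below,
      whereas a multi-page munmap will land in ECLASS_MISC and force
      the slow full scan. */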

   /* if ec is ECLASS_MISC then we aren't looking at just a single
      class, so use the slow scheme.  Else use the fast scheme,
      examining 'ec' and ECLASS_MISC. */

   if (ec != ECLASS_MISC) {

      VG_(debugLog)(2, "transtab",
                    " FAST, ec = %d\n", ec);

      /* Fast scheme */
      vg_assert(ec >= 0 && ec < ECLASS_MISC);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ec,
                          arch_host, endness_host
                       );
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ECLASS_MISC,
                          arch_host, endness_host
                       );
      }

   } else {

      /* slow scheme */

      VG_(debugLog)(2, "transtab",
                    " SLOW, ec = %d\n", ec);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector(
                          sec, sno, guest_start, range,
                          arch_host, endness_host
                       );
      }

   }

   if (anyDeleted)
      invalidateFastCache();

   /* don't forget the no-redir cache */
   unredir_discard_translations( guest_start, range );

   /* Post-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      TTEno i;
      Bool  sane = sanity_check_all_sectors();
      vg_assert(sane);
      /* But now, also check the requested address range isn't
         present anywhere. */
      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
            TTEntryH* tteH = &sec->ttH[i];
            if (tteH->status != InUse)
               continue;
            vg_assert(!overlaps( guest_start, range, tteH ));
         }
      }
   }
}

/* Whether or not tools may discard translations. */
Bool VG_(ok_to_discard_translations) = False;

/* This function is exported to tools which can use it to discard
   translations, provided it is safe to do so. */
void VG_(discard_translations_safely) ( Addr start, SizeT len,
                                        const HChar* who )
{
   vg_assert(VG_(ok_to_discard_translations));
   VG_(discard_translations)(start, len, who);
}

/*------------------------------------------------------------*/
/*--- AUXILIARY: the unredirected TT/TC                    ---*/
/*------------------------------------------------------------*/

/* A very simple translation cache which holds a small number of
   unredirected translations.  This is completely independent of the
   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */

/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
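/* That is, 500 * 1000 / 8 = 62500 ULong slots of code space in all. */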

typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong *unredir_tc;
static Int    unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;


static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB);
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)(Addr)sr_Res(sres);
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}

/* Do a sanity check; return False on failure. */
static Bool sanity_check_redir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}


/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
*/
void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge,
                                   Addr entry,
                                   Addr code,
                                   UInt code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many unredir_tt slots are needed */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tc slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }
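   /* Dumping the whole cache on overflow is crude but acceptable
      here: it holds only a small number of unredirected translations
      (see the comment at the top of this section), and anything
      thrown out can simply be retranslated on demand. */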

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   VG_(invalidate_icache)( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}

Bool VG_(search_unredir_transtab) ( /*OUT*/Addr* result,
                                    Addr guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}

static void unredir_discard_translations( Addr guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_redir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse) {
         /* Fake up a TTEntryH just so we can pass it to overlaps()
            without having to make a new version of overlaps() just for
            this rare case. */
         TTEntryH tmp_tteH;
         TTEntryH__from_VexGuestExtents( &tmp_tteH, &unredir_tt[i].vge );
         tmp_tteH.status = Empty; /* Completely irrelevant; pure paranoia. */
         if (overlaps( guest_start, range, &tmp_tteH )) {
            unredir_tt[i].inUse = False;
         }
      }
   }
}


/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

void VG_(init_tt_tc) ( void )
{
   Int i, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(TTEno) == sizeof(HTTno));
   vg_assert(sizeof(TTEno) == 2);
   vg_assert(N_TTES_PER_SECTOR <= N_HTTES_PER_SECTOR);
   vg_assert(N_HTTES_PER_SECTOR < INV_TTE);
   vg_assert(N_HTTES_PER_SECTOR < EC2TTE_DELETED);
   vg_assert(N_HTTES_PER_SECTOR < HTT_EMPTY);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) )
             == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   /* The TTEntryH size is critical for keeping the LLC miss rate down
      when doing a lot of discarding.  Hence check it here.  We also
      have a lot of TTEntryCs so let's check that too. */
   if (sizeof(HWord) == 8) {
      vg_assert(sizeof(TTEntryH) <= 32);
      vg_assert(sizeof(TTEntryC) <= 112);
   }
   else if (sizeof(HWord) == 4) {
      vg_assert(sizeof(TTEntryH) <= 20);
#     if defined(VGP_ppc32_linux) || defined(VGP_mips32_linux) \
         || defined(VGP_arm_linux)
      /* On PPC32, MIPS32, ARM32 platforms, alignof(ULong) == 8, so the
         structure is larger than on other 32 bit targets. */
      vg_assert(sizeof(TTEntryC) <= 96);
#     else
      vg_assert(sizeof(TTEntryC) <= 88);
#     endif
   }
   else {
      vg_assert(0);
   }

   /* All the hassle about converting between TTEntryH and
      VexGuestExtents was so as to ensure the following. */
   vg_assert(sizeof(TTEntryH) == sizeof(VexGuestExtents));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be. */
   if (VG_(clo_avg_transtab_entry_size) == 0)
      avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8;
   else
      avg_codeszQ = (VG_(clo_avg_transtab_entry_size) + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR * (1 + avg_codeszQ);
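   /* Example: with an average translation size of 72 bytes,
      avg_codeszQ is (72 + 7) / 8 == 9, so 10 ULongs (80 bytes) of tc
      are budgeted per tt entry; the "+ 1" presumably gives a little
      headroom over the plain average. */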

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR);

   n_sectors = VG_(clo_num_transtab_sectors);
   vg_assert(n_sectors >= MIN_N_SECTORS);
   vg_assert(n_sectors <= MAX_N_SECTORS);

   /* Initialise the sectors, even the ones we aren't going to use.
      Set all fields to zero. */
   youngest_sector = 0;
   for (i = 0; i < MAX_N_SECTORS; i++)
      VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));

   /* Initialise the sector_search_order hint table, including the
      entries we aren't going to use. */
   for (i = 0; i < MAX_N_SECTORS; i++)
      sector_search_order[i] = INV_SNO;

   /* Initialise the fast cache. */
   invalidateFastCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
       || VG_(debugLog_getLevel) () >= 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %s--avg-transtab-entry-size=%u, "
         "%stool provided default %u\n",
         VG_(clo_avg_transtab_entry_size) == 0 ? "ignoring " : "using ",
         VG_(clo_avg_transtab_entry_size),
         VG_(clo_avg_transtab_entry_size) == 0 ? "using " : "ignoring ",
         VG_(details).avg_translation_sizeB);
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %'d bytes each = %'d total TC\n",
         n_sectors, 8 * tc_sector_szQ,
         n_sectors * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %'d tables[%d] of C %'d + H %'d bytes each "
         "= %'d total TT\n",
         n_sectors, N_TTES_PER_SECTOR,
         (int)(N_TTES_PER_SECTOR * sizeof(TTEntryC)),
         (int)(N_TTES_PER_SECTOR * sizeof(TTEntryH)),
         (int)(n_sectors * N_TTES_PER_SECTOR
               * (sizeof(TTEntryC) + sizeof(TTEntryH))));
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d tt entries each = %'d total tt entries\n",
         N_TTES_PER_SECTOR, n_sectors * N_TTES_PER_SECTOR);
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d htt[%d] of %'d bytes each = %'d total HTT"
         " (htt[%d] %d%% max occup)\n",
         n_sectors, N_HTTES_PER_SECTOR,
         (int)(N_HTTES_PER_SECTOR * sizeof(TTEno)),
         (int)(n_sectors * N_HTTES_PER_SECTOR * sizeof(TTEno)),
         N_HTTES_PER_SECTOR, SECTOR_TT_LIMIT_PERCENT);
   }

   if (0) {
      VG_(printf)("XXXX sizeof(VexGuestExtents) = %d\n",
                  (Int)sizeof(VexGuestExtents));
      VG_(printf)("XXXX sizeof(InEdge) = %d\n", (Int)sizeof(InEdge));
      VG_(printf)("XXXX sizeof(OutEdge) = %d\n", (Int)sizeof(OutEdge));
      VG_(printf)("XXXX sizeof(InEdgeArr) = %d\n", (Int)sizeof(InEdgeArr));
      VG_(printf)("XXXX sizeof(OutEdgeArr) = %d\n", (Int)sizeof(OutEdgeArr));
      VG_(printf)("XXXX sizeof(TTEntryC) = %d\n", (Int)sizeof(TTEntryC));
      VG_(printf)("XXXX sizeof(TTEntryH) = %d\n", (Int)sizeof(TTEntryH));
   }
}


/*------------------------------------------------------------*/
/*--- Printing out statistics.                             ---*/
/*------------------------------------------------------------*/

static Double safe_idiv( ULong a, ULong b )
{
   return (b == 0 ? 0 : (Double)a / (Double)b);
}

UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}

void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
                " tt/tc: %'llu tt lookups requiring %'llu probes\n",
                n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
                " tt/tc: %'llu fast-cache updates, %'llu flushes\n",
                n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new %'llu "
                "(%'llu -> %'llu; ratio %3.1f) [%'llu scs] "
                "avg tce size %llu\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(n_in_tsize, n_in_osize),
                n_in_sc_count,
                n_in_tsize / (n_in_count ? n_in_count : 1));
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped %'llu (%'llu -> ?" "?) "
                "(sectors recycled %'llu)\n",
                n_dump_count, n_dump_osize, n_sectors_recycled );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );

   if (DEBUG_TRANSTAB) {
      VG_(printf)("\n");
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[e]);
         if (e % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}

/*------------------------------------------------------------*/
/*--- Printing out of profiling results.                   ---*/
/*------------------------------------------------------------*/

static ULong score ( const TTEntryC* tteC )
{
   return ((ULong)tteC->usage.prof.weight) * ((ULong)tteC->usage.prof.count);
}
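/* Since .weight was set at translation time to the block's guest
   instruction count (or 1 if unknown) and .count is bumped by the
   profile increment patched in by VG_(add_to_transtab), the score is
   roughly the number of guest instructions executed in the block. */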

ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
{
   SECno sno;
   Int   r, s;
   ULong score_total;
   TTEno i;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (s = 0; s < n_tops; s++) {
      tops[s].addr  = 0;
      tops[s].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].ttH[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].ttC[i]);
         /* Find the rank for sectors[sno].tt{C,H}[i]. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
            if (tops[r].addr == 0) {
               r--;
               continue;
            }
            if ( score(&sectors[sno].ttC[i]) > tops[r].score ) {
               r--;
               continue;
            }
            break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].ttC[i].entry;
            tops[r].score = score( &sectors[sno].ttC[i] );
         }
      }
   }

   /* Now zero out all the counter fields, so that we can make
      multiple calls here and just get the values since the last call,
      each time, rather than values accumulated for the whole run. */
   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].ttH[i].status != InUse)
            continue;
         sectors[sno].ttC[i].usage.prof.count = 0;
      }
   }

   return score_total;
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/