• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- LibHB: a library for implementing and checking               ---*/
4 /*--- the happens-before relationship in concurrent programs.      ---*/
5 /*---                                                 libhb_main.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of LibHB, a library for implementing and checking
10    the happens-before relationship in concurrent programs.
11 
12    Copyright (C) 2008-2012 OpenWorks Ltd
13       info@open-works.co.uk
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 
33 #include "pub_tool_basics.h"
34 #include "pub_tool_poolalloc.h"
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcbase.h"
37 #include "pub_tool_libcprint.h"
38 #include "pub_tool_mallocfree.h"
39 #include "pub_tool_wordfm.h"
40 #include "pub_tool_sparsewa.h"
41 #include "pub_tool_xarray.h"
42 #include "pub_tool_oset.h"
43 #include "pub_tool_threadstate.h"
44 #include "pub_tool_aspacemgr.h"
45 #include "pub_tool_execontext.h"
46 #include "pub_tool_errormgr.h"
47 #include "pub_tool_options.h"        // VG_(clo_stats)
48 #include "hg_basics.h"
49 #include "hg_wordset.h"
50 #include "hg_lock_n_thread.h"
51 #include "hg_errors.h"
52 
53 #include "libhb.h"
54 
55 
56 /////////////////////////////////////////////////////////////////
57 /////////////////////////////////////////////////////////////////
58 //                                                             //
59 // Debugging #defines                                          //
60 //                                                             //
61 /////////////////////////////////////////////////////////////////
62 /////////////////////////////////////////////////////////////////
63 
64 /* Check the sanity of shadow values in the core memory state
65    machine.  Change #if 0 to #if 1 to enable this. */
66 #if 0
67 #  define CHECK_MSM 1
68 #else
69 #  define CHECK_MSM 0
70 #endif
71 
72 
73 /* Check sanity (reference counts, etc) in the conflicting access
74    machinery.  Change #if 0 to #if 1 to enable this. */
75 #if 0
76 #  define CHECK_CEM 1
77 #else
78 #  define CHECK_CEM 0
79 #endif
80 
81 
82 /* Check sanity in the compressed shadow memory machinery,
83    particularly in its caching innards.  Unfortunately there's no
84    almost-zero-cost way to make them selectable at run time.  Hence
85    set the #if 0 to #if 1 and rebuild if you want them. */
86 #if 0
87 #  define CHECK_ZSM 1  /* do sanity-check CacheLine stuff */
88 #  define inline __attribute__((noinline))
89    /* probably want to ditch -fomit-frame-pointer too */
90 #else
91 #  define CHECK_ZSM 0   /* don't sanity-check CacheLine stuff */
92 #endif
93 
94 
95 /////////////////////////////////////////////////////////////////
96 /////////////////////////////////////////////////////////////////
97 //                                                             //
98 // data decls: VtsID                                           //
99 //                                                             //
100 /////////////////////////////////////////////////////////////////
101 /////////////////////////////////////////////////////////////////
102 
103 /* VtsIDs: Unique small-integer IDs for VTSs.  VtsIDs can't exceed 30
104    bits, since they have to be packed into the lowest 30 bits of an
105    SVal. */
106 typedef  UInt  VtsID;
107 #define VtsID_INVALID 0xFFFFFFFF
108 
109 
110 
111 /////////////////////////////////////////////////////////////////
112 /////////////////////////////////////////////////////////////////
113 //                                                             //
114 // data decls: SVal                                            //
115 //                                                             //
116 /////////////////////////////////////////////////////////////////
117 /////////////////////////////////////////////////////////////////
118 
119 typedef  ULong  SVal;
120 
121 /* This value has special significance to the implementation, and callers
122    may not store it in the shadow memory. */
123 #define SVal_INVALID (3ULL << 62)
124 
125 /* This is the default value for shadow memory.  Initially the shadow
126    memory contains no accessible areas and so all reads produce this
127    value.  TODO: make this caller-defineable. */
128 #define SVal_NOACCESS (2ULL << 62)
129 
130 
131 
132 /////////////////////////////////////////////////////////////////
133 /////////////////////////////////////////////////////////////////
134 //                                                             //
135 // data decls: ScalarTS                                        //
136 //                                                             //
137 /////////////////////////////////////////////////////////////////
138 /////////////////////////////////////////////////////////////////
139 
140 /* Scalar Timestamp.  We have to store a lot of these, so there is
141    some effort to make them as small as possible.  Logically they are
142    a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
143    We pack it into 64 bits by representing the Thr* using a ThrID, a
144    small integer (18 bits), and a 46 bit integer for the timestamp
145    number.  The 46/18 split is arbitary, but has the effect that
146    Helgrind can only handle programs that create 2^18 or fewer threads
147    over their entire lifetime, and have no more than 2^46 timestamp
148    ticks (synchronisation operations on the same thread).
149 
150    This doesn't seem like much of a limitation.  2^46 ticks is
151    7.06e+13, and if each tick (optimistically) takes the machine 1000
152    cycles to process, then the minimum time to process that many ticks
153    at a clock rate of 5 GHz is 162.9 days.  And that's doing nothing
154    but VTS ticks, which isn't realistic.
155 
156    NB1: SCALARTS_N_THRBITS must be 29 or lower.  The obvious limit is
157    32 since a ThrID is a UInt.  29 comes from the fact that
158    'Thr_n_RCEC', which records information about old accesses, packs
159    not only a ThrID but also 2+1 other bits (access size and
160    writeness) in a UInt, hence limiting size to 32-(2+1) == 29.
161 
162    NB2: thrid values are issued upwards from 1024, and values less
163    than that aren't valid.  This isn't per se necessary (any order
164    will do, so long as they are unique), but it does help ensure they
165    are less likely to get confused with the various other kinds of
166    small-integer thread ids drifting around (eg, TId).  See also NB5.
167 
168    NB3: this probably also relies on the fact that Thr's are never
169    deallocated -- they exist forever.  Hence the 1-1 mapping from
170    Thr's to thrid values (set up in Thr__new) persists forever.
171 
172    NB4: temp_max_sized_VTS is allocated at startup and never freed.
173    It is a maximum sized VTS, so has (1 << SCALARTS_N_TYMBITS)
174    ScalarTSs.  So we can't make SCALARTS_N_THRBITS too large without
175    making the memory use for this go sky-high.  With
176    SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
177    like an OK tradeoff.  If more than 256k threads need to be
178    supported, we could change SCALARTS_N_THRBITS to 20, which would
179    facilitate supporting 1 million threads at the cost of 8MB storage
180    for temp_max_sized_VTS.
181 
182    NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
183    ThrID == 0 to denote an empty Thr_n_RCEC record.  So ThrID == 0
184    must never be a valid ThrID.  Given NB2 that's OK.
185 */
186 #define SCALARTS_N_THRBITS 18  /* valid range: 11 to 29 inclusive */
187 
188 #define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
189 typedef
190    struct {
191       ThrID thrid : SCALARTS_N_THRBITS;
192       ULong tym   : SCALARTS_N_TYMBITS;
193    }
194    ScalarTS;
195 
196 #define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
197 
198 
199 
200 /////////////////////////////////////////////////////////////////
201 /////////////////////////////////////////////////////////////////
202 //                                                             //
203 // data decls: Filter                                          //
204 //                                                             //
205 /////////////////////////////////////////////////////////////////
206 /////////////////////////////////////////////////////////////////
207 
208 // baseline: 5, 9
209 #define FI_LINE_SZB_LOG2  5
210 #define FI_NUM_LINES_LOG2 10
211 
212 #define FI_LINE_SZB       (1 << FI_LINE_SZB_LOG2)
213 #define FI_NUM_LINES      (1 << FI_NUM_LINES_LOG2)
214 
215 #define FI_TAG_MASK        (~(Addr)(FI_LINE_SZB - 1))
216 #define FI_GET_TAG(_a)     ((_a) & FI_TAG_MASK)
217 
218 #define FI_GET_LINENO(_a)  ( ((_a) >> FI_LINE_SZB_LOG2) \
219                              & (Addr)(FI_NUM_LINES-1) )
220 
221 
222 /* In the lines, each 8 bytes are treated individually, and are mapped
223    to a UShort.  Regardless of endianness of the underlying machine,
224    bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
225    the highest address.
226 
227    Of each bit pair, the higher numbered bit is set if a R has been
228    seen, so the actual layout is:
229 
230    15 14             ...  01 00
231 
232    R  W  for addr+7  ...  R  W  for addr+0
233 
234    So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
235 */
236 
237 /* tags are separated from lines.  tags are Addrs and are
238    the base address of the line. */
239 typedef
240    struct {
241       UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
242    }
243    FiLine;
244 
245 typedef
246    struct {
247       Addr   tags[FI_NUM_LINES];
248       FiLine lines[FI_NUM_LINES];
249    }
250    Filter;
251 
252 
253 
254 /////////////////////////////////////////////////////////////////
255 /////////////////////////////////////////////////////////////////
256 //                                                             //
257 // data decls: Thr, ULong_n_EC                                 //
258 //                                                             //
259 /////////////////////////////////////////////////////////////////
260 /////////////////////////////////////////////////////////////////
261 
262 // Records stacks for H1 history mechanism (DRD-style)
263 typedef
264    struct { ULong ull; ExeContext* ec; }
265    ULong_n_EC;
266 
267 
268 /* How many of the above records to collect for each thread?  Older
269    ones are dumped when we run out of space.  62.5k requires 1MB per
270    thread, since each ULong_n_EC record is 16 bytes long.  When more
271    than N_KWs_N_STACKs_PER_THREAD are present, the older half are
272    deleted to make space.  Hence in the worst case we will be able to
273    produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
274    Kw transitions (segments in this thread).  For the current setting
275    that gives a guaranteed stack for at least the last 31.25k
276    segments. */
277 #define N_KWs_N_STACKs_PER_THREAD 62500
278 
279 
280 struct _Thr {
281    /* Current VTSs for this thread.  They change as we go along.  viR
282       is the VTS to be used for reads, viW for writes.  Usually they
283       are the same, but can differ when we deal with reader-writer
284       locks.  It is always the case that
285          VtsID__cmpLEQ(viW,viR) == True
286       that is, viW must be the same, or lagging behind, viR. */
287    VtsID viR;
288    VtsID viW;
289 
290    /* Is initially False, and is set to True after the thread really
291       has done a low-level exit.  When True, we expect to never see
292       any more memory references done by this thread. */
293    Bool llexit_done;
294 
295    /* Is initially False, and is set to True after the thread has been
296       joined with (reaped by some other thread).  After this point, we
297       do not expect to see any uses of .viR or .viW, so it is safe to
298       set them to VtsID_INVALID. */
299    Bool joinedwith_done;
300 
301    /* A small integer giving a unique identity to this Thr.  See
302       comments on the definition of ScalarTS for details. */
303    ThrID thrid : SCALARTS_N_THRBITS;
304 
305    /* A filter that removes references for which we believe that
306       msmcread/msmcwrite will not change the state, nor report a
307       race. */
308    Filter* filter;
309 
310    /* A pointer back to the top level Thread structure.  There is a
311       1-1 mapping between Thread and Thr structures -- each Thr points
312       at its corresponding Thread, and vice versa.  Really, Thr and
313       Thread should be merged into a single structure. */
314    Thread* hgthread;
315 
316    /* The ULongs (scalar Kws) in this accumulate in strictly
317       increasing order, without duplicates.  This is important because
318       we need to be able to find a given scalar Kw in this array
319       later, by binary search. */
320    XArray* /* ULong_n_EC */ local_Kws_n_stacks;
321 };
322 
323 
324 
325 /////////////////////////////////////////////////////////////////
326 /////////////////////////////////////////////////////////////////
327 //                                                             //
328 // data decls: SO                                              //
329 //                                                             //
330 /////////////////////////////////////////////////////////////////
331 /////////////////////////////////////////////////////////////////
332 
333 // (UInt) `echo "Synchronisation object" | md5sum`
334 #define SO_MAGIC 0x56b3c5b0U
335 
336 struct _SO {
337    struct _SO* admin_prev;
338    struct _SO* admin_next;
339    VtsID viR; /* r-clock of sender */
340    VtsID viW; /* w-clock of sender */
341    UInt  magic;
342 };
343 
344 
345 
346 /////////////////////////////////////////////////////////////////
347 /////////////////////////////////////////////////////////////////
348 //                                                             //
349 // Forward declarations                                        //
350 //                                                             //
351 /////////////////////////////////////////////////////////////////
352 /////////////////////////////////////////////////////////////////
353 
354 /* fwds for
355    Globals needed by other parts of the library.  These are set
356    once at startup and then never changed. */
357 static void        (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
358 static ExeContext* (*main_get_EC)( Thr* ) = NULL;
359 
360 /* misc fn and data fwdses */
361 static void VtsID__rcinc ( VtsID ii );
362 static void VtsID__rcdec ( VtsID ii );
363 
364 static inline Bool SVal__isC ( SVal s );
365 static inline VtsID SVal__unC_Rmin ( SVal s );
366 static inline VtsID SVal__unC_Wmin ( SVal s );
367 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini );
368 
369 /* A double linked list of all the SO's. */
370 SO* admin_SO;
371 
372 
373 
374 /////////////////////////////////////////////////////////////////
375 /////////////////////////////////////////////////////////////////
376 //                                                             //
377 // SECTION BEGIN compressed shadow memory                      //
378 //                                                             //
379 /////////////////////////////////////////////////////////////////
380 /////////////////////////////////////////////////////////////////
381 
382 #ifndef __HB_ZSM_H
383 #define __HB_ZSM_H
384 
385 /* Initialise the library.  Once initialised, it will (or may) call
386    rcinc and rcdec in response to all the calls below, in order to
387    allow the user to do reference counting on the SVals stored herein.
388    It is important to understand, however, that due to internal
389    caching, the reference counts are in general inaccurate, and can be
390    both above or below the true reference count for an item.  In
391    particular, the library may indicate that the reference count for
392    an item is zero, when in fact it is not.
393 
394    To make the reference counting exact and therefore non-pointless,
395    call zsm_flush_cache.  Immediately after it returns, the reference
396    counts for all items, as deduced by the caller by observing calls
397    to rcinc and rcdec, will be correct, and so any items with a zero
398    reference count may be freed (or at least considered to be
399    unreferenced by this library).
400 */
401 static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
402 
403 static void zsm_sset_range  ( Addr, SizeT, SVal );
404 static void zsm_scopy_range ( Addr, Addr, SizeT );
405 static void zsm_flush_cache ( void );
406 
407 #endif /* ! __HB_ZSM_H */
408 
409 
410 /* Round a up to the next multiple of N.  N must be a power of 2 */
411 #define ROUNDUP(a, N)   ((a + N - 1) & ~(N-1))
412 /* Round a down to the next multiple of N.  N must be a power of 2 */
413 #define ROUNDDN(a, N)   ((a) & ~(N-1))
414 
415 
416 
417 /* ------ User-supplied RC functions ------ */
418 static void(*rcinc)(SVal) = NULL;
419 static void(*rcdec)(SVal) = NULL;
420 
421 
422 /* ------ CacheLine ------ */
423 
424 #define N_LINE_BITS      6 /* must be >= 3 */
425 #define N_LINE_ARANGE    (1 << N_LINE_BITS)
426 #define N_LINE_TREES     (N_LINE_ARANGE >> 3)
427 
428 typedef
429    struct {
430       UShort descrs[N_LINE_TREES];
431       SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
432    }
433    CacheLine;
434 
435 #define TREE_DESCR_16_0 (1<<0)
436 #define TREE_DESCR_32_0 (1<<1)
437 #define TREE_DESCR_16_1 (1<<2)
438 #define TREE_DESCR_64   (1<<3)
439 #define TREE_DESCR_16_2 (1<<4)
440 #define TREE_DESCR_32_1 (1<<5)
441 #define TREE_DESCR_16_3 (1<<6)
442 #define TREE_DESCR_8_0  (1<<7)
443 #define TREE_DESCR_8_1  (1<<8)
444 #define TREE_DESCR_8_2  (1<<9)
445 #define TREE_DESCR_8_3  (1<<10)
446 #define TREE_DESCR_8_4  (1<<11)
447 #define TREE_DESCR_8_5  (1<<12)
448 #define TREE_DESCR_8_6  (1<<13)
449 #define TREE_DESCR_8_7  (1<<14)
450 #define TREE_DESCR_DTY  (1<<15)
451 
452 typedef
453    struct {
454       SVal  dict[4]; /* can represent up to 4 diff values in the line */
455       UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
456                                       dict indexes */
457       /* if dict[0] == SVal_INVALID then dict[1] is the index of the
458          LineF to use, and dict[2..] are also SVal_INVALID. */
459    }
460    LineZ; /* compressed rep for a cache line */
461 
462 typedef
463    struct {
464       Bool inUse;
465       SVal w64s[N_LINE_ARANGE];
466    }
467    LineF; /* full rep for a cache line */
468 
469 /* Shadow memory.
470    Primary map is a WordFM Addr SecMap*.
471    SecMaps cover some page-size-ish section of address space and hold
472      a compressed representation.
473    CacheLine-sized chunks of SecMaps are copied into a Cache, being
474    decompressed when moved into the cache and recompressed on the
475    way out.  Because of this, the cache must operate as a writeback
476    cache, not a writethrough one.
477 
478    Each SecMap must hold a power-of-2 number of CacheLines.  Hence
479    N_SECMAP_BITS must >= N_LINE_BITS.
480 */
481 #define N_SECMAP_BITS   13
482 #define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
483 
484 // # CacheLines held by a SecMap
485 #define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
486 
487 /* The data in the SecMap is held in the array of LineZs.  Each LineZ
488    either carries the required data directly, in a compressed
489    representation, or it holds (in .dict[0]) an index to the LineF in
490    .linesF that holds the full representation.
491 
492    Currently-unused LineF's have their .inUse bit set to zero.
493    Since each in-use LineF is referred to be exactly one LineZ,
494    the number of .linesZ[] that refer to .linesF should equal
495    the number of .linesF[] that have .inUse == True.
496 
497    RC obligations: the RCs presented to the user include exactly
498    the values in:
499    * direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
500    * F reps that are in use (.inUse == True)
501 
502    Hence the following actions at the following transitions are required:
503 
504    F rep: .inUse==True  -> .inUse==False        -- rcdec_LineF
505    F rep: .inUse==False -> .inUse==True         -- rcinc_LineF
506    Z rep: .dict[0] from other to SVal_INVALID   -- rcdec_LineZ
507    Z rep: .dict[0] from SVal_INVALID to other   -- rcinc_LineZ
508 */
509 typedef
510    struct {
511       UInt   magic;
512       LineZ  linesZ[N_SECMAP_ZLINES];
513       LineF* linesF;
514       UInt   linesF_size;
515    }
516    SecMap;
517 
518 #define SecMap_MAGIC   0x571e58cbU
519 
is_sane_SecMap(SecMap * sm)520 static inline Bool is_sane_SecMap ( SecMap* sm ) {
521    return sm != NULL && sm->magic == SecMap_MAGIC;
522 }
523 
524 /* ------ Cache ------ */
525 
526 #define N_WAY_BITS 16
527 #define N_WAY_NENT (1 << N_WAY_BITS)
528 
529 /* Each tag is the address of the associated CacheLine, rounded down
530    to a CacheLine address boundary.  A CacheLine size must be a power
531    of 2 and must be 8 or more.  Hence an easy way to initialise the
532    cache so it is empty is to set all the tag values to any value % 8
533    != 0, eg 1.  This means all queries in the cache initially miss.
534    It does however require us to detect and not writeback, any line
535    with a bogus tag. */
536 typedef
537    struct {
538       CacheLine lyns0[N_WAY_NENT];
539       Addr      tags0[N_WAY_NENT];
540    }
541    Cache;
542 
is_valid_scache_tag(Addr tag)543 static inline Bool is_valid_scache_tag ( Addr tag ) {
544    /* a valid tag should be naturally aligned to the start of
545       a CacheLine. */
546    return 0 == (tag & (N_LINE_ARANGE - 1));
547 }
548 
549 
550 /* --------- Primary data structures --------- */
551 
552 /* Shadow memory primary map */
553 static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
554 static Cache   cache_shmem;
555 
556 
557 static UWord stats__secmaps_search       = 0; // # SM finds
558 static UWord stats__secmaps_search_slow  = 0; // # SM lookupFMs
559 static UWord stats__secmaps_allocd       = 0; // # SecMaps issued
560 static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
561 static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
562 static UWord stats__secmap_linesZ_bytes  = 0; // .. using this much storage
563 static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
564 static UWord stats__secmap_linesF_bytes  = 0; //  .. using this much storage
565 static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
566 static UWord stats__cache_Z_fetches      = 0; // # Z lines fetched
567 static UWord stats__cache_Z_wbacks       = 0; // # Z lines written back
568 static UWord stats__cache_F_fetches      = 0; // # F lines fetched
569 static UWord stats__cache_F_wbacks       = 0; // # F lines written back
570 static UWord stats__cache_invals         = 0; // # cache invals
571 static UWord stats__cache_flushes        = 0; // # cache flushes
572 static UWord stats__cache_totrefs        = 0; // # total accesses
573 static UWord stats__cache_totmisses      = 0; // # misses
574 static ULong stats__cache_make_New_arange = 0; // total arange made New
575 static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
576 static UWord stats__cline_normalises     = 0; // # calls to cacheline_normalise
577 static UWord stats__cline_cread64s       = 0; // # calls to s_m_read64
578 static UWord stats__cline_cread32s       = 0; // # calls to s_m_read32
579 static UWord stats__cline_cread16s       = 0; // # calls to s_m_read16
580 static UWord stats__cline_cread08s       = 0; // # calls to s_m_read8
581 static UWord stats__cline_cwrite64s      = 0; // # calls to s_m_write64
582 static UWord stats__cline_cwrite32s      = 0; // # calls to s_m_write32
583 static UWord stats__cline_cwrite16s      = 0; // # calls to s_m_write16
584 static UWord stats__cline_cwrite08s      = 0; // # calls to s_m_write8
585 static UWord stats__cline_sread08s       = 0; // # calls to s_m_set8
586 static UWord stats__cline_swrite08s      = 0; // # calls to s_m_get8
587 static UWord stats__cline_swrite16s      = 0; // # calls to s_m_get8
588 static UWord stats__cline_swrite32s      = 0; // # calls to s_m_get8
589 static UWord stats__cline_swrite64s      = 0; // # calls to s_m_get8
590 static UWord stats__cline_scopy08s       = 0; // # calls to s_m_copy8
591 static UWord stats__cline_64to32splits   = 0; // # 64-bit accesses split
592 static UWord stats__cline_32to16splits   = 0; // # 32-bit accesses split
593 static UWord stats__cline_16to8splits    = 0; // # 16-bit accesses split
594 static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
595 static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
596 static UWord stats__cline_16to8pulldown  = 0; // # calls to pulldown_to_8
597 static UWord stats__vts__tick            = 0; // # calls to VTS__tick
598 static UWord stats__vts__join            = 0; // # calls to VTS__join
599 static UWord stats__vts__cmpLEQ          = 0; // # calls to VTS__cmpLEQ
600 static UWord stats__vts__cmp_structural  = 0; // # calls to VTS__cmp_structural
601 
602 // # calls to VTS__cmp_structural w/ slow case
603 static UWord stats__vts__cmp_structural_slow = 0;
604 
605 // # calls to VTS__indexAt_SLOW
606 static UWord stats__vts__indexat_slow = 0;
607 
608 // # calls to vts_set__find__or__clone_and_add
609 static UWord stats__vts_set__focaa    = 0;
610 
611 // # calls to vts_set__find__or__clone_and_add that lead to an
612 // allocation
613 static UWord stats__vts_set__focaa_a  = 0;
614 
615 
shmem__round_to_SecMap_base(Addr a)616 static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
617    return a & ~(N_SECMAP_ARANGE - 1);
618 }
shmem__get_SecMap_offset(Addr a)619 static inline UWord shmem__get_SecMap_offset ( Addr a ) {
620    return a & (N_SECMAP_ARANGE - 1);
621 }
622 
623 
624 /*----------------------------------------------------------------*/
625 /*--- map_shmem :: WordFM Addr SecMap                          ---*/
626 /*--- shadow memory (low level handlers) (shmem__* fns)        ---*/
627 /*----------------------------------------------------------------*/
628 
629 /*--------------- SecMap allocation --------------- */
630 
631 static HChar* shmem__bigchunk_next = NULL;
632 static HChar* shmem__bigchunk_end1 = NULL;
633 
shmem__bigchunk_alloc(SizeT n)634 static void* shmem__bigchunk_alloc ( SizeT n )
635 {
636    const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
637    tl_assert(n > 0);
638    n = VG_ROUNDUP(n, 16);
639    tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
640    tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
641              <= (SSizeT)sHMEM__BIGCHUNK_SIZE);
642    if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
643       if (0)
644       VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
645                   (Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
646       shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
647       if (shmem__bigchunk_next == NULL)
648          VG_(out_of_memory_NORETURN)(
649             "helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
650       shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
651    }
652    tl_assert(shmem__bigchunk_next);
653    tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
654    tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
655    shmem__bigchunk_next += n;
656    return shmem__bigchunk_next - n;
657 }
658 
shmem__alloc_SecMap(void)659 static SecMap* shmem__alloc_SecMap ( void )
660 {
661    Word    i, j;
662    SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
663    if (0) VG_(printf)("alloc_SecMap %p\n",sm);
664    tl_assert(sm);
665    sm->magic = SecMap_MAGIC;
666    for (i = 0; i < N_SECMAP_ZLINES; i++) {
667       sm->linesZ[i].dict[0] = SVal_NOACCESS;
668       sm->linesZ[i].dict[1] = SVal_INVALID;
669       sm->linesZ[i].dict[2] = SVal_INVALID;
670       sm->linesZ[i].dict[3] = SVal_INVALID;
671       for (j = 0; j < N_LINE_ARANGE/4; j++)
672          sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
673    }
674    sm->linesF      = NULL;
675    sm->linesF_size = 0;
676    stats__secmaps_allocd++;
677    stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
678    stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
679    stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
680    return sm;
681 }
682 
683 typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
684 static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
685 
shmem__find_SecMap(Addr ga)686 static SecMap* shmem__find_SecMap ( Addr ga )
687 {
688    SecMap* sm    = NULL;
689    Addr    gaKey = shmem__round_to_SecMap_base(ga);
690    // Cache
691    stats__secmaps_search++;
692    if (LIKELY(gaKey == smCache[0].gaKey))
693       return smCache[0].sm;
694    if (LIKELY(gaKey == smCache[1].gaKey)) {
695       SMCacheEnt tmp = smCache[0];
696       smCache[0] = smCache[1];
697       smCache[1] = tmp;
698       return smCache[0].sm;
699    }
700    if (gaKey == smCache[2].gaKey) {
701       SMCacheEnt tmp = smCache[1];
702       smCache[1] = smCache[2];
703       smCache[2] = tmp;
704       return smCache[1].sm;
705    }
706    // end Cache
707    stats__secmaps_search_slow++;
708    if (VG_(lookupFM)( map_shmem,
709                       NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
710       tl_assert(sm != NULL);
711       smCache[2] = smCache[1];
712       smCache[1] = smCache[0];
713       smCache[0].gaKey = gaKey;
714       smCache[0].sm    = sm;
715    } else {
716       tl_assert(sm == NULL);
717    }
718    return sm;
719 }
720 
shmem__find_or_alloc_SecMap(Addr ga)721 static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
722 {
723    SecMap* sm = shmem__find_SecMap ( ga );
724    if (LIKELY(sm)) {
725       return sm;
726    } else {
727       /* create a new one */
728       Addr gaKey = shmem__round_to_SecMap_base(ga);
729       sm = shmem__alloc_SecMap();
730       tl_assert(sm);
731       VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
732       return sm;
733    }
734 }
735 
736 
737 /* ------------ LineF and LineZ related ------------ */
738 
rcinc_LineF(LineF * lineF)739 static void rcinc_LineF ( LineF* lineF ) {
740    UWord i;
741    tl_assert(lineF->inUse);
742    for (i = 0; i < N_LINE_ARANGE; i++)
743       rcinc(lineF->w64s[i]);
744 }
745 
rcdec_LineF(LineF * lineF)746 static void rcdec_LineF ( LineF* lineF ) {
747    UWord i;
748    tl_assert(lineF->inUse);
749    for (i = 0; i < N_LINE_ARANGE; i++)
750       rcdec(lineF->w64s[i]);
751 }
752 
rcinc_LineZ(LineZ * lineZ)753 static void rcinc_LineZ ( LineZ* lineZ ) {
754    tl_assert(lineZ->dict[0] != SVal_INVALID);
755    rcinc(lineZ->dict[0]);
756    if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
757    if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
758    if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
759 }
760 
rcdec_LineZ(LineZ * lineZ)761 static void rcdec_LineZ ( LineZ* lineZ ) {
762    tl_assert(lineZ->dict[0] != SVal_INVALID);
763    rcdec(lineZ->dict[0]);
764    if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
765    if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
766    if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
767 }
768 
769 inline
write_twobit_array(UChar * arr,UWord ix,UWord b2)770 static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
771    Word bix, shft, mask, prep;
772    tl_assert(ix >= 0);
773    bix  = ix >> 2;
774    shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
775    mask = 3 << shft;
776    prep = b2 << shft;
777    arr[bix] = (arr[bix] & ~mask) | prep;
778 }
779 
780 inline
read_twobit_array(UChar * arr,UWord ix)781 static UWord read_twobit_array ( UChar* arr, UWord ix ) {
782    Word bix, shft;
783    tl_assert(ix >= 0);
784    bix  = ix >> 2;
785    shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
786    return (arr[bix] >> shft) & 3;
787 }
788 
789 /* Given address 'tag', find either the Z or F line containing relevant
790    data, so it can be read into the cache.
791 */
find_ZF_for_reading(LineZ ** zp,LineF ** fp,Addr tag)792 static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
793                                   /*OUT*/LineF** fp, Addr tag ) {
794    LineZ* lineZ;
795    LineF* lineF;
796    UWord   zix;
797    SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
798    UWord   smoff = shmem__get_SecMap_offset(tag);
799    /* since smoff is derived from a valid tag, it should be
800       cacheline-aligned. */
801    tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
802    zix = smoff >> N_LINE_BITS;
803    tl_assert(zix < N_SECMAP_ZLINES);
804    lineZ = &sm->linesZ[zix];
805    lineF = NULL;
806    if (lineZ->dict[0] == SVal_INVALID) {
807       UInt fix = (UInt)lineZ->dict[1];
808       tl_assert(sm->linesF);
809       tl_assert(sm->linesF_size > 0);
810       tl_assert(fix >= 0 && fix < sm->linesF_size);
811       lineF = &sm->linesF[fix];
812       tl_assert(lineF->inUse);
813       lineZ = NULL;
814    }
815    *zp = lineZ;
816    *fp = lineF;
817 }
818 
819 /* Given address 'tag', return the relevant SecMap and the index of
820    the LineZ within it, in the expectation that the line is to be
821    overwritten.  Regardless of whether 'tag' is currently associated
822    with a Z or F representation, to rcdec on the current
823    representation, in recognition of the fact that the contents are
824    just about to be overwritten. */
825 static __attribute__((noinline))
find_Z_for_writing(SecMap ** smp,Word * zixp,Addr tag)826 void find_Z_for_writing ( /*OUT*/SecMap** smp,
827                           /*OUT*/Word* zixp,
828                           Addr tag ) {
829    LineZ* lineZ;
830    LineF* lineF;
831    UWord   zix;
832    SecMap* sm    = shmem__find_or_alloc_SecMap(tag);
833    UWord   smoff = shmem__get_SecMap_offset(tag);
834    /* since smoff is derived from a valid tag, it should be
835       cacheline-aligned. */
836    tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
837    zix = smoff >> N_LINE_BITS;
838    tl_assert(zix < N_SECMAP_ZLINES);
839    lineZ = &sm->linesZ[zix];
840    lineF = NULL;
841    /* re RCs, we are freeing up this LineZ/LineF so that new data can
842       be parked in it.  Hence have to rcdec it accordingly. */
843    /* If lineZ has an associated lineF, free it up. */
844    if (lineZ->dict[0] == SVal_INVALID) {
845       UInt fix = (UInt)lineZ->dict[1];
846       tl_assert(sm->linesF);
847       tl_assert(sm->linesF_size > 0);
848       tl_assert(fix >= 0 && fix < sm->linesF_size);
849       lineF = &sm->linesF[fix];
850       tl_assert(lineF->inUse);
851       rcdec_LineF(lineF);
852       lineF->inUse = False;
853    } else {
854       rcdec_LineZ(lineZ);
855    }
856    *smp  = sm;
857    *zixp = zix;
858 }
859 
860 static __attribute__((noinline))
alloc_F_for_writing(SecMap * sm,Word * fixp)861 void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
862    UInt        i, new_size;
863    LineF* nyu;
864 
865    if (sm->linesF) {
866       tl_assert(sm->linesF_size > 0);
867    } else {
868       tl_assert(sm->linesF_size == 0);
869    }
870 
871    if (sm->linesF) {
872       for (i = 0; i < sm->linesF_size; i++) {
873          if (!sm->linesF[i].inUse) {
874             *fixp = (Word)i;
875             return;
876          }
877       }
878    }
879 
880    /* No free F line found.  Expand existing array and try again. */
881    new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
882    nyu      = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
883                            new_size * sizeof(LineF) );
884    tl_assert(nyu);
885 
886    stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
887    stats__secmap_linesF_bytes  += (new_size - sm->linesF_size)
888                                   * sizeof(LineF);
889 
890    if (0)
891    VG_(printf)("SM %p: expand F array from %d to %d\n",
892                sm, (Int)sm->linesF_size, new_size);
893 
894    for (i = 0; i < new_size; i++)
895       nyu[i].inUse = False;
896 
897    if (sm->linesF) {
898       for (i = 0; i < sm->linesF_size; i++) {
899          tl_assert(sm->linesF[i].inUse);
900          nyu[i] = sm->linesF[i];
901       }
902       VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
903       HG_(free)(sm->linesF);
904    }
905 
906    sm->linesF      = nyu;
907    sm->linesF_size = new_size;
908 
909    for (i = 0; i < sm->linesF_size; i++) {
910       if (!sm->linesF[i].inUse) {
911          *fixp = (Word)i;
912          return;
913       }
914     }
915 
916     /*NOTREACHED*/
917     tl_assert(0);
918 }
919 
920 
921 /* ------------ CacheLine and implicit-tree related ------------ */
922 
923 __attribute__((unused))
pp_CacheLine(CacheLine * cl)924 static void pp_CacheLine ( CacheLine* cl ) {
925    Word i;
926    if (!cl) {
927       VG_(printf)("%s","pp_CacheLine(NULL)\n");
928       return;
929    }
930    for (i = 0; i < N_LINE_TREES; i++)
931       VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
932    for (i = 0; i < N_LINE_ARANGE; i++)
933       VG_(printf)("    sval: %08lx\n", (UWord)cl->svals[i]);
934 }
935 
descr_to_validbits(UShort descr)936 static UChar descr_to_validbits ( UShort descr )
937 {
938    /* a.k.a Party Time for gcc's constant folder */
939 #  define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
940                 b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0)  \
941              ( (UShort) ( ( (b8_7)  << 14) | ( (b8_6)  << 13) | \
942                           ( (b8_5)  << 12) | ( (b8_4)  << 11) | \
943                           ( (b8_3)  << 10) | ( (b8_2)  << 9)  | \
944                           ( (b8_1)  << 8)  | ( (b8_0)  << 7)  | \
945                           ( (b16_3) << 6)  | ( (b32_1) << 5)  | \
946                           ( (b16_2) << 4)  | ( (b64)   << 3)  | \
947                           ( (b16_1) << 2)  | ( (b32_0) << 1)  | \
948                           ( (b16_0) << 0) ) )
949 
950 #  define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
951              ( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
952                          ( (bit5) << 5) | ( (bit4) << 4) | \
953                          ( (bit3) << 3) | ( (bit2) << 2) | \
954                          ( (bit1) << 1) | ( (bit0) << 0) ) )
955 
956    /* these should all get folded out at compile time */
957    tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
958    tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
959    tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
960    tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
961    tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
962    tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
963    tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
964    tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
965    tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
966 
967    switch (descr) {
968    /*
969               +--------------------------------- TREE_DESCR_8_7
970               |             +------------------- TREE_DESCR_8_0
971               |             |  +---------------- TREE_DESCR_16_3
972               |             |  | +-------------- TREE_DESCR_32_1
973               |             |  | | +------------ TREE_DESCR_16_2
974               |             |  | | |  +--------- TREE_DESCR_64
975               |             |  | | |  |  +------ TREE_DESCR_16_1
976               |             |  | | |  |  | +---- TREE_DESCR_32_0
977               |             |  | | |  |  | | +-- TREE_DESCR_16_0
978               |             |  | | |  |  | | |
979               |             |  | | |  |  | | |   GRANULARITY, 7 -> 0 */
980    case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8  8 8 8 8 */
981                                                  return BYTE(1,1,1,1,1,1,1,1);
982    case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16   8 8 8 8 */
983                                                  return BYTE(1,1,0,1,1,1,1,1);
984    case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16  8 8  8 8 8 8 */
985                                                  return BYTE(0,1,1,1,1,1,1,1);
986    case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16  16   8 8 8 8 */
987                                                  return BYTE(0,1,0,1,1,1,1,1);
988 
989    case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8  8 8 16 */
990                                                  return BYTE(1,1,1,1,1,1,0,1);
991    case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16   8 8 16 */
992                                                  return BYTE(1,1,0,1,1,1,0,1);
993    case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16  8 8  8 8 16 */
994                                                  return BYTE(0,1,1,1,1,1,0,1);
995    case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16  16   8 8 16 */
996                                                  return BYTE(0,1,0,1,1,1,0,1);
997 
998    case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8  16 8 8 */
999                                                  return BYTE(1,1,1,1,0,1,1,1);
1000    case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16   16 8 8 */
1001                                                  return BYTE(1,1,0,1,0,1,1,1);
1002    case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16  8 8  16 8 8 */
1003                                                  return BYTE(0,1,1,1,0,1,1,1);
1004    case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16  16   16 8 8 */
1005                                                  return BYTE(0,1,0,1,0,1,1,1);
1006 
1007    case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8  16 16 */
1008                                                  return BYTE(1,1,1,1,0,1,0,1);
1009    case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16   16 16 */
1010                                                  return BYTE(1,1,0,1,0,1,0,1);
1011    case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16  8 8  16 16 */
1012                                                  return BYTE(0,1,1,1,0,1,0,1);
1013    case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16  16   16 16 */
1014                                                  return BYTE(0,1,0,1,0,1,0,1);
1015 
1016    case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32  8 8 8 8 */
1017                                                  return BYTE(0,0,0,1,1,1,1,1);
1018    case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32  8 8 16  */
1019                                                  return BYTE(0,0,0,1,1,1,0,1);
1020    case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32  16  8 8 */
1021                                                  return BYTE(0,0,0,1,0,1,1,1);
1022    case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32  16  16  */
1023                                                  return BYTE(0,0,0,1,0,1,0,1);
1024 
1025    case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8  32 */
1026                                                  return BYTE(1,1,1,1,0,0,0,1);
1027    case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16   32 */
1028                                                  return BYTE(1,1,0,1,0,0,0,1);
1029    case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16  8 8  32 */
1030                                                  return BYTE(0,1,1,1,0,0,0,1);
1031    case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16  16   32 */
1032                                                  return BYTE(0,1,0,1,0,0,0,1);
1033 
1034    case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
1035                                                  return BYTE(0,0,0,1,0,0,0,1);
1036 
1037    case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
1038                                                  return BYTE(0,0,0,0,0,0,0,1);
1039 
1040    default: return BYTE(0,0,0,0,0,0,0,0);
1041                    /* INVALID - any valid descr produces at least one
1042                       valid bit in tree[0..7]*/
1043    }
1044    /* NOTREACHED*/
1045    tl_assert(0);
1046 
1047 #  undef DESCR
1048 #  undef BYTE
1049 }
1050 
1051 __attribute__((unused))
is_sane_Descr(UShort descr)1052 static Bool is_sane_Descr ( UShort descr ) {
1053    return descr_to_validbits(descr) != 0;
1054 }
1055 
sprintf_Descr(HChar * dst,UShort descr)1056 static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
1057    VG_(sprintf)(dst,
1058                 "%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
1059                 (Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
1060                 (Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
1061                 (Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
1062                 (Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
1063                 (Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
1064                 (Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
1065                 (Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
1066                 (Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
1067                 (Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
1068                 (Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
1069                 (Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
1070                 (Int)((descr & TREE_DESCR_64)   ? 1 : 0),
1071                 (Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
1072                 (Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
1073                 (Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
1074    );
1075 }
sprintf_Byte(HChar * dst,UChar byte)1076 static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
1077    VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
1078                      (Int)((byte & 128) ? 1 : 0),
1079                      (Int)((byte &  64) ? 1 : 0),
1080                      (Int)((byte &  32) ? 1 : 0),
1081                      (Int)((byte &  16) ? 1 : 0),
1082                      (Int)((byte &   8) ? 1 : 0),
1083                      (Int)((byte &   4) ? 1 : 0),
1084                      (Int)((byte &   2) ? 1 : 0),
1085                      (Int)((byte &   1) ? 1 : 0)
1086    );
1087 }
1088 
is_sane_Descr_and_Tree(UShort descr,SVal * tree)1089 static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
1090    Word  i;
1091    UChar validbits = descr_to_validbits(descr);
1092    HChar buf[128], buf2[128];
1093    if (validbits == 0)
1094       goto bad;
1095    for (i = 0; i < 8; i++) {
1096       if (validbits & (1<<i)) {
1097          if (tree[i] == SVal_INVALID)
1098             goto bad;
1099       } else {
1100          if (tree[i] != SVal_INVALID)
1101             goto bad;
1102       }
1103    }
1104    return True;
1105   bad:
1106    sprintf_Descr( buf, descr );
1107    sprintf_Byte( buf2, validbits );
1108    VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
1109    VG_(printf)("   validbits 0x%02lx    %s\n", (UWord)validbits, buf2);
1110    VG_(printf)("       descr 0x%04lx  %s\n", (UWord)descr, buf);
1111    for (i = 0; i < 8; i++)
1112       VG_(printf)("   [%ld] 0x%016llx\n", i, tree[i]);
1113    VG_(printf)("%s","}\n");
1114    return 0;
1115 }
1116 
is_sane_CacheLine(CacheLine * cl)1117 static Bool is_sane_CacheLine ( CacheLine* cl )
1118 {
1119    Word tno, cloff;
1120 
1121    if (!cl) goto bad;
1122 
1123    for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
1124       UShort descr = cl->descrs[tno];
1125       SVal*  tree  = &cl->svals[cloff];
1126       if (!is_sane_Descr_and_Tree(descr, tree))
1127          goto bad;
1128    }
1129    tl_assert(cloff == N_LINE_ARANGE);
1130    return True;
1131   bad:
1132    pp_CacheLine(cl);
1133    return False;
1134 }
1135 
normalise_tree(SVal * tree)1136 static UShort normalise_tree ( /*MOD*/SVal* tree )
1137 {
1138    UShort descr;
1139    /* pre: incoming tree[0..7] does not have any invalid shvals, in
1140       particular no zeroes. */
1141    if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
1142                 || tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
1143                 || tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
1144                 || tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
1145       tl_assert(0);
1146 
1147    descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
1148            | TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
1149            | TREE_DESCR_8_1 | TREE_DESCR_8_0;
1150    /* build 16-bit layer */
1151    if (tree[1] == tree[0]) {
1152       tree[1] = SVal_INVALID;
1153       descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
1154       descr |= TREE_DESCR_16_0;
1155    }
1156    if (tree[3] == tree[2]) {
1157       tree[3] = SVal_INVALID;
1158       descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
1159       descr |= TREE_DESCR_16_1;
1160    }
1161    if (tree[5] == tree[4]) {
1162       tree[5] = SVal_INVALID;
1163       descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
1164       descr |= TREE_DESCR_16_2;
1165    }
1166    if (tree[7] == tree[6]) {
1167       tree[7] = SVal_INVALID;
1168       descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
1169       descr |= TREE_DESCR_16_3;
1170    }
1171    /* build 32-bit layer */
1172    if (tree[2] == tree[0]
1173        && (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
1174       tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
1175       descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
1176       descr |= TREE_DESCR_32_0;
1177    }
1178    if (tree[6] == tree[4]
1179        && (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
1180       tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
1181       descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
1182       descr |= TREE_DESCR_32_1;
1183    }
1184    /* build 64-bit layer */
1185    if (tree[4] == tree[0]
1186        && (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
1187       tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
1188       descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
1189       descr |= TREE_DESCR_64;
1190    }
1191    return descr;
1192 }
1193 
1194 /* This takes a cacheline where all the data is at the leaves
1195    (w8[..]) and builds a correctly normalised tree. */
normalise_CacheLine(CacheLine * cl)1196 static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
1197 {
1198    Word tno, cloff;
1199    for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
1200       SVal* tree = &cl->svals[cloff];
1201       cl->descrs[tno] = normalise_tree( tree );
1202    }
1203    tl_assert(cloff == N_LINE_ARANGE);
1204    if (CHECK_ZSM)
1205       tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1206    stats__cline_normalises++;
1207 }
1208 
1209 
1210 typedef struct { UChar count; SVal sval; } CountedSVal;
1211 
1212 static
sequentialise_CacheLine(CountedSVal * dst,Word * dstUsedP,Word nDst,CacheLine * src)1213 void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
1214                                /*OUT*/Word* dstUsedP,
1215                                Word nDst, CacheLine* src )
1216 {
1217    Word  tno, cloff, dstUsed;
1218 
1219    tl_assert(nDst == N_LINE_ARANGE);
1220    dstUsed = 0;
1221 
1222    for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
1223       UShort descr = src->descrs[tno];
1224       SVal*  tree  = &src->svals[cloff];
1225 
1226       /* sequentialise the tree described by (descr,tree). */
1227 #     define PUT(_n,_v)                                \
1228          do { dst[dstUsed  ].count = (_n);             \
1229               dst[dstUsed++].sval  = (_v);             \
1230          } while (0)
1231 
1232       /* byte 0 */
1233       if (descr & TREE_DESCR_64)   PUT(8, tree[0]); else
1234       if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
1235       if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
1236       if (descr & TREE_DESCR_8_0)  PUT(1, tree[0]);
1237       /* byte 1 */
1238       if (descr & TREE_DESCR_8_1)  PUT(1, tree[1]);
1239       /* byte 2 */
1240       if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
1241       if (descr & TREE_DESCR_8_2)  PUT(1, tree[2]);
1242       /* byte 3 */
1243       if (descr & TREE_DESCR_8_3)  PUT(1, tree[3]);
1244       /* byte 4 */
1245       if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
1246       if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
1247       if (descr & TREE_DESCR_8_4)  PUT(1, tree[4]);
1248       /* byte 5 */
1249       if (descr & TREE_DESCR_8_5)  PUT(1, tree[5]);
1250       /* byte 6 */
1251       if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
1252       if (descr & TREE_DESCR_8_6)  PUT(1, tree[6]);
1253       /* byte 7 */
1254       if (descr & TREE_DESCR_8_7)  PUT(1, tree[7]);
1255 
1256 #     undef PUT
1257       /* END sequentialise the tree described by (descr,tree). */
1258 
1259    }
1260    tl_assert(cloff == N_LINE_ARANGE);
1261    tl_assert(dstUsed <= nDst);
1262 
1263    *dstUsedP = dstUsed;
1264 }
1265 
1266 /* Write the cacheline 'wix' to backing store.  Where it ends up
1267    is determined by its tag field. */
cacheline_wback(UWord wix)1268 static __attribute__((noinline)) void cacheline_wback ( UWord wix )
1269 {
1270    Word        i, j, k, m;
1271    Addr        tag;
1272    SecMap*     sm;
1273    CacheLine*  cl;
1274    LineZ* lineZ;
1275    LineF* lineF;
1276    Word        zix, fix, csvalsUsed;
1277    CountedSVal csvals[N_LINE_ARANGE];
1278    SVal        sv;
1279 
1280    if (0)
1281    VG_(printf)("scache wback line %d\n", (Int)wix);
1282 
1283    tl_assert(wix >= 0 && wix < N_WAY_NENT);
1284 
1285    tag =  cache_shmem.tags0[wix];
1286    cl  = &cache_shmem.lyns0[wix];
1287 
1288    /* The cache line may have been invalidated; if so, ignore it. */
1289    if (!is_valid_scache_tag(tag))
1290       return;
1291 
1292    /* Where are we going to put it? */
1293    sm         = NULL;
1294    lineZ      = NULL;
1295    lineF      = NULL;
1296    zix = fix = -1;
1297 
1298    /* find the Z line to write in and rcdec it or the associated F
1299       line. */
1300    find_Z_for_writing( &sm, &zix, tag );
1301 
1302    tl_assert(sm);
1303    tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
1304    lineZ = &sm->linesZ[zix];
1305 
1306    /* Generate the data to be stored */
1307    if (CHECK_ZSM)
1308       tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1309 
1310    csvalsUsed = -1;
1311    sequentialise_CacheLine( csvals, &csvalsUsed,
1312                             N_LINE_ARANGE, cl );
1313    tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
1314    if (0) VG_(printf)("%lu ", csvalsUsed);
1315 
1316    lineZ->dict[0] = lineZ->dict[1]
1317                   = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1318 
1319    /* i indexes actual shadow values, k is cursor in csvals */
1320    i = 0;
1321    for (k = 0; k < csvalsUsed; k++) {
1322 
1323       sv = csvals[k].sval;
1324       if (CHECK_ZSM)
1325          tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1326       /* do we already have it? */
1327       if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
1328       if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
1329       if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
1330       if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
1331       /* no.  look for a free slot. */
1332       if (CHECK_ZSM)
1333          tl_assert(sv != SVal_INVALID);
1334       if (lineZ->dict[0]
1335           == SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
1336       if (lineZ->dict[1]
1337           == SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
1338       if (lineZ->dict[2]
1339           == SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
1340       if (lineZ->dict[3]
1341           == SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
1342       break; /* we'll have to use the f rep */
1343      dict_ok:
1344       m = csvals[k].count;
1345       if (m == 8) {
1346          write_twobit_array( lineZ->ix2s, i+0, j );
1347          write_twobit_array( lineZ->ix2s, i+1, j );
1348          write_twobit_array( lineZ->ix2s, i+2, j );
1349          write_twobit_array( lineZ->ix2s, i+3, j );
1350          write_twobit_array( lineZ->ix2s, i+4, j );
1351          write_twobit_array( lineZ->ix2s, i+5, j );
1352          write_twobit_array( lineZ->ix2s, i+6, j );
1353          write_twobit_array( lineZ->ix2s, i+7, j );
1354          i += 8;
1355       }
1356       else if (m == 4) {
1357          write_twobit_array( lineZ->ix2s, i+0, j );
1358          write_twobit_array( lineZ->ix2s, i+1, j );
1359          write_twobit_array( lineZ->ix2s, i+2, j );
1360          write_twobit_array( lineZ->ix2s, i+3, j );
1361          i += 4;
1362       }
1363       else if (m == 1) {
1364          write_twobit_array( lineZ->ix2s, i+0, j );
1365          i += 1;
1366       }
1367       else if (m == 2) {
1368          write_twobit_array( lineZ->ix2s, i+0, j );
1369          write_twobit_array( lineZ->ix2s, i+1, j );
1370          i += 2;
1371       }
1372       else {
1373          tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
1374       }
1375 
1376    }
1377 
1378    if (LIKELY(i == N_LINE_ARANGE)) {
1379       /* Construction of the compressed representation was
1380          successful. */
1381       rcinc_LineZ(lineZ);
1382       stats__cache_Z_wbacks++;
1383    } else {
1384       /* Cannot use the compressed(z) representation.  Use the full(f)
1385          rep instead. */
1386       tl_assert(i >= 0 && i < N_LINE_ARANGE);
1387       alloc_F_for_writing( sm, &fix );
1388       tl_assert(sm->linesF);
1389       tl_assert(sm->linesF_size > 0);
1390       tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
1391       lineF = &sm->linesF[fix];
1392       tl_assert(!lineF->inUse);
1393       lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
1394       lineZ->dict[1] = (SVal)fix;
1395       lineF->inUse = True;
1396       i = 0;
1397       for (k = 0; k < csvalsUsed; k++) {
1398          if (CHECK_ZSM)
1399             tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
1400          sv = csvals[k].sval;
1401          if (CHECK_ZSM)
1402             tl_assert(sv != SVal_INVALID);
1403          for (m = csvals[k].count; m > 0; m--) {
1404             lineF->w64s[i] = sv;
1405             i++;
1406          }
1407       }
1408       tl_assert(i == N_LINE_ARANGE);
1409       rcinc_LineF(lineF);
1410       stats__cache_F_wbacks++;
1411    }
1412 }
1413 
1414 /* Fetch the cacheline 'wix' from the backing store.  The tag
1415    associated with 'wix' is assumed to have already been filled in;
1416    hence that is used to determine where in the backing store to read
1417    from. */
cacheline_fetch(UWord wix)1418 static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
1419 {
1420    Word       i;
1421    Addr       tag;
1422    CacheLine* cl;
1423    LineZ*     lineZ;
1424    LineF*     lineF;
1425 
1426    if (0)
1427    VG_(printf)("scache fetch line %d\n", (Int)wix);
1428 
1429    tl_assert(wix >= 0 && wix < N_WAY_NENT);
1430 
1431    tag =  cache_shmem.tags0[wix];
1432    cl  = &cache_shmem.lyns0[wix];
1433 
1434    /* reject nonsense requests */
1435    tl_assert(is_valid_scache_tag(tag));
1436 
1437    lineZ = NULL;
1438    lineF = NULL;
1439    find_ZF_for_reading( &lineZ, &lineF, tag );
1440    tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
1441 
1442    /* expand the data into the bottom layer of the tree, then get
1443       cacheline_normalise to build the descriptor array. */
1444    if (lineF) {
1445       tl_assert(lineF->inUse);
1446       for (i = 0; i < N_LINE_ARANGE; i++) {
1447          cl->svals[i] = lineF->w64s[i];
1448       }
1449       stats__cache_F_fetches++;
1450    } else {
1451       for (i = 0; i < N_LINE_ARANGE; i++) {
1452          SVal sv;
1453          UWord ix = read_twobit_array( lineZ->ix2s, i );
1454          /* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
1455          sv = lineZ->dict[ix];
1456          tl_assert(sv != SVal_INVALID);
1457          cl->svals[i] = sv;
1458       }
1459       stats__cache_Z_fetches++;
1460    }
1461    normalise_CacheLine( cl );
1462 }
1463 
shmem__invalidate_scache(void)1464 static void shmem__invalidate_scache ( void ) {
1465    Word wix;
1466    if (0) VG_(printf)("%s","scache inval\n");
1467    tl_assert(!is_valid_scache_tag(1));
1468    for (wix = 0; wix < N_WAY_NENT; wix++) {
1469       cache_shmem.tags0[wix] = 1/*INVALID*/;
1470    }
1471    stats__cache_invals++;
1472 }
1473 
shmem__flush_and_invalidate_scache(void)1474 static void shmem__flush_and_invalidate_scache ( void ) {
1475    Word wix;
1476    Addr tag;
1477    if (0) VG_(printf)("%s","scache flush and invalidate\n");
1478    tl_assert(!is_valid_scache_tag(1));
1479    for (wix = 0; wix < N_WAY_NENT; wix++) {
1480       tag = cache_shmem.tags0[wix];
1481       if (tag == 1/*INVALID*/) {
1482          /* already invalid; nothing to do */
1483       } else {
1484          tl_assert(is_valid_scache_tag(tag));
1485          cacheline_wback( wix );
1486       }
1487       cache_shmem.tags0[wix] = 1/*INVALID*/;
1488    }
1489    stats__cache_flushes++;
1490    stats__cache_invals++;
1491 }
1492 
1493 
aligned16(Addr a)1494 static inline Bool aligned16 ( Addr a ) {
1495    return 0 == (a & 1);
1496 }
aligned32(Addr a)1497 static inline Bool aligned32 ( Addr a ) {
1498    return 0 == (a & 3);
1499 }
aligned64(Addr a)1500 static inline Bool aligned64 ( Addr a ) {
1501    return 0 == (a & 7);
1502 }
get_cacheline_offset(Addr a)1503 static inline UWord get_cacheline_offset ( Addr a ) {
1504    return (UWord)(a & (N_LINE_ARANGE - 1));
1505 }
cacheline_ROUNDUP(Addr a)1506 static inline Addr cacheline_ROUNDUP ( Addr a ) {
1507    return ROUNDUP(a, N_LINE_ARANGE);
1508 }
cacheline_ROUNDDN(Addr a)1509 static inline Addr cacheline_ROUNDDN ( Addr a ) {
1510    return ROUNDDN(a, N_LINE_ARANGE);
1511 }
get_treeno(Addr a)1512 static inline UWord get_treeno ( Addr a ) {
1513    return get_cacheline_offset(a) >> 3;
1514 }
get_tree_offset(Addr a)1515 static inline UWord get_tree_offset ( Addr a ) {
1516    return a & 7;
1517 }
1518 
1519 static __attribute__((noinline))
1520        CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
get_cacheline(Addr a)1521 static inline CacheLine* get_cacheline ( Addr a )
1522 {
1523    /* tag is 'a' with the in-line offset masked out,
1524       eg a[31]..a[4] 0000 */
1525    Addr       tag = a & ~(N_LINE_ARANGE - 1);
1526    UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1527    stats__cache_totrefs++;
1528    if (LIKELY(tag == cache_shmem.tags0[wix])) {
1529       return &cache_shmem.lyns0[wix];
1530    } else {
1531       return get_cacheline_MISS( a );
1532    }
1533 }
1534 
1535 static __attribute__((noinline))
get_cacheline_MISS(Addr a)1536        CacheLine* get_cacheline_MISS ( Addr a )
1537 {
1538    /* tag is 'a' with the in-line offset masked out,
1539       eg a[31]..a[4] 0000 */
1540 
1541    CacheLine* cl;
1542    Addr*      tag_old_p;
1543    Addr       tag = a & ~(N_LINE_ARANGE - 1);
1544    UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
1545 
1546    tl_assert(tag != cache_shmem.tags0[wix]);
1547 
1548    /* Dump the old line into the backing store. */
1549    stats__cache_totmisses++;
1550 
1551    cl        = &cache_shmem.lyns0[wix];
1552    tag_old_p = &cache_shmem.tags0[wix];
1553 
1554    if (is_valid_scache_tag( *tag_old_p )) {
1555       /* EXPENSIVE and REDUNDANT: callee does it */
1556       if (CHECK_ZSM)
1557          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1558       cacheline_wback( wix );
1559    }
1560    /* and reload the new one */
1561    *tag_old_p = tag;
1562    cacheline_fetch( wix );
1563    if (CHECK_ZSM)
1564       tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
1565    return cl;
1566 }
1567 
pulldown_to_32(SVal * tree,UWord toff,UShort descr)1568 static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1569    stats__cline_64to32pulldown++;
1570    switch (toff) {
1571       case 0: case 4:
1572          tl_assert(descr & TREE_DESCR_64);
1573          tree[4] = tree[0];
1574          descr &= ~TREE_DESCR_64;
1575          descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
1576          break;
1577       default:
1578          tl_assert(0);
1579    }
1580    return descr;
1581 }
1582 
pulldown_to_16(SVal * tree,UWord toff,UShort descr)1583 static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1584    stats__cline_32to16pulldown++;
1585    switch (toff) {
1586       case 0: case 2:
1587          if (!(descr & TREE_DESCR_32_0)) {
1588             descr = pulldown_to_32(tree, 0, descr);
1589          }
1590          tl_assert(descr & TREE_DESCR_32_0);
1591          tree[2] = tree[0];
1592          descr &= ~TREE_DESCR_32_0;
1593          descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
1594          break;
1595       case 4: case 6:
1596          if (!(descr & TREE_DESCR_32_1)) {
1597             descr = pulldown_to_32(tree, 4, descr);
1598          }
1599          tl_assert(descr & TREE_DESCR_32_1);
1600          tree[6] = tree[4];
1601          descr &= ~TREE_DESCR_32_1;
1602          descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
1603          break;
1604       default:
1605          tl_assert(0);
1606    }
1607    return descr;
1608 }
1609 
pulldown_to_8(SVal * tree,UWord toff,UShort descr)1610 static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
1611    stats__cline_16to8pulldown++;
1612    switch (toff) {
1613       case 0: case 1:
1614          if (!(descr & TREE_DESCR_16_0)) {
1615             descr = pulldown_to_16(tree, 0, descr);
1616          }
1617          tl_assert(descr & TREE_DESCR_16_0);
1618          tree[1] = tree[0];
1619          descr &= ~TREE_DESCR_16_0;
1620          descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
1621          break;
1622       case 2: case 3:
1623          if (!(descr & TREE_DESCR_16_1)) {
1624             descr = pulldown_to_16(tree, 2, descr);
1625          }
1626          tl_assert(descr & TREE_DESCR_16_1);
1627          tree[3] = tree[2];
1628          descr &= ~TREE_DESCR_16_1;
1629          descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
1630          break;
1631       case 4: case 5:
1632          if (!(descr & TREE_DESCR_16_2)) {
1633             descr = pulldown_to_16(tree, 4, descr);
1634          }
1635          tl_assert(descr & TREE_DESCR_16_2);
1636          tree[5] = tree[4];
1637          descr &= ~TREE_DESCR_16_2;
1638          descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
1639          break;
1640       case 6: case 7:
1641          if (!(descr & TREE_DESCR_16_3)) {
1642             descr = pulldown_to_16(tree, 6, descr);
1643          }
1644          tl_assert(descr & TREE_DESCR_16_3);
1645          tree[7] = tree[6];
1646          descr &= ~TREE_DESCR_16_3;
1647          descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
1648          break;
1649       default:
1650          tl_assert(0);
1651    }
1652    return descr;
1653 }
1654 
1655 
pullup_descr_to_16(UShort descr,UWord toff)1656 static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
1657    UShort mask;
1658    switch (toff) {
1659       case 0:
1660          mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
1661          tl_assert( (descr & mask) == mask );
1662          descr &= ~mask;
1663          descr |= TREE_DESCR_16_0;
1664          break;
1665       case 2:
1666          mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
1667          tl_assert( (descr & mask) == mask );
1668          descr &= ~mask;
1669          descr |= TREE_DESCR_16_1;
1670          break;
1671       case 4:
1672          mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
1673          tl_assert( (descr & mask) == mask );
1674          descr &= ~mask;
1675          descr |= TREE_DESCR_16_2;
1676          break;
1677       case 6:
1678          mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
1679          tl_assert( (descr & mask) == mask );
1680          descr &= ~mask;
1681          descr |= TREE_DESCR_16_3;
1682          break;
1683       default:
1684          tl_assert(0);
1685    }
1686    return descr;
1687 }
1688 
pullup_descr_to_32(UShort descr,UWord toff)1689 static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
1690    UShort mask;
1691    switch (toff) {
1692       case 0:
1693          if (!(descr & TREE_DESCR_16_0))
1694             descr = pullup_descr_to_16(descr, 0);
1695          if (!(descr & TREE_DESCR_16_1))
1696             descr = pullup_descr_to_16(descr, 2);
1697          mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
1698          tl_assert( (descr & mask) == mask );
1699          descr &= ~mask;
1700          descr |= TREE_DESCR_32_0;
1701          break;
1702       case 4:
1703          if (!(descr & TREE_DESCR_16_2))
1704             descr = pullup_descr_to_16(descr, 4);
1705          if (!(descr & TREE_DESCR_16_3))
1706             descr = pullup_descr_to_16(descr, 6);
1707          mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
1708          tl_assert( (descr & mask) == mask );
1709          descr &= ~mask;
1710          descr |= TREE_DESCR_32_1;
1711          break;
1712       default:
1713          tl_assert(0);
1714    }
1715    return descr;
1716 }
1717 
valid_value_is_above_me_32(UShort descr,UWord toff)1718 static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
1719    switch (toff) {
1720       case 0: case 4:
1721          return 0 != (descr & TREE_DESCR_64);
1722       default:
1723          tl_assert(0);
1724    }
1725 }
1726 
valid_value_is_below_me_16(UShort descr,UWord toff)1727 static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
1728    switch (toff) {
1729       case 0:
1730          return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
1731       case 2:
1732          return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
1733       case 4:
1734          return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
1735       case 6:
1736          return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
1737       default:
1738          tl_assert(0);
1739    }
1740 }
1741 
1742 /* ------------ Cache management ------------ */
1743 
zsm_flush_cache(void)1744 static void zsm_flush_cache ( void )
1745 {
1746    shmem__flush_and_invalidate_scache();
1747 }
1748 
1749 
zsm_init(void (* p_rcinc)(SVal),void (* p_rcdec)(SVal))1750 static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
1751 {
1752    tl_assert( sizeof(UWord) == sizeof(Addr) );
1753 
1754    rcinc = p_rcinc;
1755    rcdec = p_rcdec;
1756 
1757    tl_assert(map_shmem == NULL);
1758    map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
1759                            HG_(free),
1760                            NULL/*unboxed UWord cmp*/);
1761    tl_assert(map_shmem != NULL);
1762    shmem__invalidate_scache();
1763 
1764    /* a SecMap must contain an integral number of CacheLines */
1765    tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
1766    /* also ... a CacheLine holds an integral number of trees */
1767    tl_assert(0 == (N_LINE_ARANGE % 8));
1768 }
1769 
1770 /////////////////////////////////////////////////////////////////
1771 /////////////////////////////////////////////////////////////////
1772 //                                                             //
1773 // SECTION END compressed shadow memory                        //
1774 //                                                             //
1775 /////////////////////////////////////////////////////////////////
1776 /////////////////////////////////////////////////////////////////
1777 
1778 
1779 
1780 /////////////////////////////////////////////////////////////////
1781 /////////////////////////////////////////////////////////////////
1782 //                                                             //
1783 // SECTION BEGIN vts primitives                                //
1784 //                                                             //
1785 /////////////////////////////////////////////////////////////////
1786 /////////////////////////////////////////////////////////////////
1787 
1788 
1789 /* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
1790    being compact stand-ins for Thr*'s.  Use these functions to map
1791    between them. */
1792 static ThrID Thr__to_ThrID   ( Thr*  thr   ); /* fwds */
1793 static Thr*  Thr__from_ThrID ( ThrID thrid ); /* fwds */
1794 
1795 __attribute__((noreturn))
scalarts_limitations_fail_NORETURN(Bool due_to_nThrs)1796 static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
1797 {
1798    if (due_to_nThrs) {
1799       HChar* s =
1800          "\n"
1801          "Helgrind: cannot continue, run aborted: too many threads.\n"
1802          "Sorry.  Helgrind can only handle programs that create\n"
1803          "%'llu or fewer threads over their entire lifetime.\n"
1804          "\n";
1805       VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
1806    } else {
1807       HChar* s =
1808          "\n"
1809          "Helgrind: cannot continue, run aborted: too many\n"
1810          "synchronisation events.  Sorry. Helgrind can only handle\n"
1811          "programs which perform %'llu or fewer\n"
1812          "inter-thread synchronisation events (locks, unlocks, etc).\n"
1813          "\n";
1814       VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
1815    }
1816    VG_(exit)(1);
1817    /*NOTREACHED*/
1818    tl_assert(0); /*wtf?!*/
1819 }
1820 
1821 
1822 /* The dead thread (ThrID, actually) table.  A thread may only be
1823    listed here if we have been notified thereof by libhb_async_exit.
1824    New entries are added at the end.  The order isn't important, but
1825    the ThrID values must be unique.  This table lists the identity of
1826    all threads that have ever died -- none are ever removed.  We keep
1827    this table so as to be able to prune entries from VTSs.  We don't
1828    actually need to keep the set of threads that have ever died --
1829    only the threads that have died since the previous round of
1830    pruning.  But it's useful for sanity check purposes to keep the
1831    entire set, so we do. */
1832 static XArray* /* of ThrID */ verydead_thread_table = NULL;
1833 
1834 /* Arbitrary total ordering on ThrIDs. */
cmp__ThrID(void * v1,void * v2)1835 static Int cmp__ThrID ( void* v1, void* v2 ) {
1836    ThrID id1 = *(ThrID*)v1;
1837    ThrID id2 = *(ThrID*)v2;
1838    if (id1 < id2) return -1;
1839    if (id1 > id2) return 1;
1840    return 0;
1841 }
1842 
verydead_thread_table_init(void)1843 static void verydead_thread_table_init ( void )
1844 {
1845    tl_assert(!verydead_thread_table);
1846    verydead_thread_table
1847      = VG_(newXA)( HG_(zalloc),
1848                    "libhb.verydead_thread_table_init.1",
1849                    HG_(free), sizeof(ThrID) );
1850    tl_assert(verydead_thread_table);
1851    VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
1852 }
1853 
1854 
1855 /* A VTS contains .ts, its vector clock, and also .id, a field to hold
1856    a backlink for the caller's convenience.  Since we have no idea
1857    what to set that to in the library, it always gets set to
1858    VtsID_INVALID. */
1859 typedef
1860    struct {
1861       VtsID    id;
1862       UInt     usedTS;
1863       UInt     sizeTS;
1864       ScalarTS ts[0];
1865    }
1866    VTS;
1867 
1868 /* Allocate a VTS capable of storing 'sizeTS' entries. */
1869 static VTS* VTS__new ( HChar* who, UInt sizeTS );
1870 
1871 /* Make a clone of 'vts', sizing the new array to exactly match the
1872    number of ScalarTSs present. */
1873 static VTS* VTS__clone ( HChar* who, VTS* vts );
1874 
1875 /* Make a clone of 'vts' with the thrids in 'thrids' removed.  The new
1876    array is sized exactly to hold the number of required elements.
1877    'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
1878    must be in strictly increasing order. */
1879 static VTS* VTS__subtract ( HChar* who, VTS* vts, XArray* thridsToDel );
1880 
1881 /* Delete this VTS in its entirety. */
1882 static void VTS__delete ( VTS* vts );
1883 
1884 /* Create a new singleton VTS in 'out'.  Caller must have
1885    pre-allocated 'out' sufficiently big to hold the result in all
1886    possible cases. */
1887 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
1888 
1889 /* Create in 'out' a VTS which is the same as 'vts' except with
1890    vts[me]++, so to speak.  Caller must have pre-allocated 'out'
1891    sufficiently big to hold the result in all possible cases. */
1892 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
1893 
1894 /* Create in 'out' a VTS which is the join (max) of 'a' and
1895    'b'. Caller must have pre-allocated 'out' sufficiently big to hold
1896    the result in all possible cases. */
1897 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
1898 
1899 /* Compute the partial ordering relation of the two args.  Although we
1900    could be completely general and return an enumeration value (EQ,
1901    LT, GT, UN), in fact we only need LEQ, and so we may as well
1902    hardwire that fact.
1903 
1904    Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
1905    invald ThrID).  In the latter case, the returned ThrID indicates
1906    the discovered point for which they are not.  There may be more
1907    than one such point, but we only care about seeing one of them, not
1908    all of them.  This rather strange convention is used because
1909    sometimes we want to know the actual index at which they first
1910    differ. */
1911 static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
1912 
1913 /* Compute an arbitrary structural (total) ordering on the two args,
1914    based on their VCs, so they can be looked up in a table, tree, etc.
1915    Returns -1, 0 or 1. */
1916 static Word VTS__cmp_structural ( VTS* a, VTS* b );
1917 
1918 /* Debugging only.  Display the given VTS in the buffer. */
1919 static void VTS__show ( HChar* buf, Int nBuf, VTS* vts );
1920 
1921 /* Debugging only.  Return vts[index], so to speak. */
1922 static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
1923 
1924 /* Notify the VTS machinery that a thread has been declared
1925    comprehensively dead: that is, it has done an async exit AND it has
1926    been joined with.  This should ensure that its local clocks (.viR
1927    and .viW) will never again change, and so all mentions of this
1928    thread from all VTSs in the system may be removed. */
1929 static void VTS__declare_thread_very_dead ( Thr* idx );
1930 
1931 /*--------------- to do with Vector Timestamps ---------------*/
1932 
is_sane_VTS(VTS * vts)1933 static Bool is_sane_VTS ( VTS* vts )
1934 {
1935    UWord     i, n;
1936    ScalarTS  *st1, *st2;
1937    if (!vts) return False;
1938    if (!vts->ts) return False;
1939    if (vts->usedTS > vts->sizeTS) return False;
1940    n = vts->usedTS;
1941    if (n == 1) {
1942       st1 = &vts->ts[0];
1943       if (st1->tym == 0)
1944          return False;
1945    }
1946    else
1947    if (n >= 2) {
1948       for (i = 0; i < n-1; i++) {
1949          st1 = &vts->ts[i];
1950          st2 = &vts->ts[i+1];
1951          if (st1->thrid >= st2->thrid)
1952             return False;
1953          if (st1->tym == 0 || st2->tym == 0)
1954             return False;
1955       }
1956    }
1957    return True;
1958 }
1959 
1960 
1961 /* Create a new, empty VTS.
1962 */
VTS__new(HChar * who,UInt sizeTS)1963 static VTS* VTS__new ( HChar* who, UInt sizeTS )
1964 {
1965    VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
1966    tl_assert(vts->usedTS == 0);
1967    vts->sizeTS = sizeTS;
1968    *(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
1969    return vts;
1970 }
1971 
1972 /* Clone this VTS.
1973 */
VTS__clone(HChar * who,VTS * vts)1974 static VTS* VTS__clone ( HChar* who, VTS* vts )
1975 {
1976    tl_assert(vts);
1977    tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
1978    UInt nTS = vts->usedTS;
1979    VTS* clone = VTS__new(who, nTS);
1980    clone->id = vts->id;
1981    clone->sizeTS = nTS;
1982    clone->usedTS = nTS;
1983    UInt i;
1984    for (i = 0; i < nTS; i++) {
1985       clone->ts[i] = vts->ts[i];
1986    }
1987    tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
1988    return clone;
1989 }
1990 
1991 
1992 /* Make a clone of a VTS with specified ThrIDs removed.  'thridsToDel'
1993    must be in strictly increasing order.  We could obviously do this
1994    much more efficiently (in linear time) if necessary.
1995 */
VTS__subtract(HChar * who,VTS * vts,XArray * thridsToDel)1996 static VTS* VTS__subtract ( HChar* who, VTS* vts, XArray* thridsToDel )
1997 {
1998    UInt i, j;
1999    tl_assert(vts);
2000    tl_assert(thridsToDel);
2001    tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2002    UInt nTS = vts->usedTS;
2003    /* Figure out how many ScalarTSs will remain in the output. */
2004    UInt nReq = nTS;
2005    for (i = 0; i < nTS; i++) {
2006       ThrID thrid = vts->ts[i].thrid;
2007       if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2008          nReq--;
2009    }
2010    tl_assert(nReq <= nTS);
2011    /* Copy the ones that will remain. */
2012    VTS* res = VTS__new(who, nReq);
2013    j = 0;
2014    for (i = 0; i < nTS; i++) {
2015       ThrID thrid = vts->ts[i].thrid;
2016       if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
2017          continue;
2018       res->ts[j++] = vts->ts[i];
2019    }
2020    tl_assert(j == nReq);
2021    tl_assert(j == res->sizeTS);
2022    res->usedTS = j;
2023    tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
2024    return res;
2025 }
2026 
2027 
2028 /* Delete this VTS in its entirety.
2029 */
VTS__delete(VTS * vts)2030 static void VTS__delete ( VTS* vts )
2031 {
2032    tl_assert(vts);
2033    tl_assert(vts->usedTS <= vts->sizeTS);
2034    tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
2035    HG_(free)(vts);
2036 }
2037 
2038 
2039 /* Create a new singleton VTS.
2040 */
VTS__singleton(VTS * out,Thr * thr,ULong tym)2041 static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
2042 {
2043    tl_assert(thr);
2044    tl_assert(tym >= 1);
2045    tl_assert(out);
2046    tl_assert(out->usedTS == 0);
2047    tl_assert(out->sizeTS >= 1);
2048    UInt hi = out->usedTS++;
2049    out->ts[hi].thrid = Thr__to_ThrID(thr);
2050    out->ts[hi].tym   = tym;
2051 }
2052 
2053 
2054 /* Return a new VTS in which vts[me]++, so to speak.  'vts' itself is
2055    not modified.
2056 */
VTS__tick(VTS * out,Thr * me,VTS * vts)2057 static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
2058 {
2059    UInt      i, n;
2060    ThrID     me_thrid;
2061    Bool      found = False;
2062 
2063    stats__vts__tick++;
2064 
2065    tl_assert(out);
2066    tl_assert(out->usedTS == 0);
2067    if (vts->usedTS >= ThrID_MAX_VALID)
2068       scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2069    tl_assert(out->sizeTS >= 1 + vts->usedTS);
2070 
2071    tl_assert(me);
2072    me_thrid = Thr__to_ThrID(me);
2073    tl_assert(is_sane_VTS(vts));
2074    n = vts->usedTS;
2075 
2076    /* Copy all entries which precede 'me'. */
2077    for (i = 0; i < n; i++) {
2078       ScalarTS* here = &vts->ts[i];
2079       if (UNLIKELY(here->thrid >= me_thrid))
2080          break;
2081       UInt hi = out->usedTS++;
2082       out->ts[hi] = *here;
2083    }
2084 
2085    /* 'i' now indicates the next entry to copy, if any.
2086        There are 3 possibilities:
2087        (a) there is no next entry (we used them all up already):
2088            add (me_thrid,1) to the output, and quit
2089        (b) there is a next entry, and its thrid > me_thrid:
2090            add (me_thrid,1) to the output, then copy the remaining entries
2091        (c) there is a next entry, and its thrid == me_thrid:
2092            copy it to the output but increment its timestamp value.
2093            Then copy the remaining entries.  (c) is the common case.
2094    */
2095    tl_assert(i >= 0 && i <= n);
2096    if (i == n) { /* case (a) */
2097       UInt hi = out->usedTS++;
2098       out->ts[hi].thrid = me_thrid;
2099       out->ts[hi].tym   = 1;
2100    } else {
2101       /* cases (b) and (c) */
2102       ScalarTS* here = &vts->ts[i];
2103       if (me_thrid == here->thrid) { /* case (c) */
2104          if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
2105             /* We're hosed.  We have to stop. */
2106             scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
2107          }
2108          UInt hi = out->usedTS++;
2109          out->ts[hi].thrid = here->thrid;
2110          out->ts[hi].tym   = here->tym + 1;
2111          i++;
2112          found = True;
2113       } else { /* case (b) */
2114          UInt hi = out->usedTS++;
2115          out->ts[hi].thrid = me_thrid;
2116          out->ts[hi].tym   = 1;
2117       }
2118       /* And copy any remaining entries. */
2119       for (/*keepgoing*/; i < n; i++) {
2120          ScalarTS* here2 = &vts->ts[i];
2121          UInt hi = out->usedTS++;
2122          out->ts[hi] = *here2;
2123       }
2124    }
2125 
2126    tl_assert(is_sane_VTS(out));
2127    tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
2128    tl_assert(out->usedTS <= out->sizeTS);
2129 }
2130 
2131 
2132 /* Return a new VTS constructed as the join (max) of the 2 args.
2133    Neither arg is modified.
2134 */
VTS__join(VTS * out,VTS * a,VTS * b)2135 static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
2136 {
2137    UInt     ia, ib, useda, usedb;
2138    ULong    tyma, tymb, tymMax;
2139    ThrID    thrid;
2140    UInt     ncommon = 0;
2141 
2142    stats__vts__join++;
2143 
2144    tl_assert(a);
2145    tl_assert(b);
2146    useda = a->usedTS;
2147    usedb = b->usedTS;
2148 
2149    tl_assert(out);
2150    tl_assert(out->usedTS == 0);
2151    /* overly conservative test, but doing better involves comparing
2152       the two VTSs, which we don't want to do at this point. */
2153    if (useda + usedb >= ThrID_MAX_VALID)
2154       scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
2155    tl_assert(out->sizeTS >= useda + usedb);
2156 
2157    ia = ib = 0;
2158 
2159    while (1) {
2160 
2161       /* This logic is to enumerate triples (thrid, tyma, tymb) drawn
2162          from a and b in order, where thrid is the next ThrID
2163          occurring in either a or b, and tyma/b are the relevant
2164          scalar timestamps, taking into account implicit zeroes. */
2165       tl_assert(ia >= 0 && ia <= useda);
2166       tl_assert(ib >= 0 && ib <= usedb);
2167 
2168       if        (ia == useda && ib == usedb) {
2169          /* both empty - done */
2170          break;
2171 
2172       } else if (ia == useda && ib != usedb) {
2173          /* a empty, use up b */
2174          ScalarTS* tmpb = &b->ts[ib];
2175          thrid = tmpb->thrid;
2176          tyma  = 0;
2177          tymb  = tmpb->tym;
2178          ib++;
2179 
2180       } else if (ia != useda && ib == usedb) {
2181          /* b empty, use up a */
2182          ScalarTS* tmpa = &a->ts[ia];
2183          thrid = tmpa->thrid;
2184          tyma  = tmpa->tym;
2185          tymb  = 0;
2186          ia++;
2187 
2188       } else {
2189          /* both not empty; extract lowest-ThrID'd triple */
2190          ScalarTS* tmpa = &a->ts[ia];
2191          ScalarTS* tmpb = &b->ts[ib];
2192          if (tmpa->thrid < tmpb->thrid) {
2193             /* a has the lowest unconsidered ThrID */
2194             thrid = tmpa->thrid;
2195             tyma  = tmpa->tym;
2196             tymb  = 0;
2197             ia++;
2198          } else if (tmpa->thrid > tmpb->thrid) {
2199             /* b has the lowest unconsidered ThrID */
2200             thrid = tmpb->thrid;
2201             tyma  = 0;
2202             tymb  = tmpb->tym;
2203             ib++;
2204          } else {
2205             /* they both next mention the same ThrID */
2206             tl_assert(tmpa->thrid == tmpb->thrid);
2207             thrid = tmpa->thrid; /* == tmpb->thrid */
2208             tyma  = tmpa->tym;
2209             tymb  = tmpb->tym;
2210             ia++;
2211             ib++;
2212             ncommon++;
2213          }
2214       }
2215 
2216       /* having laboriously determined (thr, tyma, tymb), do something
2217          useful with it. */
2218       tymMax = tyma > tymb ? tyma : tymb;
2219       if (tymMax > 0) {
2220          UInt hi = out->usedTS++;
2221          out->ts[hi].thrid = thrid;
2222          out->ts[hi].tym   = tymMax;
2223       }
2224 
2225    }
2226 
2227    tl_assert(is_sane_VTS(out));
2228    tl_assert(out->usedTS <= out->sizeTS);
2229    tl_assert(out->usedTS == useda + usedb - ncommon);
2230 }
2231 
2232 
2233 /* Determine if 'a' <= 'b', in the partial ordering.  Returns zero if
2234    they are, or the first ThrID for which they are not (no valid ThrID
2235    has the value zero).  This rather strange convention is used
2236    because sometimes we want to know the actual index at which they
2237    first differ. */
VTS__cmpLEQ(VTS * a,VTS * b)2238 static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
2239 {
2240    Word  ia, ib, useda, usedb;
2241    ULong tyma, tymb;
2242 
2243    stats__vts__cmpLEQ++;
2244 
2245    tl_assert(a);
2246    tl_assert(b);
2247    useda = a->usedTS;
2248    usedb = b->usedTS;
2249 
2250    ia = ib = 0;
2251 
2252    while (1) {
2253 
2254       /* This logic is to enumerate doubles (tyma, tymb) drawn
2255          from a and b in order, and tyma/b are the relevant
2256          scalar timestamps, taking into account implicit zeroes. */
2257       ThrID thrid;
2258 
2259       tl_assert(ia >= 0 && ia <= useda);
2260       tl_assert(ib >= 0 && ib <= usedb);
2261 
2262       if        (ia == useda && ib == usedb) {
2263          /* both empty - done */
2264          break;
2265 
2266       } else if (ia == useda && ib != usedb) {
2267          /* a empty, use up b */
2268          ScalarTS* tmpb = &b->ts[ib];
2269          tyma  = 0;
2270          tymb  = tmpb->tym;
2271          thrid = tmpb->thrid;
2272          ib++;
2273 
2274       } else if (ia != useda && ib == usedb) {
2275          /* b empty, use up a */
2276          ScalarTS* tmpa = &a->ts[ia];
2277          tyma  = tmpa->tym;
2278          thrid = tmpa->thrid;
2279          tymb  = 0;
2280          ia++;
2281 
2282       } else {
2283          /* both not empty; extract lowest-ThrID'd triple */
2284          ScalarTS* tmpa = &a->ts[ia];
2285          ScalarTS* tmpb = &b->ts[ib];
2286          if (tmpa->thrid < tmpb->thrid) {
2287             /* a has the lowest unconsidered ThrID */
2288             tyma  = tmpa->tym;
2289             thrid = tmpa->thrid;
2290             tymb  = 0;
2291             ia++;
2292          }
2293          else
2294          if (tmpa->thrid > tmpb->thrid) {
2295             /* b has the lowest unconsidered ThrID */
2296             tyma  = 0;
2297             tymb  = tmpb->tym;
2298             thrid = tmpb->thrid;
2299             ib++;
2300          } else {
2301             /* they both next mention the same ThrID */
2302             tl_assert(tmpa->thrid == tmpb->thrid);
2303             tyma  = tmpa->tym;
2304             thrid = tmpa->thrid;
2305             tymb  = tmpb->tym;
2306             ia++;
2307             ib++;
2308          }
2309       }
2310 
2311       /* having laboriously determined (tyma, tymb), do something
2312          useful with it. */
2313       if (tyma > tymb) {
2314          /* not LEQ at this index.  Quit, since the answer is
2315             determined already. */
2316          tl_assert(thrid >= 1024);
2317          return thrid;
2318       }
2319    }
2320 
2321    return 0; /* all points are LEQ => return an invalid ThrID */
2322 }
2323 
2324 
2325 /* Compute an arbitrary structural (total) ordering on the two args,
2326    based on their VCs, so they can be looked up in a table, tree, etc.
2327    Returns -1, 0 or 1.  (really just 'deriving Ord' :-) This can be
2328    performance critical so there is some effort expended to make it sa
2329    fast as possible.
2330 */
VTS__cmp_structural(VTS * a,VTS * b)2331 Word VTS__cmp_structural ( VTS* a, VTS* b )
2332 {
2333    /* We just need to generate an arbitrary total ordering based on
2334       a->ts and b->ts.  Preferably do it in a way which comes across likely
2335       differences relatively quickly. */
2336    Word     i;
2337    Word     useda = 0,    usedb = 0;
2338    ScalarTS *ctsa = NULL, *ctsb = NULL;
2339 
2340    stats__vts__cmp_structural++;
2341 
2342    tl_assert(a);
2343    tl_assert(b);
2344 
2345    ctsa = &a->ts[0]; useda = a->usedTS;
2346    ctsb = &b->ts[0]; usedb = b->usedTS;
2347 
2348    if (LIKELY(useda == usedb)) {
2349       ScalarTS *tmpa = NULL, *tmpb = NULL;
2350       stats__vts__cmp_structural_slow++;
2351       /* Same length vectors.  Find the first difference, if any, as
2352          fast as possible. */
2353       for (i = 0; i < useda; i++) {
2354          tmpa = &ctsa[i];
2355          tmpb = &ctsb[i];
2356          if (LIKELY(tmpa->tym == tmpb->tym
2357                     && tmpa->thrid == tmpb->thrid))
2358             continue;
2359          else
2360             break;
2361       }
2362       if (UNLIKELY(i == useda)) {
2363          /* They're identical. */
2364          return 0;
2365       } else {
2366          tl_assert(i >= 0 && i < useda);
2367          if (tmpa->tym < tmpb->tym) return -1;
2368          if (tmpa->tym > tmpb->tym) return 1;
2369          if (tmpa->thrid < tmpb->thrid) return -1;
2370          if (tmpa->thrid > tmpb->thrid) return 1;
2371          /* we just established them as non-identical, hence: */
2372       }
2373       /*NOTREACHED*/
2374       tl_assert(0);
2375    }
2376 
2377    if (useda < usedb) return -1;
2378    if (useda > usedb) return 1;
2379    /*NOTREACHED*/
2380    tl_assert(0);
2381 }
2382 
2383 
2384 /* Debugging only.  Display the given VTS in the buffer.
2385 */
VTS__show(HChar * buf,Int nBuf,VTS * vts)2386 void VTS__show ( HChar* buf, Int nBuf, VTS* vts )
2387 {
2388    ScalarTS* st;
2389    HChar     unit[64];
2390    Word      i, n;
2391    Int       avail = nBuf;
2392    tl_assert(vts && vts->ts);
2393    tl_assert(nBuf > 16);
2394    buf[0] = '[';
2395    buf[1] = 0;
2396    n =  vts->usedTS;
2397    for (i = 0; i < n; i++) {
2398       tl_assert(avail >= 40);
2399       st = &vts->ts[i];
2400       VG_(memset)(unit, 0, sizeof(unit));
2401       VG_(sprintf)(unit, i < n-1 ? "%u:%llu " : "%u:%llu",
2402                          st->thrid, (ULong)st->tym);
2403       if (avail < VG_(strlen)(unit) + 40/*let's say*/) {
2404          VG_(strcat)(buf, " ...]");
2405          buf[nBuf-1] = 0;
2406          return;
2407       }
2408       VG_(strcat)(buf, unit);
2409       avail -= VG_(strlen)(unit);
2410    }
2411    VG_(strcat)(buf, "]");
2412    buf[nBuf-1] = 0;
2413 }
2414 
2415 
2416 /* Debugging only.  Return vts[index], so to speak.
2417 */
VTS__indexAt_SLOW(VTS * vts,Thr * idx)2418 ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
2419 {
2420    UWord i, n;
2421    ThrID idx_thrid = Thr__to_ThrID(idx);
2422    stats__vts__indexat_slow++;
2423    tl_assert(vts && vts->ts);
2424    n = vts->usedTS;
2425    for (i = 0; i < n; i++) {
2426       ScalarTS* st = &vts->ts[i];
2427       if (st->thrid == idx_thrid)
2428          return st->tym;
2429    }
2430    return 0;
2431 }
2432 
2433 
2434 /* See comment on prototype above.
2435 */
VTS__declare_thread_very_dead(Thr * thr)2436 static void VTS__declare_thread_very_dead ( Thr* thr )
2437 {
2438    if (0) VG_(printf)("VTQ:  tae %p\n", thr);
2439 
2440    tl_assert(thr->llexit_done);
2441    tl_assert(thr->joinedwith_done);
2442 
2443    ThrID nyu;
2444    nyu = Thr__to_ThrID(thr);
2445    VG_(addToXA)( verydead_thread_table, &nyu );
2446 
2447    /* We can only get here if we're assured that we'll never again
2448       need to look at this thread's ::viR or ::viW.  Set them to
2449       VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
2450       mostly so that we don't wind up pruning them (as that would be
2451       nonsensical: the only interesting ScalarTS entry for a dead
2452       thread is its own index, and the pruning will remove that.). */
2453    VtsID__rcdec(thr->viR);
2454    VtsID__rcdec(thr->viW);
2455    thr->viR = VtsID_INVALID;
2456    thr->viW = VtsID_INVALID;
2457 }
2458 
2459 
2460 /////////////////////////////////////////////////////////////////
2461 /////////////////////////////////////////////////////////////////
2462 //                                                             //
2463 // SECTION END vts primitives                                  //
2464 //                                                             //
2465 /////////////////////////////////////////////////////////////////
2466 /////////////////////////////////////////////////////////////////
2467 
2468 
2469 
2470 /////////////////////////////////////////////////////////////////
2471 /////////////////////////////////////////////////////////////////
2472 //                                                             //
2473 // SECTION BEGIN main library                                  //
2474 //                                                             //
2475 /////////////////////////////////////////////////////////////////
2476 /////////////////////////////////////////////////////////////////
2477 
2478 
2479 /////////////////////////////////////////////////////////
2480 //                                                     //
2481 // VTS set                                             //
2482 //                                                     //
2483 /////////////////////////////////////////////////////////
2484 
2485 static WordFM* /* WordFM VTS* void */ vts_set = NULL;
2486 
vts_set_init(void)2487 static void vts_set_init ( void )
2488 {
2489    tl_assert(!vts_set);
2490    vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
2491                          HG_(free),
2492                          (Word(*)(UWord,UWord))VTS__cmp_structural );
2493    tl_assert(vts_set);
2494 }
2495 
2496 /* Given a VTS, look in vts_set to see if we already have a
2497    structurally identical one.  If yes, return the pair (True, pointer
2498    to the existing one).  If no, clone this one, add the clone to the
2499    set, and return (False, pointer to the clone). */
vts_set__find__or__clone_and_add(VTS ** res,VTS * cand)2500 static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
2501 {
2502    UWord keyW, valW;
2503    stats__vts_set__focaa++;
2504    tl_assert(cand->id == VtsID_INVALID);
2505    /* lookup cand (by value) */
2506    if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
2507       /* found it */
2508       tl_assert(valW == 0);
2509       /* if this fails, cand (by ref) was already present (!) */
2510       tl_assert(keyW != (UWord)cand);
2511       *res = (VTS*)keyW;
2512       return True;
2513    } else {
2514       /* not present.  Clone, add and return address of clone. */
2515       stats__vts_set__focaa_a++;
2516       VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
2517       tl_assert(clone != cand);
2518       VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
2519       *res = clone;
2520       return False;
2521    }
2522 }
2523 
2524 
2525 /////////////////////////////////////////////////////////
2526 //                                                     //
2527 // VTS table                                           //
2528 //                                                     //
2529 /////////////////////////////////////////////////////////
2530 
2531 static void VtsID__invalidate_caches ( void ); /* fwds */
2532 
2533 /* A type to hold VTS table entries.  Invariants:
2534    If .vts == NULL, then this entry is not in use, so:
2535    - .rc == 0
2536    - this entry is on the freelist (unfortunately, does not imply
2537      any constraints on value for .freelink)
2538    If .vts != NULL, then this entry is in use:
2539    - .vts is findable in vts_set
2540    - .vts->id == this entry number
2541    - no specific value for .rc (even 0 is OK)
2542    - this entry is not on freelist, so .freelink == VtsID_INVALID
2543 */
2544 typedef
2545    struct {
2546       VTS*  vts;      /* vts, in vts_set */
2547       UWord rc;       /* reference count - enough for entire aspace */
2548       VtsID freelink; /* chain for free entries, VtsID_INVALID at end */
2549       VtsID remap;    /* used only during pruning */
2550    }
2551    VtsTE;
2552 
2553 /* The VTS table. */
2554 static XArray* /* of VtsTE */ vts_tab = NULL;
2555 
2556 /* An index into the VTS table, indicating the start of the list of
2557    free (available for use) entries.  If the list is empty, this is
2558    VtsID_INVALID. */
2559 static VtsID vts_tab_freelist = VtsID_INVALID;
2560 
2561 /* Do a GC of vts_tab when the freelist becomes empty AND the size of
2562    vts_tab equals or exceeds this size.  After GC, the value here is
2563    set appropriately so as to check for the next GC point. */
2564 static Word vts_next_GC_at = 1000;
2565 
vts_tab_init(void)2566 static void vts_tab_init ( void )
2567 {
2568    vts_tab
2569       = VG_(newXA)( HG_(zalloc), "libhb.vts_tab_init.1",
2570                     HG_(free), sizeof(VtsTE) );
2571    vts_tab_freelist
2572       = VtsID_INVALID;
2573    tl_assert(vts_tab);
2574 }
2575 
2576 /* Add ii to the free list, checking that it looks out-of-use. */
add_to_free_list(VtsID ii)2577 static void add_to_free_list ( VtsID ii )
2578 {
2579    VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2580    tl_assert(ie->vts == NULL);
2581    tl_assert(ie->rc == 0);
2582    tl_assert(ie->freelink == VtsID_INVALID);
2583    ie->freelink = vts_tab_freelist;
2584    vts_tab_freelist = ii;
2585 }
2586 
2587 /* Get an entry from the free list.  This will return VtsID_INVALID if
2588    the free list is empty. */
get_from_free_list(void)2589 static VtsID get_from_free_list ( void )
2590 {
2591    VtsID  ii;
2592    VtsTE* ie;
2593    if (vts_tab_freelist == VtsID_INVALID)
2594       return VtsID_INVALID;
2595    ii = vts_tab_freelist;
2596    ie = VG_(indexXA)( vts_tab, ii );
2597    tl_assert(ie->vts == NULL);
2598    tl_assert(ie->rc == 0);
2599    vts_tab_freelist = ie->freelink;
2600    return ii;
2601 }
2602 
2603 /* Produce a new VtsID that can be used, either by getting it from
2604    the freelist, or, if that is empty, by expanding vts_tab. */
get_new_VtsID(void)2605 static VtsID get_new_VtsID ( void )
2606 {
2607    VtsID ii;
2608    VtsTE te;
2609    ii = get_from_free_list();
2610    if (ii != VtsID_INVALID)
2611       return ii;
2612    te.vts = NULL;
2613    te.rc = 0;
2614    te.freelink = VtsID_INVALID;
2615    te.remap    = VtsID_INVALID;
2616    ii = (VtsID)VG_(addToXA)( vts_tab, &te );
2617    return ii;
2618 }
2619 
2620 
2621 /* Indirect callback from lib_zsm. */
VtsID__rcinc(VtsID ii)2622 static void VtsID__rcinc ( VtsID ii )
2623 {
2624    VtsTE* ie;
2625    /* VG_(indexXA) does a range check for us */
2626    ie = VG_(indexXA)( vts_tab, ii );
2627    tl_assert(ie->vts); /* else it's not in use */
2628    tl_assert(ie->rc < ~0UL); /* else we can't continue */
2629    tl_assert(ie->vts->id == ii);
2630    ie->rc++;
2631 }
2632 
2633 /* Indirect callback from lib_zsm. */
VtsID__rcdec(VtsID ii)2634 static void VtsID__rcdec ( VtsID ii )
2635 {
2636    VtsTE* ie;
2637    /* VG_(indexXA) does a range check for us */
2638    ie = VG_(indexXA)( vts_tab, ii );
2639    tl_assert(ie->vts); /* else it's not in use */
2640    tl_assert(ie->rc > 0); /* else RC snafu */
2641    tl_assert(ie->vts->id == ii);
2642    ie->rc--;
2643 }
2644 
2645 
2646 /* Look up 'cand' in our collection of VTSs.  If present, return the
2647    VtsID for the pre-existing version.  If not present, clone it, add
2648    the clone to both vts_tab and vts_set, allocate a fresh VtsID for
2649    it, and return that. */
vts_tab__find__or__clone_and_add(VTS * cand)2650 static VtsID vts_tab__find__or__clone_and_add ( VTS* cand )
2651 {
2652    VTS* in_tab = NULL;
2653    tl_assert(cand->id == VtsID_INVALID);
2654    Bool already_have = vts_set__find__or__clone_and_add( &in_tab, cand );
2655    tl_assert(in_tab);
2656    if (already_have) {
2657       /* We already have a copy of 'cand'.  Use that. */
2658       VtsTE* ie;
2659       tl_assert(in_tab->id != VtsID_INVALID);
2660       ie = VG_(indexXA)( vts_tab, in_tab->id );
2661       tl_assert(ie->vts == in_tab);
2662       return in_tab->id;
2663    } else {
2664       VtsID  ii = get_new_VtsID();
2665       VtsTE* ie = VG_(indexXA)( vts_tab, ii );
2666       ie->vts = in_tab;
2667       ie->rc = 0;
2668       ie->freelink = VtsID_INVALID;
2669       in_tab->id = ii;
2670       return ii;
2671    }
2672 }
2673 
2674 
show_vts_stats(HChar * caller)2675 static void show_vts_stats ( HChar* caller )
2676 {
2677    UWord nSet, nTab, nLive;
2678    ULong totrc;
2679    UWord n, i;
2680    nSet = VG_(sizeFM)( vts_set );
2681    nTab = VG_(sizeXA)( vts_tab );
2682    totrc = 0;
2683    nLive = 0;
2684    n = VG_(sizeXA)( vts_tab );
2685    for (i = 0; i < n; i++) {
2686       VtsTE* ie = VG_(indexXA)( vts_tab, i );
2687       if (ie->vts) {
2688          nLive++;
2689          totrc += (ULong)ie->rc;
2690       } else {
2691          tl_assert(ie->rc == 0);
2692       }
2693    }
2694    VG_(printf)("  show_vts_stats %s\n", caller);
2695    VG_(printf)("    vts_tab size %4lu\n", nTab);
2696    VG_(printf)("    vts_tab live %4lu\n", nLive);
2697    VG_(printf)("    vts_set size %4lu\n", nSet);
2698    VG_(printf)("        total rc %4llu\n", totrc);
2699 }
2700 
2701 
2702 /* --- Helpers for VtsID pruning --- */
2703 
2704 static
remap_VtsID(XArray * old_tab,XArray * new_tab,VtsID * ii)2705 void remap_VtsID ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2706                    /*MOD*/XArray* /* of VtsTE */ new_tab,
2707                    VtsID* ii )
2708 {
2709    VtsTE *old_te, *new_te;
2710    VtsID old_id, new_id;
2711    /* We're relying here on VG_(indexXA)'s range checking to assert on
2712       any stupid values, in particular *ii == VtsID_INVALID. */
2713    old_id = *ii;
2714    old_te = VG_(indexXA)( old_tab, old_id );
2715    old_te->rc--;
2716    new_id = old_te->remap;
2717    new_te = VG_(indexXA)( new_tab, new_id );
2718    new_te->rc++;
2719    *ii = new_id;
2720 }
2721 
2722 static
remap_VtsIDs_in_SVal(XArray * old_tab,XArray * new_tab,SVal * s)2723 void remap_VtsIDs_in_SVal ( /*MOD*/XArray* /* of VtsTE */ old_tab,
2724                             /*MOD*/XArray* /* of VtsTE */ new_tab,
2725                             SVal* s )
2726 {
2727    SVal old_sv, new_sv;
2728    old_sv = *s;
2729    if (SVal__isC(old_sv)) {
2730       VtsID rMin, wMin;
2731       rMin = SVal__unC_Rmin(old_sv);
2732       wMin = SVal__unC_Wmin(old_sv);
2733       remap_VtsID( old_tab, new_tab, &rMin );
2734       remap_VtsID( old_tab, new_tab, &wMin );
2735       new_sv = SVal__mkC( rMin, wMin );
2736       *s = new_sv;
2737   }
2738 }
2739 
2740 
2741 /* NOT TO BE CALLED FROM WITHIN libzsm. */
2742 __attribute__((noinline))
vts_tab__do_GC(Bool show_stats)2743 static void vts_tab__do_GC ( Bool show_stats )
2744 {
2745    UWord i, nTab, nLive, nFreed;
2746 
2747    /* ---------- BEGIN VTS GC ---------- */
2748    /* check this is actually necessary. */
2749    tl_assert(vts_tab_freelist == VtsID_INVALID);
2750 
2751    /* empty the caches for partial order checks and binary joins.  We
2752       could do better and prune out the entries to be deleted, but it
2753       ain't worth the hassle. */
2754    VtsID__invalidate_caches();
2755 
2756    /* First, make the reference counts up to date. */
2757    zsm_flush_cache();
2758 
2759    nTab = VG_(sizeXA)( vts_tab );
2760 
2761    if (show_stats) {
2762       VG_(printf)("<<GC begins at vts_tab size %lu>>\n", nTab);
2763       show_vts_stats("before GC");
2764    }
2765 
2766    /* Now we can inspect the entire vts_tab.  Any entries with zero
2767       .rc fields are now no longer in use and can be put back on the
2768       free list, removed from vts_set, and deleted. */
2769    nFreed = 0;
2770    for (i = 0; i < nTab; i++) {
2771       Bool present;
2772       UWord oldK = 0, oldV = 12345;
2773       VtsTE* te = VG_(indexXA)( vts_tab, i );
2774       if (te->vts == NULL) {
2775          tl_assert(te->rc == 0);
2776          continue; /* already on the free list (presumably) */
2777       }
2778       if (te->rc > 0)
2779          continue; /* in use */
2780       /* Ok, we got one we can free. */
2781       tl_assert(te->vts->id == i);
2782       /* first, remove it from vts_set. */
2783       present = VG_(delFromFM)( vts_set,
2784                                 &oldK, &oldV, (UWord)te->vts );
2785       tl_assert(present); /* else it isn't in vts_set ?! */
2786       tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2787       tl_assert(oldK == (UWord)te->vts); /* else what did delFromFM find?! */
2788       /* now free the VTS itself */
2789       VTS__delete(te->vts);
2790       te->vts = NULL;
2791       /* and finally put this entry on the free list */
2792       tl_assert(te->freelink == VtsID_INVALID); /* can't already be on it */
2793       add_to_free_list( i );
2794       nFreed++;
2795    }
2796 
2797    /* Now figure out when the next GC should be.  We'll allow the
2798       number of VTSs to double before GCing again.  Except of course
2799       that since we can't (or, at least, don't) shrink vts_tab, we
2800       can't set the threshhold value smaller than it. */
2801    tl_assert(nFreed <= nTab);
2802    nLive = nTab - nFreed;
2803    tl_assert(nLive >= 0 && nLive <= nTab);
2804    vts_next_GC_at = 2 * nLive;
2805    if (vts_next_GC_at < nTab)
2806       vts_next_GC_at = nTab;
2807 
2808    if (show_stats) {
2809       show_vts_stats("after GC");
2810       VG_(printf)("<<GC ends, next gc at %ld>>\n", vts_next_GC_at);
2811    }
2812 
2813    if (VG_(clo_stats)) {
2814       static UInt ctr = 1;
2815       tl_assert(nTab > 0);
2816       VG_(message)(Vg_DebugMsg,
2817                   "libhb: VTS GC: #%u  old size %lu  live %lu  (%2llu%%)\n",
2818                   ctr++, nTab, nLive, (100ULL * (ULong)nLive) / (ULong)nTab);
2819    }
2820    /* ---------- END VTS GC ---------- */
2821 
2822    /* Decide whether to do VTS pruning.  We have one of three
2823       settings. */
2824    static UInt pruning_auto_ctr = 0; /* do not make non-static */
2825 
2826    Bool do_pruning = False;
2827    switch (HG_(clo_vts_pruning)) {
2828       case 0: /* never */
2829          break;
2830       case 1: /* auto */
2831          do_pruning = (++pruning_auto_ctr % 5) == 0;
2832          break;
2833       case 2: /* always */
2834          do_pruning = True;
2835          break;
2836       default:
2837          tl_assert(0);
2838    }
2839 
2840    /* The rest of this routine only handles pruning, so we can
2841       quit at this point if it is not to be done. */
2842    if (!do_pruning)
2843       return;
2844 
2845    /* ---------- BEGIN VTS PRUNING ---------- */
2846    /* We begin by sorting the backing table on its .thr values, so as
2847       to (1) check they are unique [else something has gone wrong,
2848       since it means we must have seen some Thr* exiting more than
2849       once, which can't happen], and (2) so that we can quickly look
2850       up the dead-thread entries as we work through the VTSs. */
2851    VG_(sortXA)( verydead_thread_table );
2852    /* Sanity check: check for unique .sts.thr values. */
2853    UWord nBT = VG_(sizeXA)( verydead_thread_table );
2854    if (nBT > 0) {
2855       ThrID thrid1, thrid2;
2856       thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, 0 );
2857       for (i = 1; i < nBT; i++) {
2858          thrid1 = thrid2;
2859          thrid2 = *(ThrID*)VG_(indexXA)( verydead_thread_table, i );
2860          tl_assert(thrid1 < thrid2);
2861       }
2862    }
2863    /* Ok, so the dead thread table has unique and in-order keys. */
2864 
2865    /* We will run through the old table, and create a new table and
2866       set, at the same time setting the .remap entries in the old
2867       table to point to the new entries.  Then, visit every VtsID in
2868       the system, and replace all of them with new ones, using the
2869       .remap entries in the old table.  Finally, we can delete the old
2870       table and set. */
2871 
2872    XArray* /* of VtsTE */ new_tab
2873       = VG_(newXA)( HG_(zalloc), "libhb.vts_tab__do_GC.new_tab",
2874                     HG_(free), sizeof(VtsTE) );
2875 
2876    /* WordFM VTS* void */
2877    WordFM* new_set
2878       = VG_(newFM)( HG_(zalloc), "libhb.vts_tab__do_GC.new_set",
2879                     HG_(free),
2880                     (Word(*)(UWord,UWord))VTS__cmp_structural );
2881 
2882    /* Visit each old VTS.  For each one:
2883 
2884       * make a pruned version
2885 
2886       * search new_set for the pruned version, yielding either
2887         Nothing (not present) or the new VtsID for it.
2888 
2889       * if not present, allocate a new VtsID for it, insert (pruned
2890         VTS, new VtsID) in the tree, and set
2891         remap_table[old VtsID] = new VtsID.
2892 
2893       * if present, set remap_table[old VtsID] = new VtsID, where
2894         new VtsID was determined by the tree lookup.  Then free up
2895         the clone.
2896    */
2897 
2898    UWord nBeforePruning = 0, nAfterPruning = 0;
2899    UWord nSTSsBefore = 0, nSTSsAfter = 0;
2900    VtsID new_VtsID_ctr = 0;
2901 
2902    for (i = 0; i < nTab; i++) {
2903 
2904       /* For each old VTS .. */
2905       VtsTE* old_te  = VG_(indexXA)( vts_tab, i );
2906       VTS*   old_vts = old_te->vts;
2907       tl_assert(old_te->remap == VtsID_INVALID);
2908 
2909       /* Skip it if not in use */
2910       if (old_te->rc == 0) {
2911          tl_assert(old_vts == NULL);
2912          continue;
2913       }
2914       tl_assert(old_vts != NULL);
2915       tl_assert(old_vts->id == i);
2916       tl_assert(old_vts->ts != NULL);
2917 
2918       /* It is in use. Make a pruned version. */
2919       nBeforePruning++;
2920       nSTSsBefore += old_vts->usedTS;
2921       VTS* new_vts = VTS__subtract("libhb.vts_tab__do_GC.new_vts",
2922                                    old_vts, verydead_thread_table);
2923       tl_assert(new_vts->sizeTS == new_vts->usedTS);
2924       tl_assert(*(ULong*)(&new_vts->ts[new_vts->usedTS])
2925                 == 0x0ddC0ffeeBadF00dULL);
2926 
2927       /* Get rid of the old VTS and the tree entry.  It's a bit more
2928          complex to incrementally delete the VTSs now than to nuke
2929          them all after we're done, but the upside is that we don't
2930          wind up temporarily storing potentially two complete copies
2931          of each VTS and hence spiking memory use. */
2932       UWord oldK = 0, oldV = 12345;
2933       Bool  present = VG_(delFromFM)( vts_set,
2934                                       &oldK, &oldV, (UWord)old_vts );
2935       tl_assert(present); /* else it isn't in vts_set ?! */
2936       tl_assert(oldV == 0); /* no info stored in vts_set val fields */
2937       tl_assert(oldK == (UWord)old_vts); /* else what did delFromFM find?! */
2938       /* now free the VTS itself */
2939       VTS__delete(old_vts);
2940       old_te->vts = NULL;
2941       old_vts = NULL;
2942 
2943       /* NO MENTIONS of old_vts allowed beyond this point. */
2944 
2945       /* Ok, we have the pruned copy in new_vts.  See if a
2946          structurally identical version is already present in new_set.
2947          If so, delete the one we just made and move on; if not, add
2948          it. */
2949       VTS*  identical_version = NULL;
2950       UWord valW = 12345;
2951       if (VG_(lookupFM)(new_set, (UWord*)&identical_version, &valW,
2952                         (UWord)new_vts)) {
2953          // already have it
2954          tl_assert(valW == 0);
2955          tl_assert(identical_version != NULL);
2956          tl_assert(identical_version != new_vts);
2957          VTS__delete(new_vts);
2958          new_vts = identical_version;
2959          tl_assert(new_vts->id != VtsID_INVALID);
2960       } else {
2961          tl_assert(valW == 12345);
2962          tl_assert(identical_version == NULL);
2963          new_vts->id = new_VtsID_ctr++;
2964          Bool b = VG_(addToFM)(new_set, (UWord)new_vts, 0);
2965          tl_assert(!b);
2966          VtsTE new_te;
2967          new_te.vts      = new_vts;
2968          new_te.rc       = 0;
2969          new_te.freelink = VtsID_INVALID;
2970          new_te.remap    = VtsID_INVALID;
2971          Word j = VG_(addToXA)( new_tab, &new_te );
2972          tl_assert(j <= i);
2973          tl_assert(j == new_VtsID_ctr - 1);
2974          // stats
2975          nAfterPruning++;
2976          nSTSsAfter += new_vts->usedTS;
2977       }
2978       old_te->remap = new_vts->id;
2979 
2980    } /* for (i = 0; i < nTab; i++) */
2981 
2982    /* At this point, we have:
2983       * the old VTS table, with its .remap entries set,
2984         and with all .vts == NULL.
2985       * the old VTS tree should be empty, since it and the old VTSs
2986         it contained have been incrementally deleted was we worked
2987         through the old table.
2988       * the new VTS table, with all .rc == 0, all .freelink and .remap
2989         == VtsID_INVALID.
2990       * the new VTS tree.
2991    */
2992    tl_assert( VG_(sizeFM)(vts_set) == 0 );
2993 
2994    /* Now actually apply the mapping. */
2995    /* Visit all the VtsIDs in the entire system.  Where do we expect
2996       to find them?
2997       (a) in shadow memory -- the LineZs and LineFs
2998       (b) in our collection of struct _Thrs.
2999       (c) in our collection of struct _SOs.
3000       Nowhere else, AFAICS.  Not in the zsm cache, because that just
3001       got invalidated.
3002 
3003       Using the .remap fields in vts_tab, map each old VtsID to a new
3004       VtsID.  For each old VtsID, dec its rc; and for each new one,
3005       inc it.  This sets up the new refcounts, and it also gives a
3006       cheap sanity check of the old ones: all old refcounts should be
3007       zero after this operation.
3008    */
3009 
3010    /* Do the mappings for (a) above: iterate over the Primary shadow
3011       mem map (WordFM Addr SecMap*). */
3012    UWord secmapW = 0;
3013    VG_(initIterFM)( map_shmem );
3014    while (VG_(nextIterFM)( map_shmem, NULL, &secmapW )) {
3015       UWord   j;
3016       SecMap* sm = (SecMap*)secmapW;
3017       tl_assert(sm->magic == SecMap_MAGIC);
3018       /* Deal with the LineZs */
3019       for (i = 0; i < N_SECMAP_ZLINES; i++) {
3020          LineZ* lineZ = &sm->linesZ[i];
3021          if (lineZ->dict[0] == SVal_INVALID)
3022             continue; /* not in use -- data is in F rep instead */
3023          for (j = 0; j < 4; j++)
3024             remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineZ->dict[j]);
3025       }
3026       /* Deal with the LineFs */
3027       for (i = 0; i < sm->linesF_size; i++) {
3028          LineF* lineF = &sm->linesF[i];
3029          if (!lineF->inUse)
3030             continue;
3031          for (j = 0; j < N_LINE_ARANGE; j++)
3032             remap_VtsIDs_in_SVal(vts_tab, new_tab, &lineF->w64s[j]);
3033       }
3034    }
3035    VG_(doneIterFM)( map_shmem );
3036 
3037    /* Do the mappings for (b) above: visit our collection of struct
3038       _Thrs. */
3039    Thread* hgthread = get_admin_threads();
3040    tl_assert(hgthread);
3041    while (hgthread) {
3042       Thr* hbthr = hgthread->hbthr;
3043       tl_assert(hbthr);
3044       /* Threads that are listed in the prunable set have their viR
3045          and viW set to VtsID_INVALID, so we can't mess with them. */
3046       if (hbthr->llexit_done && hbthr->joinedwith_done) {
3047          tl_assert(hbthr->viR == VtsID_INVALID);
3048          tl_assert(hbthr->viW == VtsID_INVALID);
3049          hgthread = hgthread->admin;
3050          continue;
3051       }
3052       remap_VtsID( vts_tab, new_tab, &hbthr->viR );
3053       remap_VtsID( vts_tab, new_tab, &hbthr->viW );
3054       hgthread = hgthread->admin;
3055    }
3056 
3057    /* Do the mappings for (c) above: visit the struct _SOs. */
3058    SO* so = admin_SO;
3059    while (so) {
3060       if (so->viR != VtsID_INVALID)
3061          remap_VtsID( vts_tab, new_tab, &so->viR );
3062       if (so->viW != VtsID_INVALID)
3063          remap_VtsID( vts_tab, new_tab, &so->viW );
3064       so = so->admin_next;
3065    }
3066 
3067    /* So, we're nearly done (with this incredibly complex operation).
3068       Check the refcounts for the old VtsIDs all fell to zero, as
3069       expected.  Any failure is serious. */
3070    for (i = 0; i < nTab; i++) {
3071       VtsTE* te = VG_(indexXA)( vts_tab, i );
3072       tl_assert(te->vts == NULL);
3073       /* This is the assert proper.  Note we're also asserting
3074          zeroness for old entries which are unmapped (hence have
3075          .remap == VtsID_INVALID).  That's OK. */
3076       tl_assert(te->rc == 0);
3077    }
3078 
3079    /* Install the new table and set. */
3080    VG_(deleteFM)(vts_set, NULL/*kFin*/, NULL/*vFin*/);
3081    vts_set = new_set;
3082    VG_(deleteXA)( vts_tab );
3083    vts_tab = new_tab;
3084 
3085    /* The freelist of vts_tab entries is empty now, because we've
3086       compacted all of the live entries at the low end of the
3087       table. */
3088    vts_tab_freelist = VtsID_INVALID;
3089 
3090    /* Sanity check vts_set and vts_tab. */
3091 
3092    /* Because all the live entries got slid down to the bottom of vts_tab: */
3093    tl_assert( VG_(sizeXA)( vts_tab ) == VG_(sizeFM)( vts_set ));
3094 
3095    /* Assert that the vts_tab and vts_set entries point at each other
3096       in the required way */
3097    UWord wordK = 0, wordV = 0;
3098    VG_(initIterFM)( vts_set );
3099    while (VG_(nextIterFM)( vts_set, &wordK, &wordV )) {
3100       tl_assert(wordK != 0);
3101       tl_assert(wordV == 0);
3102       VTS* vts = (VTS*)wordK;
3103       tl_assert(vts->id != VtsID_INVALID);
3104       VtsTE* te = VG_(indexXA)( vts_tab, vts->id );
3105       tl_assert(te->vts == vts);
3106    }
3107    VG_(doneIterFM)( vts_set );
3108 
3109    /* Also iterate over the table, and check each entry is
3110       plausible. */
3111    nTab = VG_(sizeXA)( vts_tab );
3112    for (i = 0; i < nTab; i++) {
3113       VtsTE* te = VG_(indexXA)( vts_tab, i );
3114       tl_assert(te->vts);
3115       tl_assert(te->vts->id == i);
3116       tl_assert(te->rc > 0); /* 'cos we just GC'd */
3117       tl_assert(te->freelink == VtsID_INVALID); /* in use */
3118       tl_assert(te->remap == VtsID_INVALID); /* not relevant */
3119    }
3120 
3121    /* And we're done.  Bwahahaha. Ha. Ha. Ha. */
3122    if (VG_(clo_stats)) {
3123       static UInt ctr = 1;
3124       tl_assert(nTab > 0);
3125       VG_(message)(
3126          Vg_DebugMsg,
3127          "libhb: VTS PR: #%u  before %lu (avg sz %lu)  "
3128             "after %lu (avg sz %lu)\n",
3129          ctr++,
3130          nBeforePruning, nSTSsBefore / (nBeforePruning ? nBeforePruning : 1),
3131          nAfterPruning, nSTSsAfter / (nAfterPruning ? nAfterPruning : 1)
3132       );
3133    }
3134    if (0)
3135    VG_(printf)("VTQ: before pruning %lu (avg sz %lu), "
3136                "after pruning %lu (avg sz %lu)\n",
3137                nBeforePruning, nSTSsBefore / nBeforePruning,
3138                nAfterPruning, nSTSsAfter / nAfterPruning);
3139    /* ---------- END VTS PRUNING ---------- */
3140 }
3141 
3142 
3143 /////////////////////////////////////////////////////////
3144 //                                                     //
3145 // Vts IDs                                             //
3146 //                                                     //
3147 /////////////////////////////////////////////////////////
3148 
3149 //////////////////////////
3150 /* A temporary, max-sized VTS which is used as a temporary (the first
3151    argument) in VTS__singleton, VTS__tick and VTS__join operations. */
3152 static VTS* temp_max_sized_VTS = NULL;
3153 
3154 //////////////////////////
3155 static ULong stats__cmpLEQ_queries = 0;
3156 static ULong stats__cmpLEQ_misses  = 0;
3157 static ULong stats__join2_queries  = 0;
3158 static ULong stats__join2_misses   = 0;
3159 
ROL32(UInt w,Int n)3160 static inline UInt ROL32 ( UInt w, Int n ) {
3161    w = (w << n) | (w >> (32-n));
3162    return w;
3163 }
hash_VtsIDs(VtsID vi1,VtsID vi2,UInt nTab)3164 static inline UInt hash_VtsIDs ( VtsID vi1, VtsID vi2, UInt nTab ) {
3165    UInt hash = ROL32(vi1,19) ^ ROL32(vi2,13);
3166    return hash % nTab;
3167 }
3168 
3169 #define N_CMPLEQ_CACHE 1023
3170 static
3171    struct { VtsID vi1; VtsID vi2; Bool leq; }
3172    cmpLEQ_cache[N_CMPLEQ_CACHE];
3173 
3174 #define N_JOIN2_CACHE 1023
3175 static
3176    struct { VtsID vi1; VtsID vi2; VtsID res; }
3177    join2_cache[N_JOIN2_CACHE];
3178 
VtsID__invalidate_caches(void)3179 static void VtsID__invalidate_caches ( void ) {
3180    Int i;
3181    for (i = 0; i < N_CMPLEQ_CACHE; i++) {
3182       cmpLEQ_cache[i].vi1 = VtsID_INVALID;
3183       cmpLEQ_cache[i].vi2 = VtsID_INVALID;
3184       cmpLEQ_cache[i].leq = False;
3185    }
3186    for (i = 0; i < N_JOIN2_CACHE; i++) {
3187      join2_cache[i].vi1 = VtsID_INVALID;
3188      join2_cache[i].vi2 = VtsID_INVALID;
3189      join2_cache[i].res = VtsID_INVALID;
3190    }
3191 }
3192 //////////////////////////
3193 
3194 //static Bool VtsID__is_valid ( VtsID vi ) {
3195 //   VtsTE* ve;
3196 //   if (vi >= (VtsID)VG_(sizeXA)( vts_tab ))
3197 //      return False;
3198 //   ve = VG_(indexXA)( vts_tab, vi );
3199 //   if (!ve->vts)
3200 //      return False;
3201 //   tl_assert(ve->vts->id == vi);
3202 //   return True;
3203 //}
3204 
VtsID__to_VTS(VtsID vi)3205 static VTS* VtsID__to_VTS ( VtsID vi ) {
3206    VtsTE* te = VG_(indexXA)( vts_tab, vi );
3207    tl_assert(te->vts);
3208    return te->vts;
3209 }
3210 
VtsID__pp(VtsID vi)3211 static void VtsID__pp ( VtsID vi ) {
3212    HChar buf[100];
3213    VTS* vts = VtsID__to_VTS(vi);
3214    VTS__show( buf, sizeof(buf)-1, vts );
3215    buf[sizeof(buf)-1] = 0;
3216    VG_(printf)("%s", buf);
3217 }
3218 
3219 /* compute partial ordering relation of vi1 and vi2. */
3220 __attribute__((noinline))
VtsID__cmpLEQ_WRK(VtsID vi1,VtsID vi2)3221 static Bool VtsID__cmpLEQ_WRK ( VtsID vi1, VtsID vi2 ) {
3222    UInt hash;
3223    Bool leq;
3224    VTS  *v1, *v2;
3225    //if (vi1 == vi2) return True;
3226    tl_assert(vi1 != vi2);
3227    ////++
3228    stats__cmpLEQ_queries++;
3229    hash = hash_VtsIDs(vi1, vi2, N_CMPLEQ_CACHE);
3230    if (cmpLEQ_cache[hash].vi1 == vi1
3231        && cmpLEQ_cache[hash].vi2 == vi2)
3232       return cmpLEQ_cache[hash].leq;
3233    stats__cmpLEQ_misses++;
3234    ////--
3235    v1  = VtsID__to_VTS(vi1);
3236    v2  = VtsID__to_VTS(vi2);
3237    leq = VTS__cmpLEQ( v1, v2 ) == 0;
3238    ////++
3239    cmpLEQ_cache[hash].vi1 = vi1;
3240    cmpLEQ_cache[hash].vi2 = vi2;
3241    cmpLEQ_cache[hash].leq = leq;
3242    ////--
3243    return leq;
3244 }
VtsID__cmpLEQ(VtsID vi1,VtsID vi2)3245 static inline Bool VtsID__cmpLEQ ( VtsID vi1, VtsID vi2 ) {
3246    return LIKELY(vi1 == vi2)  ? True  : VtsID__cmpLEQ_WRK(vi1, vi2);
3247 }
3248 
3249 /* compute binary join */
3250 __attribute__((noinline))
VtsID__join2_WRK(VtsID vi1,VtsID vi2)3251 static VtsID VtsID__join2_WRK ( VtsID vi1, VtsID vi2 ) {
3252    UInt  hash;
3253    VtsID res;
3254    VTS   *vts1, *vts2;
3255    //if (vi1 == vi2) return vi1;
3256    tl_assert(vi1 != vi2);
3257    ////++
3258    stats__join2_queries++;
3259    hash = hash_VtsIDs(vi1, vi2, N_JOIN2_CACHE);
3260    if (join2_cache[hash].vi1 == vi1
3261        && join2_cache[hash].vi2 == vi2)
3262       return join2_cache[hash].res;
3263    stats__join2_misses++;
3264    ////--
3265    vts1 = VtsID__to_VTS(vi1);
3266    vts2 = VtsID__to_VTS(vi2);
3267    temp_max_sized_VTS->usedTS = 0;
3268    VTS__join(temp_max_sized_VTS, vts1,vts2);
3269    res = vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3270    ////++
3271    join2_cache[hash].vi1 = vi1;
3272    join2_cache[hash].vi2 = vi2;
3273    join2_cache[hash].res = res;
3274    ////--
3275    return res;
3276 }
VtsID__join2(VtsID vi1,VtsID vi2)3277 static inline VtsID VtsID__join2 ( VtsID vi1, VtsID vi2 ) {
3278    return LIKELY(vi1 == vi2)  ? vi1  : VtsID__join2_WRK(vi1, vi2);
3279 }
3280 
3281 /* create a singleton VTS, namely [thr:1] */
VtsID__mk_Singleton(Thr * thr,ULong tym)3282 static VtsID VtsID__mk_Singleton ( Thr* thr, ULong tym ) {
3283    temp_max_sized_VTS->usedTS = 0;
3284    VTS__singleton(temp_max_sized_VTS, thr,tym);
3285    return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3286 }
3287 
3288 /* tick operation, creates value 1 if specified index is absent */
VtsID__tick(VtsID vi,Thr * idx)3289 static VtsID VtsID__tick ( VtsID vi, Thr* idx ) {
3290    VTS* vts = VtsID__to_VTS(vi);
3291    temp_max_sized_VTS->usedTS = 0;
3292    VTS__tick(temp_max_sized_VTS, idx,vts);
3293    return vts_tab__find__or__clone_and_add(temp_max_sized_VTS);
3294 }
3295 
3296 /* index into a VTS (only for assertions) */
VtsID__indexAt(VtsID vi,Thr * idx)3297 static ULong VtsID__indexAt ( VtsID vi, Thr* idx ) {
3298    VTS* vts = VtsID__to_VTS(vi);
3299    return VTS__indexAt_SLOW( vts, idx );
3300 }
3301 
3302 /* Assuming that !cmpLEQ(vi1, vi2), find the index of the first (or
3303    any, really) element in vi1 which is pointwise greater-than the
3304    corresponding element in vi2.  If no such element exists, return
3305    NULL.  This needs to be fairly quick since it is called every time
3306    a race is detected. */
VtsID__findFirst_notLEQ(VtsID vi1,VtsID vi2)3307 static Thr* VtsID__findFirst_notLEQ ( VtsID vi1, VtsID vi2 )
3308 {
3309    VTS  *vts1, *vts2;
3310    Thr*  diffthr;
3311    ThrID diffthrid;
3312    tl_assert(vi1 != vi2);
3313    vts1 = VtsID__to_VTS(vi1);
3314    vts2 = VtsID__to_VTS(vi2);
3315    tl_assert(vts1 != vts2);
3316    diffthrid = VTS__cmpLEQ(vts1, vts2);
3317    diffthr = Thr__from_ThrID(diffthrid);
3318    tl_assert(diffthr); /* else they are LEQ ! */
3319    return diffthr;
3320 }
3321 
3322 
3323 /////////////////////////////////////////////////////////
3324 //                                                     //
3325 // Filters                                             //
3326 //                                                     //
3327 /////////////////////////////////////////////////////////
3328 
3329 /* Forget everything we know -- clear the filter and let everything
3330    through.  This needs to be as fast as possible, since it is called
3331    every time the running thread changes, and every time a thread's
3332    vector clocks change, which can be quite frequent.  The obvious
3333    fast way to do this is simply to stuff in tags which we know are
3334    not going to match anything, since they're not aligned to the start
3335    of a line. */
Filter__clear(Filter * fi,HChar * who)3336 static void Filter__clear ( Filter* fi, HChar* who )
3337 {
3338    UWord i;
3339    if (0) VG_(printf)("  Filter__clear(%p, %s)\n", fi, who);
3340    for (i = 0; i < FI_NUM_LINES; i += 8) {
3341       fi->tags[i+0] = 1; /* impossible value -- cannot match */
3342       fi->tags[i+1] = 1;
3343       fi->tags[i+2] = 1;
3344       fi->tags[i+3] = 1;
3345       fi->tags[i+4] = 1;
3346       fi->tags[i+5] = 1;
3347       fi->tags[i+6] = 1;
3348       fi->tags[i+7] = 1;
3349    }
3350    tl_assert(i == FI_NUM_LINES);
3351 }
3352 
3353 /* Clearing an arbitrary range in the filter.  Unfortunately
3354    we have to do this due to core-supplied new/die-mem events. */
3355 
Filter__clear_1byte(Filter * fi,Addr a)3356 static void Filter__clear_1byte ( Filter* fi, Addr a )
3357 {
3358    Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3359    UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3360    FiLine* line   = &fi->lines[lineno];
3361    UWord   loff   = (a - atag) / 8;
3362    UShort  mask   = 0x3 << (2 * (a & 7));
3363    /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3364    if (LIKELY( fi->tags[lineno] == atag )) {
3365       /* hit.  clear the bits. */
3366       UShort  u16  = line->u16s[loff];
3367       line->u16s[loff] = u16 & ~mask; /* clear them */
3368    } else {
3369       /* miss.  The filter doesn't hold this address, so ignore. */
3370    }
3371 }
3372 
Filter__clear_8bytes_aligned(Filter * fi,Addr a)3373 static void Filter__clear_8bytes_aligned ( Filter* fi, Addr a )
3374 {
3375    Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3376    UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3377    FiLine* line   = &fi->lines[lineno];
3378    UWord   loff   = (a - atag) / 8;
3379    if (LIKELY( fi->tags[lineno] == atag )) {
3380       line->u16s[loff] = 0;
3381    } else {
3382     /* miss.  The filter doesn't hold this address, so ignore. */
3383    }
3384 }
3385 
Filter__clear_range(Filter * fi,Addr a,UWord len)3386 static void Filter__clear_range ( Filter* fi, Addr a, UWord len )
3387 {
3388   //VG_(printf)("%lu ", len);
3389    /* slowly do part preceding 8-alignment */
3390    while (UNLIKELY(!VG_IS_8_ALIGNED(a)) && LIKELY(len > 0)) {
3391       Filter__clear_1byte( fi, a );
3392       a++;
3393       len--;
3394    }
3395    /* vector loop */
3396    while (len >= 8) {
3397       Filter__clear_8bytes_aligned( fi, a );
3398       a += 8;
3399       len -= 8;
3400    }
3401    /* slowly do tail */
3402    while (UNLIKELY(len > 0)) {
3403       Filter__clear_1byte( fi, a );
3404       a++;
3405       len--;
3406    }
3407 }
3408 
3409 
3410 /* ------ Read handlers for the filter. ------ */
3411 
Filter__ok_to_skip_crd64(Filter * fi,Addr a)3412 static inline Bool Filter__ok_to_skip_crd64 ( Filter* fi, Addr a )
3413 {
3414    if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3415       return False;
3416    {
3417      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3418      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3419      FiLine* line   = &fi->lines[lineno];
3420      UWord   loff   = (a - atag) / 8;
3421      UShort  mask   = 0xAAAA;
3422      if (LIKELY( fi->tags[lineno] == atag )) {
3423         /* hit.  check line and update. */
3424         UShort u16  = line->u16s[loff];
3425         Bool   ok   = (u16 & mask) == mask; /* all R bits set? */
3426         line->u16s[loff] = u16 | mask; /* set them */
3427         return ok;
3428      } else {
3429         /* miss.  nuke existing line and re-use it. */
3430         UWord i;
3431         fi->tags[lineno] = atag;
3432         for (i = 0; i < FI_LINE_SZB / 8; i++)
3433            line->u16s[i] = 0;
3434         line->u16s[loff] = mask;
3435         return False;
3436      }
3437    }
3438 }
3439 
Filter__ok_to_skip_crd32(Filter * fi,Addr a)3440 static inline Bool Filter__ok_to_skip_crd32 ( Filter* fi, Addr a )
3441 {
3442    if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3443       return False;
3444    {
3445      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3446      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3447      FiLine* line   = &fi->lines[lineno];
3448      UWord   loff   = (a - atag) / 8;
3449      UShort  mask   = 0xAA << (2 * (a & 4)); /* 0xAA00 or 0x00AA */
3450      if (LIKELY( fi->tags[lineno] == atag )) {
3451         /* hit.  check line and update. */
3452         UShort  u16  = line->u16s[loff];
3453         Bool    ok   = (u16 & mask) == mask; /* 4 x R bits set? */
3454         line->u16s[loff] = u16 | mask; /* set them */
3455         return ok;
3456      } else {
3457         /* miss.  nuke existing line and re-use it. */
3458         UWord   i;
3459         fi->tags[lineno] = atag;
3460         for (i = 0; i < FI_LINE_SZB / 8; i++)
3461            line->u16s[i] = 0;
3462         line->u16s[loff] = mask;
3463         return False;
3464      }
3465    }
3466 }
3467 
Filter__ok_to_skip_crd16(Filter * fi,Addr a)3468 static inline Bool Filter__ok_to_skip_crd16 ( Filter* fi, Addr a )
3469 {
3470    if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3471       return False;
3472    {
3473      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3474      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3475      FiLine* line   = &fi->lines[lineno];
3476      UWord   loff   = (a - atag) / 8;
3477      UShort  mask   = 0xA << (2 * (a & 6));
3478      /* mask is A000, 0A00, 00A0 or 000A */
3479      if (LIKELY( fi->tags[lineno] == atag )) {
3480         /* hit.  check line and update. */
3481         UShort  u16  = line->u16s[loff];
3482         Bool    ok   = (u16 & mask) == mask; /* 2 x R bits set? */
3483         line->u16s[loff] = u16 | mask; /* set them */
3484         return ok;
3485      } else {
3486         /* miss.  nuke existing line and re-use it. */
3487         UWord   i;
3488         fi->tags[lineno] = atag;
3489         for (i = 0; i < FI_LINE_SZB / 8; i++)
3490            line->u16s[i] = 0;
3491         line->u16s[loff] = mask;
3492         return False;
3493      }
3494    }
3495 }
3496 
Filter__ok_to_skip_crd08(Filter * fi,Addr a)3497 static inline Bool Filter__ok_to_skip_crd08 ( Filter* fi, Addr a )
3498 {
3499    {
3500      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3501      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3502      FiLine* line   = &fi->lines[lineno];
3503      UWord   loff   = (a - atag) / 8;
3504      UShort  mask   = 0x2 << (2 * (a & 7));
3505      /* mask is 8000, 2000, 0800, 0200, 0080, 0020, 0008 or 0002 */
3506      if (LIKELY( fi->tags[lineno] == atag )) {
3507         /* hit.  check line and update. */
3508         UShort  u16  = line->u16s[loff];
3509         Bool    ok   = (u16 & mask) == mask; /* 1 x R bits set? */
3510         line->u16s[loff] = u16 | mask; /* set them */
3511         return ok;
3512      } else {
3513         /* miss.  nuke existing line and re-use it. */
3514         UWord   i;
3515         fi->tags[lineno] = atag;
3516         for (i = 0; i < FI_LINE_SZB / 8; i++)
3517            line->u16s[i] = 0;
3518         line->u16s[loff] = mask;
3519         return False;
3520      }
3521    }
3522 }
3523 
3524 
3525 /* ------ Write handlers for the filter. ------ */
3526 
Filter__ok_to_skip_cwr64(Filter * fi,Addr a)3527 static inline Bool Filter__ok_to_skip_cwr64 ( Filter* fi, Addr a )
3528 {
3529    if (UNLIKELY( !VG_IS_8_ALIGNED(a) ))
3530       return False;
3531    {
3532      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3533      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3534      FiLine* line   = &fi->lines[lineno];
3535      UWord   loff   = (a - atag) / 8;
3536      UShort  mask   = 0xFFFF;
3537      if (LIKELY( fi->tags[lineno] == atag )) {
3538         /* hit.  check line and update. */
3539         UShort u16  = line->u16s[loff];
3540         Bool   ok   = (u16 & mask) == mask; /* all R & W bits set? */
3541         line->u16s[loff] = u16 | mask; /* set them */
3542         return ok;
3543      } else {
3544         /* miss.  nuke existing line and re-use it. */
3545         UWord i;
3546         fi->tags[lineno] = atag;
3547         for (i = 0; i < FI_LINE_SZB / 8; i++)
3548            line->u16s[i] = 0;
3549         line->u16s[loff] = mask;
3550         return False;
3551      }
3552    }
3553 }
3554 
Filter__ok_to_skip_cwr32(Filter * fi,Addr a)3555 static inline Bool Filter__ok_to_skip_cwr32 ( Filter* fi, Addr a )
3556 {
3557    if (UNLIKELY( !VG_IS_4_ALIGNED(a) ))
3558       return False;
3559    {
3560      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3561      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3562      FiLine* line   = &fi->lines[lineno];
3563      UWord   loff   = (a - atag) / 8;
3564      UShort  mask   = 0xFF << (2 * (a & 4)); /* 0xFF00 or 0x00FF */
3565      if (LIKELY( fi->tags[lineno] == atag )) {
3566         /* hit.  check line and update. */
3567         UShort  u16  = line->u16s[loff];
3568         Bool    ok   = (u16 & mask) == mask; /* 4 x R & W bits set? */
3569         line->u16s[loff] = u16 | mask; /* set them */
3570         return ok;
3571      } else {
3572         /* miss.  nuke existing line and re-use it. */
3573         UWord   i;
3574         fi->tags[lineno] = atag;
3575         for (i = 0; i < FI_LINE_SZB / 8; i++)
3576            line->u16s[i] = 0;
3577         line->u16s[loff] = mask;
3578         return False;
3579      }
3580    }
3581 }
3582 
Filter__ok_to_skip_cwr16(Filter * fi,Addr a)3583 static inline Bool Filter__ok_to_skip_cwr16 ( Filter* fi, Addr a )
3584 {
3585    if (UNLIKELY( !VG_IS_2_ALIGNED(a) ))
3586       return False;
3587    {
3588      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3589      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3590      FiLine* line   = &fi->lines[lineno];
3591      UWord   loff   = (a - atag) / 8;
3592      UShort  mask   = 0xF << (2 * (a & 6));
3593      /* mask is F000, 0F00, 00F0 or 000F */
3594      if (LIKELY( fi->tags[lineno] == atag )) {
3595         /* hit.  check line and update. */
3596         UShort  u16  = line->u16s[loff];
3597         Bool    ok   = (u16 & mask) == mask; /* 2 x R & W bits set? */
3598         line->u16s[loff] = u16 | mask; /* set them */
3599         return ok;
3600      } else {
3601         /* miss.  nuke existing line and re-use it. */
3602         UWord   i;
3603         fi->tags[lineno] = atag;
3604         for (i = 0; i < FI_LINE_SZB / 8; i++)
3605            line->u16s[i] = 0;
3606         line->u16s[loff] = mask;
3607         return False;
3608      }
3609    }
3610 }
3611 
Filter__ok_to_skip_cwr08(Filter * fi,Addr a)3612 static inline Bool Filter__ok_to_skip_cwr08 ( Filter* fi, Addr a )
3613 {
3614    {
3615      Addr    atag   = FI_GET_TAG(a);     /* tag of 'a' */
3616      UWord   lineno = FI_GET_LINENO(a);  /* lineno for 'a' */
3617      FiLine* line   = &fi->lines[lineno];
3618      UWord   loff   = (a - atag) / 8;
3619      UShort  mask   = 0x3 << (2 * (a & 7));
3620      /* mask is C000, 3000, 0C00, 0300, 00C0, 0030, 000C or 0003 */
3621      if (LIKELY( fi->tags[lineno] == atag )) {
3622         /* hit.  check line and update. */
3623         UShort  u16  = line->u16s[loff];
3624         Bool    ok   = (u16 & mask) == mask; /* 1 x R bits set? */
3625         line->u16s[loff] = u16 | mask; /* set them */
3626         return ok;
3627      } else {
3628         /* miss.  nuke existing line and re-use it. */
3629         UWord   i;
3630         fi->tags[lineno] = atag;
3631         for (i = 0; i < FI_LINE_SZB / 8; i++)
3632            line->u16s[i] = 0;
3633         line->u16s[loff] = mask;
3634         return False;
3635      }
3636    }
3637 }
3638 
3639 
3640 /////////////////////////////////////////////////////////
3641 //                                                     //
3642 // Threads                                             //
3643 //                                                     //
3644 /////////////////////////////////////////////////////////
3645 
3646 /* Maps ThrID values to their Thr*s (which contain ThrID values that
3647    should point back to the relevant slot in the array.  Lowest
3648    numbered slot (0) is for thrid = 1024, (1) is for 1025, etc. */
3649 static XArray* /* of Thr* */ thrid_to_thr_map = NULL;
3650 
3651 /* And a counter to dole out ThrID values.  For rationale/background,
3652    see comments on definition of ScalarTS (far) above. */
3653 static ThrID thrid_counter = 1024; /* runs up to ThrID_MAX_VALID */
3654 
Thr__to_ThrID(Thr * thr)3655 static ThrID Thr__to_ThrID ( Thr* thr ) {
3656    return thr->thrid;
3657 }
Thr__from_ThrID(UInt thrid)3658 static Thr* Thr__from_ThrID ( UInt thrid ) {
3659    Thr* thr = *(Thr**)VG_(indexXA)( thrid_to_thr_map, thrid - 1024 );
3660    tl_assert(thr->thrid == thrid);
3661    return thr;
3662 }
3663 
Thr__new(void)3664 static Thr* Thr__new ( void )
3665 {
3666    Thr* thr = HG_(zalloc)( "libhb.Thr__new.1", sizeof(Thr) );
3667    thr->viR = VtsID_INVALID;
3668    thr->viW = VtsID_INVALID;
3669    thr->llexit_done = False;
3670    thr->joinedwith_done = False;
3671    thr->filter = HG_(zalloc)( "libhb.Thr__new.2", sizeof(Filter) );
3672    /* We only really need this at history level 1, but unfortunately
3673       this routine is called before the command line processing is
3674       done (sigh), so we can't rely on HG_(clo_history_level) at this
3675       point.  Hence always allocate it.  Bah. */
3676    thr->local_Kws_n_stacks
3677       = VG_(newXA)( HG_(zalloc),
3678                     "libhb.Thr__new.3 (local_Kws_and_stacks)",
3679                     HG_(free), sizeof(ULong_n_EC) );
3680 
3681    /* Add this Thr* <-> ThrID binding to the mapping, and
3682       cross-check */
3683    if (!thrid_to_thr_map) {
3684       thrid_to_thr_map = VG_(newXA)( HG_(zalloc), "libhb.Thr__new.4",
3685                                      HG_(free), sizeof(Thr*) );
3686       tl_assert(thrid_to_thr_map);
3687    }
3688 
3689    if (thrid_counter >= ThrID_MAX_VALID) {
3690       /* We're hosed.  We have to stop. */
3691       scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
3692    }
3693 
3694    thr->thrid = thrid_counter++;
3695    Word ix = VG_(addToXA)( thrid_to_thr_map, &thr );
3696    tl_assert(ix + 1024 == thr->thrid);
3697 
3698    return thr;
3699 }
3700 
note_local_Kw_n_stack_for(Thr * thr)3701 static void note_local_Kw_n_stack_for ( Thr* thr )
3702 {
3703    Word       nPresent;
3704    ULong_n_EC pair;
3705    tl_assert(thr);
3706 
3707    // We only collect this info at history level 1 (approx)
3708    if (HG_(clo_history_level) != 1)
3709       return;
3710 
3711    /* This is the scalar Kw for thr. */
3712    pair.ull = VtsID__indexAt( thr->viW, thr );
3713    pair.ec  = main_get_EC( thr );
3714    tl_assert(pair.ec);
3715    tl_assert(thr->local_Kws_n_stacks);
3716 
3717    /* check that we're not adding duplicates */
3718    nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
3719 
3720    /* Throw away old stacks, if necessary.  We can't accumulate stuff
3721       indefinitely. */
3722    if (nPresent >= N_KWs_N_STACKs_PER_THREAD) {
3723       VG_(dropHeadXA)( thr->local_Kws_n_stacks, nPresent / 2 );
3724       nPresent = VG_(sizeXA)( thr->local_Kws_n_stacks );
3725       if (0)
3726          VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p (!!! gc !!!)\n",
3727                      thr, pair.ull, pair.ec );
3728    }
3729 
3730    if (nPresent > 0) {
3731       ULong_n_EC* prevPair
3732          = (ULong_n_EC*)VG_(indexXA)( thr->local_Kws_n_stacks, nPresent-1 );
3733       tl_assert( prevPair->ull <= pair.ull );
3734    }
3735 
3736    if (nPresent == 0)
3737       pair.ec = NULL;
3738 
3739    VG_(addToXA)( thr->local_Kws_n_stacks, &pair );
3740 
3741    if (0)
3742       VG_(printf)("LOCAL Kw: thr %p,  Kw %llu,  ec %p\n",
3743                   thr, pair.ull, pair.ec );
3744    if (0)
3745       VG_(pp_ExeContext)(pair.ec);
3746 }
3747 
cmp__ULong_n_EC__by_ULong(ULong_n_EC * pair1,ULong_n_EC * pair2)3748 static Int cmp__ULong_n_EC__by_ULong ( ULong_n_EC* pair1, ULong_n_EC* pair2 )
3749 {
3750    if (pair1->ull < pair2->ull) return -1;
3751    if (pair1->ull > pair2->ull) return 1;
3752    return 0;
3753 }
3754 
3755 
3756 /////////////////////////////////////////////////////////
3757 //                                                     //
3758 // Shadow Values                                       //
3759 //                                                     //
3760 /////////////////////////////////////////////////////////
3761 
3762 // type SVal, SVal_INVALID and SVal_NOACCESS are defined by
3763 // hb_zsm.h.  We have to do everything else here.
3764 
3765 /* SVal is 64 bit unsigned int.
3766 
3767       <---------30--------->    <---------30--------->
3768    00 X-----Rmin-VtsID-----X 00 X-----Wmin-VtsID-----X   C(Rmin,Wmin)
3769    10 X--------------------X XX X--------------------X   A: SVal_NOACCESS
3770    11 0--------------------0 00 0--------------------0   A: SVal_INVALID
3771 
3772 */
3773 #define SVAL_TAGMASK (3ULL << 62)
3774 
SVal__isC(SVal s)3775 static inline Bool SVal__isC ( SVal s ) {
3776    return (0ULL << 62) == (s & SVAL_TAGMASK);
3777 }
SVal__mkC(VtsID rmini,VtsID wmini)3778 static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini ) {
3779    //tl_assert(VtsID__is_valid(rmini));
3780    //tl_assert(VtsID__is_valid(wmini));
3781    return (((ULong)rmini) << 32) | ((ULong)wmini);
3782 }
SVal__unC_Rmin(SVal s)3783 static inline VtsID SVal__unC_Rmin ( SVal s ) {
3784    tl_assert(SVal__isC(s));
3785    return (VtsID)(s >> 32);
3786 }
SVal__unC_Wmin(SVal s)3787 static inline VtsID SVal__unC_Wmin ( SVal s ) {
3788    tl_assert(SVal__isC(s));
3789    return (VtsID)(s & 0xFFFFFFFFULL);
3790 }
3791 
SVal__isA(SVal s)3792 static inline Bool SVal__isA ( SVal s ) {
3793    return (2ULL << 62) == (s & SVAL_TAGMASK);
3794 }
SVal__mkA(void)3795 static inline SVal SVal__mkA ( void ) {
3796    return 2ULL << 62;
3797 }
3798 
3799 /* Direct callback from lib_zsm. */
SVal__rcinc(SVal s)3800 static void SVal__rcinc ( SVal s ) {
3801    if (SVal__isC(s)) {
3802       VtsID__rcinc( SVal__unC_Rmin(s) );
3803       VtsID__rcinc( SVal__unC_Wmin(s) );
3804    }
3805 }
3806 
3807 /* Direct callback from lib_zsm. */
SVal__rcdec(SVal s)3808 static void SVal__rcdec ( SVal s ) {
3809    if (SVal__isC(s)) {
3810       VtsID__rcdec( SVal__unC_Rmin(s) );
3811       VtsID__rcdec( SVal__unC_Wmin(s) );
3812    }
3813 }
3814 
3815 
3816 /////////////////////////////////////////////////////////
3817 //                                                     //
3818 // Change-event map2                                   //
3819 //                                                     //
3820 /////////////////////////////////////////////////////////
3821 
3822 #define EVENT_MAP_GC_DISCARD_FRACTION  0.5
3823 
3824 /* This is in two parts:
3825 
3826    1. A hash table of RCECs.  This is a set of reference-counted stack
3827       traces.  When the reference count of a stack trace becomes zero,
3828       it is removed from the set and freed up.  The intent is to have
3829       a set of stack traces which can be referred to from (2), but to
3830       only represent each one once.  The set is indexed/searched by
3831       ordering on the stack trace vectors.
3832 
3833    2. A SparseWA of OldRefs.  These store information about each old
3834       ref that we need to record.  It is indexed by address of the
3835       location for which the information is recorded.  For LRU
3836       purposes, each OldRef also contains a generation number,
3837       indicating when it was most recently accessed.
3838 
3839       The important part of an OldRef is, however, its accs[] array.
3840       This is an array of N_OLDREF_ACCS which binds (thread, R/W,
3841       size) triples to RCECs.  This allows us to collect the last
3842       access-traceback by up to N_OLDREF_ACCS different triples for
3843       this location.  The accs[] array is a MTF-array.  If a binding
3844       falls off the end, that's too bad -- we will lose info about
3845       that triple's access to this location.
3846 
3847       When the SparseWA becomes too big, we can throw away the OldRefs
3848       whose generation numbers are below some threshold; hence doing
3849       approximate LRU discarding.  For each discarded OldRef we must
3850       of course decrement the reference count on the all RCECs it
3851       refers to, in order that entries from (1) eventually get
3852       discarded too.
3853 
3854    A major improvement in reliability of this mechanism would be to
3855    have a dynamically sized OldRef.accs[] array, so no entries ever
3856    fall off the end.  In investigations (Dec 08) it appears that a
3857    major cause for the non-availability of conflicting-access traces
3858    in race reports is caused by the fixed size of this array.  I
3859    suspect for most OldRefs, only a few entries are used, but for a
3860    minority of cases there is an overflow, leading to info lossage.
3861    Investigations also suggest this is very workload and scheduling
3862    sensitive.  Therefore a dynamic sizing would be better.
3863 
3864    However, dynamic sizing would defeat the use of a PoolAllocator
3865    for OldRef structures.  And that's important for performance.  So
3866    it's not straightforward to do.
3867 */
3868 
3869 
3870 static UWord stats__ctxt_rcdec1 = 0;
3871 static UWord stats__ctxt_rcdec2 = 0;
3872 static UWord stats__ctxt_rcdec3 = 0;
3873 static UWord stats__ctxt_rcdec_calls = 0;
3874 static UWord stats__ctxt_rcdec_discards = 0;
3875 static UWord stats__ctxt_rcdec1_eq = 0;
3876 
3877 static UWord stats__ctxt_tab_curr = 0;
3878 static UWord stats__ctxt_tab_max  = 0;
3879 
3880 static UWord stats__ctxt_tab_qs   = 0;
3881 static UWord stats__ctxt_tab_cmps = 0;
3882 
3883 
3884 ///////////////////////////////////////////////////////
3885 //// Part (1): A hash table of RCECs
3886 ///
3887 
3888 #define N_FRAMES 8
3889 
3890 // (UInt) `echo "Reference Counted Execution Context" | md5sum`
3891 #define RCEC_MAGIC 0xab88abb2UL
3892 
3893 //#define N_RCEC_TAB 98317 /* prime */
3894 #define N_RCEC_TAB 196613 /* prime */
3895 
3896 typedef
3897    struct _RCEC {
3898       UWord magic;  /* sanity check only */
3899       struct _RCEC* next;
3900       UWord rc;
3901       UWord rcX; /* used for crosschecking */
3902       UWord frames_hash;          /* hash of all the frames */
3903       UWord frames[N_FRAMES];
3904    }
3905    RCEC;
3906 
3907 static RCEC** contextTab = NULL; /* hash table of RCEC*s */
3908 
3909 
3910 /* Gives an arbitrary total order on RCEC .frames fields */
RCEC__cmp_by_frames(RCEC * ec1,RCEC * ec2)3911 static Word RCEC__cmp_by_frames ( RCEC* ec1, RCEC* ec2 ) {
3912    Word i;
3913    tl_assert(ec1 && ec1->magic == RCEC_MAGIC);
3914    tl_assert(ec2 && ec2->magic == RCEC_MAGIC);
3915    if (ec1->frames_hash < ec2->frames_hash) return -1;
3916    if (ec1->frames_hash > ec2->frames_hash) return  1;
3917    for (i = 0; i < N_FRAMES; i++) {
3918       if (ec1->frames[i] < ec2->frames[i]) return -1;
3919       if (ec1->frames[i] > ec2->frames[i]) return  1;
3920    }
3921    return 0;
3922 }
3923 
3924 
3925 /* Dec the ref of this RCEC. */
ctxt__rcdec(RCEC * ec)3926 static void ctxt__rcdec ( RCEC* ec )
3927 {
3928    stats__ctxt_rcdec_calls++;
3929    tl_assert(ec && ec->magic == RCEC_MAGIC);
3930    tl_assert(ec->rc > 0);
3931    ec->rc--;
3932 }
3933 
ctxt__rcinc(RCEC * ec)3934 static void ctxt__rcinc ( RCEC* ec )
3935 {
3936    tl_assert(ec && ec->magic == RCEC_MAGIC);
3937    ec->rc++;
3938 }
3939 
3940 
3941 //////////// BEGIN RCEC pool allocator
3942 static PoolAlloc* rcec_pool_allocator;
3943 
alloc_RCEC(void)3944 static RCEC* alloc_RCEC ( void ) {
3945    return VG_(allocEltPA) ( rcec_pool_allocator );
3946 }
3947 
free_RCEC(RCEC * rcec)3948 static void free_RCEC ( RCEC* rcec ) {
3949    tl_assert(rcec->magic == RCEC_MAGIC);
3950    VG_(freeEltPA)( rcec_pool_allocator, rcec );
3951 }
3952 //////////// END RCEC pool allocator
3953 
3954 
3955 /* Find 'ec' in the RCEC list whose head pointer lives at 'headp' and
3956    move it one step closer the the front of the list, so as to make
3957    subsequent searches for it cheaper. */
move_RCEC_one_step_forward(RCEC ** headp,RCEC * ec)3958 static void move_RCEC_one_step_forward ( RCEC** headp, RCEC* ec )
3959 {
3960    RCEC *ec0, *ec1, *ec2;
3961    if (ec == *headp)
3962       tl_assert(0); /* already at head of list */
3963    tl_assert(ec != NULL);
3964    ec0 = *headp;
3965    ec1 = NULL;
3966    ec2 = NULL;
3967    while (True) {
3968       if (ec0 == NULL || ec0 == ec) break;
3969       ec2 = ec1;
3970       ec1 = ec0;
3971       ec0 = ec0->next;
3972    }
3973    tl_assert(ec0 == ec);
3974    if (ec0 != NULL && ec1 != NULL && ec2 != NULL) {
3975       RCEC* tmp;
3976       /* ec0 points to ec, ec1 to its predecessor, and ec2 to ec1's
3977          predecessor.  Swap ec0 and ec1, that is, move ec0 one step
3978          closer to the start of the list. */
3979       tl_assert(ec2->next == ec1);
3980       tl_assert(ec1->next == ec0);
3981       tmp = ec0->next;
3982       ec2->next = ec0;
3983       ec0->next = ec1;
3984       ec1->next = tmp;
3985    }
3986    else
3987    if (ec0 != NULL && ec1 != NULL && ec2 == NULL) {
3988       /* it's second in the list. */
3989       tl_assert(*headp == ec1);
3990       tl_assert(ec1->next == ec0);
3991       ec1->next = ec0->next;
3992       ec0->next = ec1;
3993       *headp = ec0;
3994    }
3995 }
3996 
3997 
3998 /* Find the given RCEC in the tree, and return a pointer to it.  Or,
3999    if not present, add the given one to the tree (by making a copy of
4000    it, so the caller can immediately deallocate the original) and
4001    return a pointer to the copy.  The caller can safely have 'example'
4002    on its stack, since we will always return a pointer to a copy of
4003    it, not to the original.  Note that the inserted node will have .rc
4004    of zero and so the caller must immediatly increment it. */
4005 __attribute__((noinline))
ctxt__find_or_add(RCEC * example)4006 static RCEC* ctxt__find_or_add ( RCEC* example )
4007 {
4008    UWord hent;
4009    RCEC* copy;
4010    tl_assert(example && example->magic == RCEC_MAGIC);
4011    tl_assert(example->rc == 0);
4012 
4013    /* Search the hash table to see if we already have it. */
4014    stats__ctxt_tab_qs++;
4015    hent = example->frames_hash % N_RCEC_TAB;
4016    copy = contextTab[hent];
4017    while (1) {
4018       if (!copy) break;
4019       tl_assert(copy->magic == RCEC_MAGIC);
4020       stats__ctxt_tab_cmps++;
4021       if (0 == RCEC__cmp_by_frames(copy, example)) break;
4022       copy = copy->next;
4023    }
4024 
4025    if (copy) {
4026       tl_assert(copy != example);
4027       /* optimisation: if it's not at the head of its list, move 1
4028          step fwds, to make future searches cheaper */
4029       if (copy != contextTab[hent]) {
4030          move_RCEC_one_step_forward( &contextTab[hent], copy );
4031       }
4032    } else {
4033       copy = alloc_RCEC();
4034       tl_assert(copy != example);
4035       *copy = *example;
4036       copy->next = contextTab[hent];
4037       contextTab[hent] = copy;
4038       stats__ctxt_tab_curr++;
4039       if (stats__ctxt_tab_curr > stats__ctxt_tab_max)
4040          stats__ctxt_tab_max = stats__ctxt_tab_curr;
4041    }
4042    return copy;
4043 }
4044 
ROLW(UWord w,Int n)4045 static inline UWord ROLW ( UWord w, Int n )
4046 {
4047    Int bpw = 8 * sizeof(UWord);
4048    w = (w << n) | (w >> (bpw-n));
4049    return w;
4050 }
4051 
4052 __attribute__((noinline))
get_RCEC(Thr * thr)4053 static RCEC* get_RCEC ( Thr* thr )
4054 {
4055    UWord hash, i;
4056    RCEC  example;
4057    example.magic = RCEC_MAGIC;
4058    example.rc = 0;
4059    example.rcX = 0;
4060    main_get_stacktrace( thr, &example.frames[0], N_FRAMES );
4061    hash = 0;
4062    for (i = 0; i < N_FRAMES; i++) {
4063       hash ^= example.frames[i];
4064       hash = ROLW(hash, 19);
4065    }
4066    example.frames_hash = hash;
4067    return ctxt__find_or_add( &example );
4068 }
4069 
4070 ///////////////////////////////////////////////////////
4071 //// Part (2):
4072 ///  A SparseWA guest-addr -> OldRef, that refers to (1)
4073 ///
4074 
4075 // (UInt) `echo "Old Reference Information" | md5sum`
4076 #define OldRef_MAGIC 0x30b1f075UL
4077 
4078 /* Records an access: a thread, a context (size & writeness) and the
4079    number of held locks. The size (1,2,4,8) is encoded as 00 = 1, 01 =
4080    2, 10 = 4, 11 = 8.
4081 */
4082 typedef
4083    struct {
4084       RCEC*     rcec;
4085       WordSetID locksHeldW;
4086       UInt      thrid  : SCALARTS_N_THRBITS;
4087       UInt      szLg2B : 2;
4088       UInt      isW    : 1;
4089    }
4090    Thr_n_RCEC;
4091 
4092 #define N_OLDREF_ACCS 5
4093 
4094 typedef
4095    struct {
4096       UWord magic;  /* sanity check only */
4097       UWord gen;    /* when most recently accessed */
4098                     /* or free list when not in use */
4099       /* unused slots in this array have .thrid == 0, which is invalid */
4100       Thr_n_RCEC accs[N_OLDREF_ACCS];
4101    }
4102    OldRef;
4103 
4104 
4105 //////////// BEGIN OldRef pool allocator
4106 static PoolAlloc* oldref_pool_allocator;
4107 
alloc_OldRef(void)4108 static OldRef* alloc_OldRef ( void ) {
4109    return VG_(allocEltPA) ( oldref_pool_allocator );
4110 }
4111 
free_OldRef(OldRef * r)4112 static void free_OldRef ( OldRef* r ) {
4113    tl_assert(r->magic == OldRef_MAGIC);
4114    VG_(freeEltPA)( oldref_pool_allocator, r );
4115 }
4116 //////////// END OldRef pool allocator
4117 
4118 
4119 static SparseWA* oldrefTree     = NULL; /* SparseWA* OldRef* */
4120 static UWord     oldrefGen      = 0;    /* current LRU generation # */
4121 static UWord     oldrefTreeN    = 0;    /* # elems in oldrefTree */
4122 static UWord     oldrefGenIncAt = 0;    /* inc gen # when size hits this */
4123 
min_UInt(UInt a,UInt b)4124 inline static UInt min_UInt ( UInt a, UInt b ) {
4125    return a < b ? a : b;
4126 }
4127 
4128 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
4129    first interval is lower, 1 if the first interval is higher, and 0
4130    if there is any overlap.  Redundant paranoia with casting is there
4131    following what looked distinctly like a bug in gcc-4.1.2, in which
4132    some of the comparisons were done signedly instead of
4133    unsignedly. */
4134 /* Copied from exp-ptrcheck/sg_main.c */
cmp_nonempty_intervals(Addr a1,SizeT n1,Addr a2,SizeT n2)4135 static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
4136                                      Addr a2, SizeT n2 ) {
4137    UWord a1w = (UWord)a1;
4138    UWord n1w = (UWord)n1;
4139    UWord a2w = (UWord)a2;
4140    UWord n2w = (UWord)n2;
4141    tl_assert(n1w > 0 && n2w > 0);
4142    if (a1w + n1w <= a2w) return -1L;
4143    if (a2w + n2w <= a1w) return 1L;
4144    return 0;
4145 }
4146 
event_map_bind(Addr a,SizeT szB,Bool isW,Thr * thr)4147 static void event_map_bind ( Addr a, SizeT szB, Bool isW, Thr* thr )
4148 {
4149    OldRef* ref;
4150    RCEC*   rcec;
4151    Word    i, j;
4152    UWord   keyW, valW;
4153    Bool    b;
4154 
4155    tl_assert(thr);
4156    ThrID thrid = thr->thrid;
4157    tl_assert(thrid != 0); /* zero is used to denote an empty slot. */
4158 
4159    WordSetID locksHeldW = thr->hgthread->locksetW;
4160 
4161    rcec = get_RCEC( thr );
4162    ctxt__rcinc(rcec);
4163 
4164    UInt szLg2B = 0;
4165    switch (szB) {
4166       /* This doesn't look particularly branch-predictor friendly. */
4167       case 1:  szLg2B = 0; break;
4168       case 2:  szLg2B = 1; break;
4169       case 4:  szLg2B = 2; break;
4170       case 8:  szLg2B = 3; break;
4171       default: tl_assert(0);
4172    }
4173 
4174    /* Look in the map to see if we already have a record for this
4175       address. */
4176    b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, a );
4177 
4178    if (b) {
4179 
4180       /* We already have a record for this address.  We now need to
4181          see if we have a stack trace pertaining to this (thrid, R/W,
4182          size) triple. */
4183       tl_assert(keyW == a);
4184       ref = (OldRef*)valW;
4185       tl_assert(ref->magic == OldRef_MAGIC);
4186 
4187       for (i = 0; i < N_OLDREF_ACCS; i++) {
4188          if (ref->accs[i].thrid != thrid)
4189             continue;
4190          if (ref->accs[i].szLg2B != szLg2B)
4191             continue;
4192          if (ref->accs[i].isW != (UInt)(isW & 1))
4193             continue;
4194          /* else we have a match, so stop looking. */
4195          break;
4196       }
4197 
4198       if (i < N_OLDREF_ACCS) {
4199          /* thread 'thr' has an entry at index 'i'.  Update its RCEC. */
4200          if (i > 0) {
4201             Thr_n_RCEC tmp = ref->accs[i-1];
4202             ref->accs[i-1] = ref->accs[i];
4203             ref->accs[i] = tmp;
4204             i--;
4205          }
4206          if (rcec == ref->accs[i].rcec) stats__ctxt_rcdec1_eq++;
4207          stats__ctxt_rcdec1++;
4208          ctxt__rcdec( ref->accs[i].rcec );
4209          tl_assert(ref->accs[i].thrid == thrid);
4210          /* Update the RCEC and the W-held lockset. */
4211          ref->accs[i].rcec       = rcec;
4212          ref->accs[i].locksHeldW = locksHeldW;
4213       } else {
4214          /* No entry for this (thread, R/W, size, nWHeld) quad.
4215             Shuffle all of them down one slot, and put the new entry
4216             at the start of the array. */
4217          if (ref->accs[N_OLDREF_ACCS-1].thrid != 0) {
4218             /* the last slot is in use.  We must dec the rc on the
4219                associated rcec. */
4220             tl_assert(ref->accs[N_OLDREF_ACCS-1].rcec);
4221             stats__ctxt_rcdec2++;
4222             if (0 && 0 == (stats__ctxt_rcdec2 & 0xFFF))
4223                VG_(printf)("QQQQ %lu overflows\n",stats__ctxt_rcdec2);
4224             ctxt__rcdec( ref->accs[N_OLDREF_ACCS-1].rcec );
4225          } else {
4226             tl_assert(!ref->accs[N_OLDREF_ACCS-1].rcec);
4227          }
4228          for (j = N_OLDREF_ACCS-1; j >= 1; j--)
4229             ref->accs[j] = ref->accs[j-1];
4230          ref->accs[0].thrid      = thrid;
4231          ref->accs[0].szLg2B     = szLg2B;
4232          ref->accs[0].isW        = (UInt)(isW & 1);
4233          ref->accs[0].locksHeldW = locksHeldW;
4234          ref->accs[0].rcec       = rcec;
4235          /* thrid==0 is used to signify an empty slot, so we can't
4236             add zero thrid (such a ThrID is invalid anyway). */
4237          /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
4238       }
4239 
4240       ref->gen = oldrefGen;
4241 
4242    } else {
4243 
4244       /* We don't have a record for this address.  Create a new one. */
4245       if (oldrefTreeN >= oldrefGenIncAt) {
4246          oldrefGen++;
4247          oldrefGenIncAt = oldrefTreeN + 50000;
4248          if (0) VG_(printf)("oldrefTree: new gen %lu at size %lu\n",
4249                             oldrefGen, oldrefTreeN );
4250       }
4251 
4252       ref = alloc_OldRef();
4253       ref->magic = OldRef_MAGIC;
4254       ref->gen   = oldrefGen;
4255       ref->accs[0].thrid      = thrid;
4256       ref->accs[0].szLg2B     = szLg2B;
4257       ref->accs[0].isW        = (UInt)(isW & 1);
4258       ref->accs[0].locksHeldW = locksHeldW;
4259       ref->accs[0].rcec       = rcec;
4260 
4261       /* thrid==0 is used to signify an empty slot, so we can't
4262          add zero thrid (such a ThrID is invalid anyway). */
4263       /* tl_assert(thrid != 0); */ /* There's a dominating assert above. */
4264 
4265       /* Clear out the rest of the entries */
4266       for (j = 1; j < N_OLDREF_ACCS; j++) {
4267          ref->accs[j].rcec       = NULL;
4268          ref->accs[j].thrid      = 0;
4269          ref->accs[j].szLg2B     = 0;
4270          ref->accs[j].isW        = 0;
4271          ref->accs[j].locksHeldW = 0;
4272       }
4273       VG_(addToSWA)( oldrefTree, a, (UWord)ref );
4274       oldrefTreeN++;
4275 
4276    }
4277 }
4278 
4279 
4280 /* Extract info from the conflicting-access machinery. */
libhb_event_map_lookup(ExeContext ** resEC,Thr ** resThr,SizeT * resSzB,Bool * resIsW,WordSetID * locksHeldW,Thr * thr,Addr a,SizeT szB,Bool isW)4281 Bool libhb_event_map_lookup ( /*OUT*/ExeContext** resEC,
4282                               /*OUT*/Thr**        resThr,
4283                               /*OUT*/SizeT*       resSzB,
4284                               /*OUT*/Bool*        resIsW,
4285                               /*OUT*/WordSetID*   locksHeldW,
4286                               Thr* thr, Addr a, SizeT szB, Bool isW )
4287 {
4288    Word    i, j;
4289    OldRef* ref;
4290    UWord   keyW, valW;
4291    Bool    b;
4292 
4293    ThrID     cand_thrid;
4294    RCEC*     cand_rcec;
4295    Bool      cand_isW;
4296    SizeT     cand_szB;
4297    WordSetID cand_locksHeldW;
4298    Addr      cand_a;
4299 
4300    Addr toCheck[15];
4301    Int  nToCheck = 0;
4302 
4303    tl_assert(thr);
4304    tl_assert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
4305 
4306    ThrID thrid = thr->thrid;
4307 
4308    toCheck[nToCheck++] = a;
4309    for (i = -7; i < (Word)szB; i++) {
4310       if (i != 0)
4311          toCheck[nToCheck++] = a + i;
4312    }
4313    tl_assert(nToCheck <= 15);
4314 
4315    /* Now see if we can find a suitable matching event for
4316       any of the addresses in toCheck[0 .. nToCheck-1]. */
4317    for (j = 0; j < nToCheck; j++) {
4318 
4319       cand_a = toCheck[j];
4320       //      VG_(printf)("test %ld %p\n", j, cand_a);
4321 
4322       b = VG_(lookupSWA)( oldrefTree, &keyW, &valW, cand_a );
4323       if (!b)
4324          continue;
4325 
4326       ref = (OldRef*)valW;
4327       tl_assert(keyW == cand_a);
4328       tl_assert(ref->magic == OldRef_MAGIC);
4329       tl_assert(ref->accs[0].thrid != 0); /* first slot must always be used */
4330 
4331       cand_thrid      = 0; /* invalid; see comments in event_map_bind */
4332       cand_rcec       = NULL;
4333       cand_isW        = False;
4334       cand_szB        = 0;
4335       cand_locksHeldW = 0; /* always valid; see initialise_data_structures() */
4336 
4337       for (i = 0; i < N_OLDREF_ACCS; i++) {
4338          Thr_n_RCEC* cand = &ref->accs[i];
4339          cand_rcec       = cand->rcec;
4340          cand_thrid      = cand->thrid;
4341          cand_isW        = (Bool)cand->isW;
4342          cand_szB        = 1 << cand->szLg2B;
4343          cand_locksHeldW = cand->locksHeldW;
4344 
4345          if (cand_thrid == 0)
4346             /* This slot isn't in use.  Ignore it. */
4347             continue;
4348 
4349          if (cand_thrid == thrid)
4350             /* This is an access by the same thread, but we're only
4351                interested in accesses from other threads.  Ignore. */
4352             continue;
4353 
4354          if ((!cand_isW) && (!isW))
4355             /* We don't want to report a read racing against another
4356                read; that's stupid.  So in this case move on. */
4357             continue;
4358 
4359          if (cmp_nonempty_intervals(a, szB, cand_a, cand_szB) != 0)
4360             /* No overlap with the access we're asking about.  Ignore. */
4361             continue;
4362 
4363          /* We have a match.  Stop searching. */
4364          break;
4365       }
4366 
4367       tl_assert(i >= 0 && i <= N_OLDREF_ACCS);
4368 
4369       if (i < N_OLDREF_ACCS) {
4370          Int n, maxNFrames;
4371          /* return with success */
4372          tl_assert(cand_thrid);
4373          tl_assert(cand_rcec);
4374          tl_assert(cand_rcec->magic == RCEC_MAGIC);
4375          tl_assert(cand_szB >= 1);
4376          /* Count how many non-zero frames we have. */
4377          maxNFrames = min_UInt(N_FRAMES, VG_(clo_backtrace_size));
4378          for (n = 0; n < maxNFrames; n++) {
4379             if (0 == cand_rcec->frames[n]) break;
4380          }
4381          *resEC      = VG_(make_ExeContext_from_StackTrace)
4382                           (cand_rcec->frames, n);
4383          *resThr     = Thr__from_ThrID(cand_thrid);
4384          *resSzB     = cand_szB;
4385          *resIsW     = cand_isW;
4386          *locksHeldW = cand_locksHeldW;
4387          return True;
4388       }
4389 
4390       /* consider next address in toCheck[] */
4391    } /* for (j = 0; j < nToCheck; j++) */
4392 
4393    /* really didn't find anything. */
4394    return False;
4395 }
4396 
event_map_init(void)4397 static void event_map_init ( void )
4398 {
4399    Word i;
4400 
4401    /* Context (RCEC) pool allocator */
4402    rcec_pool_allocator = VG_(newPA) (
4403                              sizeof(RCEC),
4404                              1000 /* RCECs per pool */,
4405                              HG_(zalloc),
4406                              "libhb.event_map_init.1 (RCEC pools)",
4407                              HG_(free)
4408                           );
4409 
4410    /* Context table */
4411    tl_assert(!contextTab);
4412    contextTab = HG_(zalloc)( "libhb.event_map_init.2 (context table)",
4413                              N_RCEC_TAB * sizeof(RCEC*) );
4414    tl_assert(contextTab);
4415    for (i = 0; i < N_RCEC_TAB; i++)
4416       contextTab[i] = NULL;
4417 
4418    /* Oldref pool allocator */
4419    oldref_pool_allocator = VG_(newPA)(
4420                                sizeof(OldRef),
4421                                1000 /* OldRefs per pool */,
4422                                HG_(zalloc),
4423                                "libhb.event_map_init.3 (OldRef pools)",
4424                                HG_(free)
4425                             );
4426 
4427    /* Oldref tree */
4428    tl_assert(!oldrefTree);
4429    oldrefTree = VG_(newSWA)(
4430                    HG_(zalloc),
4431                    "libhb.event_map_init.4 (oldref tree)",
4432                    HG_(free)
4433                 );
4434    tl_assert(oldrefTree);
4435 
4436    oldrefGen = 0;
4437    oldrefGenIncAt = 0;
4438    oldrefTreeN = 0;
4439 }
4440 
event_map__check_reference_counts(Bool before)4441 static void event_map__check_reference_counts ( Bool before )
4442 {
4443    RCEC*   rcec;
4444    OldRef* oldref;
4445    Word    i;
4446    UWord   nEnts = 0;
4447    UWord   keyW, valW;
4448 
4449    /* Set the 'check' reference counts to zero.  Also, optionally
4450       check that the real reference counts are non-zero.  We allow
4451       these to fall to zero before a GC, but the GC must get rid of
4452       all those that are zero, hence none should be zero after a
4453       GC. */
4454    for (i = 0; i < N_RCEC_TAB; i++) {
4455       for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4456          nEnts++;
4457          tl_assert(rcec);
4458          tl_assert(rcec->magic == RCEC_MAGIC);
4459          if (!before)
4460             tl_assert(rcec->rc > 0);
4461          rcec->rcX = 0;
4462       }
4463    }
4464 
4465    /* check that the stats are sane */
4466    tl_assert(nEnts == stats__ctxt_tab_curr);
4467    tl_assert(stats__ctxt_tab_curr <= stats__ctxt_tab_max);
4468 
4469    /* visit all the referencing points, inc check ref counts */
4470    VG_(initIterSWA)( oldrefTree );
4471    while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
4472       oldref = (OldRef*)valW;
4473       tl_assert(oldref->magic == OldRef_MAGIC);
4474       for (i = 0; i < N_OLDREF_ACCS; i++) {
4475          ThrID aThrID = oldref->accs[i].thrid;
4476          RCEC* aRef   = oldref->accs[i].rcec;
4477          if (aThrID != 0) {
4478             tl_assert(aRef);
4479             tl_assert(aRef->magic == RCEC_MAGIC);
4480             aRef->rcX++;
4481          } else {
4482             tl_assert(!aRef);
4483          }
4484       }
4485    }
4486 
4487    /* compare check ref counts with actual */
4488    for (i = 0; i < N_RCEC_TAB; i++) {
4489       for (rcec = contextTab[i]; rcec; rcec = rcec->next) {
4490          tl_assert(rcec->rc == rcec->rcX);
4491       }
4492    }
4493 }
4494 
4495 __attribute__((noinline))
event_map_maybe_GC(void)4496 static void event_map_maybe_GC ( void )
4497 {
4498    OldRef* oldref;
4499    UWord   keyW, valW, retained, maxGen;
4500    XArray* refs2del;
4501    Word    i, j, n2del;
4502 
4503    UWord* genMap      = NULL;
4504    UWord  genMap_min  = 0;
4505    UWord  genMap_size = 0;
4506 
4507    if (LIKELY(oldrefTreeN < HG_(clo_conflict_cache_size)))
4508       return;
4509 
4510    if (0)
4511       VG_(printf)("libhb: event_map GC at size %lu\n", oldrefTreeN);
4512 
4513    /* Check for sane command line params.  Limit values must match
4514       those in hg_process_cmd_line_option. */
4515    tl_assert( HG_(clo_conflict_cache_size) >= 10*1000 );
4516    tl_assert( HG_(clo_conflict_cache_size) <= 30*1000*1000 );
4517 
4518    /* Check our counting is sane (expensive) */
4519    if (CHECK_CEM)
4520       tl_assert(oldrefTreeN == VG_(sizeSWA)( oldrefTree ));
4521 
4522    /* Check the reference counts (expensive) */
4523    if (CHECK_CEM)
4524       event_map__check_reference_counts( True/*before*/ );
4525 
4526    /* Compute the distribution of generation values in the ref tree.
4527       There are likely only to be a few different generation numbers
4528       in the whole tree, but we don't know what they are.  Hence use a
4529       dynamically resized array of counters.  The array is genMap[0
4530       .. genMap_size-1], where genMap[0] is the count for the
4531       generation number genMap_min, genMap[1] is the count for
4532       genMap_min+1, etc.  If a new number is seen outside the range
4533       [genMap_min .. genMap_min + genMap_size - 1] then the array is
4534       copied into a larger array, and genMap_min and genMap_size are
4535       adjusted accordingly. */
4536 
4537    /* genMap :: generation-number -> count-of-nodes-with-that-number */
4538 
4539    VG_(initIterSWA)( oldrefTree );
4540    while ( VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
4541 
4542        UWord ea, key;
4543        oldref = (OldRef*)valW;
4544        key = oldref->gen;
4545 
4546       /* BEGIN find 'ea', which is the index in genMap holding the
4547          count for generation number 'key'. */
4548       if (UNLIKELY(genMap == NULL)) {
4549          /* deal with the first key to be seen, so that the following
4550             cases don't need to handle the complexity of a NULL count
4551             array. */
4552          genMap_min  = key;
4553          genMap_size = 1;
4554          genMap = HG_(zalloc)( "libhb.emmG.1a",
4555                                 genMap_size * sizeof(UWord) );
4556          ea = 0;
4557          if (0) VG_(printf)("(%lu) case 1 [%lu .. %lu]\n",
4558                             key, genMap_min, genMap_min+genMap_size- 1 );
4559       }
4560       else
4561       if (LIKELY(key >= genMap_min && key < genMap_min + genMap_size)) {
4562          /* this is the expected (almost-always-happens) case: 'key'
4563             is already mapped in the array. */
4564          ea = key - genMap_min;
4565       }
4566       else
4567       if (key < genMap_min) {
4568          /* 'key' appears before the start of the current array.
4569             Extend the current array by allocating a larger one and
4570             copying the current one to the upper end of it. */
4571          Word   more;
4572          UWord* map2;
4573          more = genMap_min - key;
4574          tl_assert(more > 0);
4575          map2 = HG_(zalloc)( "libhb.emmG.1b",
4576                              (genMap_size + more) * sizeof(UWord) );
4577          VG_(memcpy)( &map2[more], genMap, genMap_size * sizeof(UWord) );
4578          HG_(free)( genMap );
4579          genMap = map2;
4580          genMap_size += more;
4581          genMap_min -= more;
4582          ea = 0;
4583          tl_assert(genMap_min == key);
4584          if (0) VG_(printf)("(%lu) case 2 [%lu .. %lu]\n",
4585                             key, genMap_min,  genMap_min+genMap_size- 1 );
4586       }
4587       else {
4588          /* 'key' appears after the end of the current array.  Extend
4589             the current array by allocating a larger one and copying
4590             the current one to the lower end of it. */
4591          Word   more;
4592          UWord* map2;
4593          tl_assert(key >= genMap_min + genMap_size);
4594          more = key - (genMap_min + genMap_size) + 1;
4595          tl_assert(more > 0);
4596          map2 = HG_(zalloc)( "libhb.emmG.1c",
4597                              (genMap_size + more) * sizeof(UWord) );
4598          VG_(memcpy)( &map2[0], genMap, genMap_size * sizeof(UWord) );
4599          HG_(free)( genMap );
4600          genMap = map2;
4601          genMap_size += more;
4602          ea = genMap_size - 1;;
4603          tl_assert(genMap_min + genMap_size - 1 == key);
4604          if (0) VG_(printf)("(%lu) case 3 [%lu .. %lu]\n",
4605                             key, genMap_min, genMap_min+genMap_size- 1 );
4606       }
4607       /* END find 'ea' from 'key' */
4608 
4609       tl_assert(ea >= 0 && ea < genMap_size);
4610       /* and the whole point of this elaborate computation of 'ea' is .. */
4611       genMap[ea]++;
4612    }
4613 
4614    tl_assert(genMap);
4615    tl_assert(genMap_size > 0);
4616 
4617    /* Sanity check what we just computed */
4618    { UWord sum = 0;
4619      for (i = 0; i < genMap_size; i++) {
4620         if (0) VG_(printf)("  xxx: gen %ld has %lu\n",
4621                            i + genMap_min, genMap[i] );
4622         sum += genMap[i];
4623      }
4624      tl_assert(sum == oldrefTreeN);
4625    }
4626 
4627    /* Figure out how many generations to throw away */
4628    retained = oldrefTreeN;
4629    maxGen = 0;
4630 
4631    for (i = 0; i < genMap_size; i++) {
4632       keyW = i + genMap_min;
4633       valW = genMap[i];
4634       tl_assert(keyW > 0); /* can't allow a generation # 0 */
4635       if (0) VG_(printf)("  XXX: gen %lu has %lu\n", keyW, valW );
4636       tl_assert(keyW >= maxGen);
4637       tl_assert(retained >= valW);
4638       if (retained - valW
4639           > (UWord)(HG_(clo_conflict_cache_size)
4640                     * EVENT_MAP_GC_DISCARD_FRACTION)) {
4641          retained -= valW;
4642          maxGen = keyW;
4643       } else {
4644          break;
4645       }
4646    }
4647 
4648    HG_(free)(genMap);
4649 
4650    tl_assert(retained >= 0 && retained <= oldrefTreeN);
4651 
4652    /* Now make up a big list of the oldrefTree entries we want to
4653       delete.  We can't simultaneously traverse the tree and delete
4654       stuff from it, so first we need to copy them off somewhere
4655       else. (sigh) */
4656    refs2del = VG_(newXA)( HG_(zalloc), "libhb.emmG.2",
4657                           HG_(free), sizeof(Addr) );
4658 
4659    if (retained < oldrefTreeN) {
4660 
4661       /* This is the normal (expected) case.  We discard any ref whose
4662          generation number <= maxGen. */
4663       VG_(initIterSWA)( oldrefTree );
4664       while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
4665          oldref = (OldRef*)valW;
4666          tl_assert(oldref->magic == OldRef_MAGIC);
4667          if (oldref->gen <= maxGen) {
4668             VG_(addToXA)( refs2del, &keyW );
4669          }
4670       }
4671       if (VG_(clo_stats)) {
4672          VG_(message)(Vg_DebugMsg,
4673             "libhb: EvM GC: delete generations %lu and below, "
4674             "retaining %lu entries\n",
4675             maxGen, retained );
4676       }
4677 
4678    } else {
4679 
4680       static UInt rand_seed = 0; /* leave as static */
4681 
4682       /* Degenerate case: there's only one generation in the entire
4683          tree, so we need to have some other way of deciding which
4684          refs to throw away.  Just throw out half of them randomly. */
4685       tl_assert(retained == oldrefTreeN);
4686       VG_(initIterSWA)( oldrefTree );
4687       while (VG_(nextIterSWA)( oldrefTree, &keyW, &valW )) {
4688          UInt n;
4689          oldref = (OldRef*)valW;
4690          tl_assert(oldref->magic == OldRef_MAGIC);
4691          n = VG_(random)( &rand_seed );
4692          if ((n & 0xFFF) < 0x800) {
4693             VG_(addToXA)( refs2del, &keyW );
4694             retained--;
4695          }
4696       }
4697       if (VG_(clo_stats)) {
4698          VG_(message)(Vg_DebugMsg,
4699             "libhb: EvM GC: randomly delete half the entries, "
4700             "retaining %lu entries\n",
4701             retained );
4702       }
4703 
4704    }
4705 
4706    n2del = VG_(sizeXA)( refs2del );
4707    tl_assert(n2del == (Word)(oldrefTreeN - retained));
4708 
4709    if (0) VG_(printf)("%s","deleting entries\n");
4710    for (i = 0; i < n2del; i++) {
4711       Bool  b;
4712       Addr  ga2del = *(Addr*)VG_(indexXA)( refs2del, i );
4713       b = VG_(delFromSWA)( oldrefTree, &keyW, &valW, ga2del );
4714       tl_assert(b);
4715       tl_assert(keyW == ga2del);
4716       oldref = (OldRef*)valW;
4717       for (j = 0; j < N_OLDREF_ACCS; j++) {
4718          ThrID aThrID = oldref->accs[j].thrid;
4719          RCEC* aRef   = oldref->accs[j].rcec;
4720          if (aRef) {
4721             tl_assert(aThrID != 0);
4722             stats__ctxt_rcdec3++;
4723             ctxt__rcdec( aRef );
4724          } else {
4725             tl_assert(aThrID == 0);
4726          }
4727       }
4728 
4729       free_OldRef( oldref );
4730    }
4731 
4732    VG_(deleteXA)( refs2del );
4733 
4734    tl_assert( VG_(sizeSWA)( oldrefTree ) == retained );
4735 
4736    oldrefTreeN = retained;
4737    oldrefGenIncAt = oldrefTreeN; /* start new gen right away */
4738 
4739    /* Throw away all RCECs with zero reference counts */
4740    for (i = 0; i < N_RCEC_TAB; i++) {
4741       RCEC** pp = &contextTab[i];
4742       RCEC*  p  = *pp;
4743       while (p) {
4744          if (p->rc == 0) {
4745             *pp = p->next;
4746             free_RCEC(p);
4747             p = *pp;
4748             tl_assert(stats__ctxt_tab_curr > 0);
4749             stats__ctxt_tab_curr--;
4750          } else {
4751             pp = &p->next;
4752             p = p->next;
4753          }
4754       }
4755    }
4756 
4757    /* Check the reference counts (expensive) */
4758    if (CHECK_CEM)
4759       event_map__check_reference_counts( False/*after*/ );
4760 
4761    //if (0)
4762    //VG_(printf)("XXXX final sizes: oldrefTree %ld, contextTree %ld\n\n",
4763    //            VG_(OSetGen_Size)(oldrefTree), VG_(OSetGen_Size)(contextTree));
4764 
4765 }
4766 
4767 
4768 /////////////////////////////////////////////////////////
4769 //                                                     //
4770 // Core MSM                                            //
4771 //                                                     //
4772 /////////////////////////////////////////////////////////
4773 
4774 /* Logic in msmcread/msmcwrite updated/verified after re-analysis, 19
4775    Nov 08, and again after [...],
4776    June 09. */
4777 
4778 static ULong stats__msmcread         = 0;
4779 static ULong stats__msmcread_change  = 0;
4780 static ULong stats__msmcwrite        = 0;
4781 static ULong stats__msmcwrite_change = 0;
4782 
4783 /* Some notes on the H1 history mechanism:
4784 
4785    Transition rules are:
4786 
4787    read_{Kr,Kw}(Cr,Cw)  = (Cr,           Cr `join` Kw)
4788    write_{Kr,Kw}(Cr,Cw) = (Cr `join` Kw, Cr `join` Kw)
4789 
4790    After any access by a thread T to a location L, L's constraint pair
4791    (Cr,Cw) has Cw[T] == T's Kw[T], that is, == T's scalar W-clock.
4792 
4793    After a race by thread T conflicting with some previous access by
4794    some other thread U, for a location with constraint (before
4795    processing the later access) (Cr,Cw), then Cw[U] is the segment in
4796    which the previously access lies.
4797 
4798    Hence in record_race_info, we pass in Cfailed and Kfailed, which
4799    are compared so as to find out which thread(s) this access
4800    conflicts with.  Once that is established, we also require the
4801    pre-update Cw for the location, so we can index into it for those
4802    threads, to get the scalar clock values for the point at which the
4803    former accesses were made.  (In fact we only bother to do any of
4804    this for an arbitrarily chosen one of the conflicting threads, as
4805    that's simpler, it avoids flooding the user with vast amounts of
4806    mostly useless information, and because the program is wrong if it
4807    contains any races at all -- so we don't really need to show all
4808    conflicting access pairs initially, so long as we only show none if
4809    none exist).
4810 
4811    ---
4812 
4813    That requires the auxiliary proof that
4814 
4815       (Cr `join` Kw)[T] == Kw[T]
4816 
4817    Why should that be true?  Because for any thread T, Kw[T] >= the
4818    scalar clock value for T known by any other thread.  In other
4819    words, because T's value for its own scalar clock is at least as up
4820    to date as the value for it known by any other thread (that is true
4821    for both the R- and W- scalar clocks).  Hence no other thread will
4822    be able to feed in a value for that element (indirectly via a
4823    constraint) which will exceed Kw[T], and hence the join cannot
4824    cause that particular element to advance.
4825 */
4826 
4827 __attribute__((noinline))
record_race_info(Thr * acc_thr,Addr acc_addr,SizeT szB,Bool isWrite,VtsID Cfailed,VtsID Kfailed,VtsID Cw)4828 static void record_race_info ( Thr* acc_thr,
4829                                Addr acc_addr, SizeT szB, Bool isWrite,
4830                                VtsID Cfailed,
4831                                VtsID Kfailed,
4832                                VtsID Cw )
4833 {
4834    /* Call here to report a race.  We just hand it onwards to
4835       HG_(record_error_Race).  If that in turn discovers that the
4836       error is going to be collected, then, at history_level 2, that
4837       queries the conflicting-event map.  The alternative would be to
4838       query it right here.  But that causes a lot of pointless queries
4839       for errors which will shortly be discarded as duplicates, and
4840       can become a performance overhead; so we defer the query until
4841       we know the error is not a duplicate. */
4842 
4843    /* Stacks for the bounds of the (or one of the) conflicting
4844       segment(s).  These are only set at history_level 1. */
4845    ExeContext* hist1_seg_start = NULL;
4846    ExeContext* hist1_seg_end   = NULL;
4847    Thread*     hist1_conf_thr  = NULL;
4848 
4849    tl_assert(acc_thr);
4850    tl_assert(acc_thr->hgthread);
4851    tl_assert(acc_thr->hgthread->hbthr == acc_thr);
4852    tl_assert(HG_(clo_history_level) >= 0 && HG_(clo_history_level) <= 2);
4853 
4854    if (HG_(clo_history_level) == 1) {
4855       Bool found;
4856       Word firstIx, lastIx;
4857       ULong_n_EC key;
4858 
4859       /* At history_level 1, we must round up the relevant stack-pair
4860          for the conflicting segment right now.  This is because
4861          deferring it is complex; we can't (easily) put Kfailed and
4862          Cfailed into the XError and wait for later without
4863          getting tied up in difficulties with VtsID reference
4864          counting.  So just do it now. */
4865       Thr*  confThr;
4866       ULong confTym = 0;
4867       /* Which thread are we in conflict with?  There may be more than
4868          one, in which case VtsID__findFirst_notLEQ selects one arbitrarily
4869          (in fact it's the one with the lowest Thr* value). */
4870       confThr = VtsID__findFirst_notLEQ( Cfailed, Kfailed );
4871       /* This must exist!  since if it was NULL then there's no
4872          conflict (semantics of return value of
4873          VtsID__findFirst_notLEQ), and msmc{read,write}, which has
4874          called us, just checked exactly this -- that there was in
4875          fact a race. */
4876       tl_assert(confThr);
4877 
4878       /* Get the scalar clock value that the conflicting thread
4879          introduced into the constraint.  A careful examination of the
4880          base machine rules shows that this must be the same as the
4881          conflicting thread's scalar clock when it created this
4882          constraint.  Hence we know the scalar clock of the
4883          conflicting thread when the conflicting access was made. */
4884       confTym = VtsID__indexAt( Cfailed, confThr );
4885 
4886       /* Using this scalar clock, index into the conflicting thread's
4887          collection of stack traces made each time its vector clock
4888          (hence its scalar clock) changed.  This gives the stack
4889          traces at the start and end of the conflicting segment (well,
4890          as per comment just above, of one of the conflicting
4891          segments, if there are more than one). */
4892       key.ull = confTym;
4893       key.ec  = NULL;
4894       /* tl_assert(confThr); -- asserted just above */
4895       tl_assert(confThr->local_Kws_n_stacks);
4896       firstIx = lastIx = 0;
4897       found = VG_(lookupXA_UNSAFE)(
4898                  confThr->local_Kws_n_stacks,
4899                  &key, &firstIx, &lastIx,
4900                  (Int(*)(void*,void*))cmp__ULong_n_EC__by_ULong
4901               );
4902       if (0) VG_(printf)("record_race_info %u %u %u  confThr %p "
4903                          "confTym %llu found %d (%lu,%lu)\n",
4904                          Cfailed, Kfailed, Cw,
4905                          confThr, confTym, found, firstIx, lastIx);
4906       /* We can't indefinitely collect stack traces at VTS
4907          transitions, since we'd eventually run out of memory.  Hence
4908          note_local_Kw_n_stack_for will eventually throw away old
4909          ones, which in turn means we might fail to find index value
4910          confTym in the array. */
4911       if (found) {
4912          ULong_n_EC *pair_start, *pair_end;
4913          pair_start
4914             = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks, lastIx );
4915          hist1_seg_start = pair_start->ec;
4916          if (lastIx+1 < VG_(sizeXA)( confThr->local_Kws_n_stacks )) {
4917             pair_end
4918                = (ULong_n_EC*)VG_(indexXA)( confThr->local_Kws_n_stacks,
4919                                             lastIx+1 );
4920             /* from properties of VG_(lookupXA) and the comparison fn used: */
4921             tl_assert(pair_start->ull < pair_end->ull);
4922             hist1_seg_end = pair_end->ec;
4923             /* Could do a bit better here.  It may be that pair_end
4924                doesn't have a stack, but the following entries in the
4925                array have the same scalar Kw and to have a stack.  So
4926                we should search a bit further along the array than
4927                lastIx+1 if hist1_seg_end is NULL. */
4928          } else {
4929             if (!confThr->llexit_done)
4930                hist1_seg_end = main_get_EC( confThr );
4931          }
4932          // seg_start could be NULL iff this is the first stack in the thread
4933          //if (seg_start) VG_(pp_ExeContext)(seg_start);
4934          //if (seg_end)   VG_(pp_ExeContext)(seg_end);
4935          hist1_conf_thr = confThr->hgthread;
4936       }
4937    }
4938 
4939    HG_(record_error_Race)( acc_thr->hgthread, acc_addr,
4940                            szB, isWrite,
4941                            hist1_conf_thr, hist1_seg_start, hist1_seg_end );
4942 }
4943 
is_sane_SVal_C(SVal sv)4944 static Bool is_sane_SVal_C ( SVal sv ) {
4945    Bool leq;
4946    if (!SVal__isC(sv)) return True;
4947    leq = VtsID__cmpLEQ( SVal__unC_Rmin(sv), SVal__unC_Wmin(sv) );
4948    return leq;
4949 }
4950 
4951 
4952 /* Compute new state following a read */
msmcread(SVal svOld,Thr * acc_thr,Addr acc_addr,SizeT szB)4953 static inline SVal msmcread ( SVal svOld,
4954                               /* The following are only needed for
4955                                  creating error reports. */
4956                               Thr* acc_thr,
4957                               Addr acc_addr, SizeT szB )
4958 {
4959    SVal svNew = SVal_INVALID;
4960    stats__msmcread++;
4961 
4962    /* Redundant sanity check on the constraints */
4963    if (CHECK_MSM) {
4964       tl_assert(is_sane_SVal_C(svOld));
4965    }
4966 
4967    if (LIKELY(SVal__isC(svOld))) {
4968       VtsID tviR  = acc_thr->viR;
4969       VtsID tviW  = acc_thr->viW;
4970       VtsID rmini = SVal__unC_Rmin(svOld);
4971       VtsID wmini = SVal__unC_Wmin(svOld);
4972       Bool  leq   = VtsID__cmpLEQ(rmini,tviR);
4973       if (LIKELY(leq)) {
4974          /* no race */
4975          /* Note: RWLOCK subtlety: use tviW, not tviR */
4976          svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4977          goto out;
4978       } else {
4979          /* assert on sanity of constraints. */
4980          Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
4981          tl_assert(leqxx);
4982          // same as in non-race case
4983          svNew = SVal__mkC( rmini, VtsID__join2(wmini, tviW) );
4984          record_race_info( acc_thr, acc_addr, szB, False/*!isWrite*/,
4985                            rmini, /* Cfailed */
4986                            tviR,  /* Kfailed */
4987                            wmini  /* Cw */ );
4988          goto out;
4989       }
4990    }
4991    if (SVal__isA(svOld)) {
4992       /* reading no-access memory (sigh); leave unchanged */
4993       /* check for no pollution */
4994       tl_assert(svOld == SVal_NOACCESS);
4995       svNew = SVal_NOACCESS;
4996       goto out;
4997    }
4998    if (0) VG_(printf)("msmcread: bad svOld: 0x%016llx\n", svOld);
4999    tl_assert(0);
5000 
5001   out:
5002    if (CHECK_MSM) {
5003       tl_assert(is_sane_SVal_C(svNew));
5004    }
5005    if (UNLIKELY(svNew != svOld)) {
5006       tl_assert(svNew != SVal_INVALID);
5007       if (HG_(clo_history_level) >= 2
5008           && SVal__isC(svOld) && SVal__isC(svNew)) {
5009          event_map_bind( acc_addr, szB, False/*!isWrite*/, acc_thr );
5010          stats__msmcread_change++;
5011       }
5012    }
5013    return svNew;
5014 }
5015 
5016 
5017 /* Compute new state following a write */
msmcwrite(SVal svOld,Thr * acc_thr,Addr acc_addr,SizeT szB)5018 static inline SVal msmcwrite ( SVal svOld,
5019                               /* The following are only needed for
5020                                  creating error reports. */
5021                               Thr* acc_thr,
5022                               Addr acc_addr, SizeT szB )
5023 {
5024    SVal svNew = SVal_INVALID;
5025    stats__msmcwrite++;
5026 
5027    /* Redundant sanity check on the constraints */
5028    if (CHECK_MSM) {
5029       tl_assert(is_sane_SVal_C(svOld));
5030    }
5031 
5032    if (LIKELY(SVal__isC(svOld))) {
5033       VtsID tviW  = acc_thr->viW;
5034       VtsID wmini = SVal__unC_Wmin(svOld);
5035       Bool  leq   = VtsID__cmpLEQ(wmini,tviW);
5036       if (LIKELY(leq)) {
5037          /* no race */
5038          svNew = SVal__mkC( tviW, tviW );
5039          goto out;
5040       } else {
5041          VtsID rmini = SVal__unC_Rmin(svOld);
5042          /* assert on sanity of constraints. */
5043          Bool leqxx = VtsID__cmpLEQ(rmini,wmini);
5044          tl_assert(leqxx);
5045          // same as in non-race case
5046          // proof: in the non-race case, we have
5047          //    rmini <= wmini (invar on constraints)
5048          //    tviW <= tviR (invar on thread clocks)
5049          //    wmini <= tviW (from run-time check)
5050          // hence from transitivity of <= we have
5051          //    rmini <= wmini <= tviW
5052          // and so join(rmini,tviW) == tviW
5053          // and    join(wmini,tviW) == tviW
5054          // qed.
5055          svNew = SVal__mkC( VtsID__join2(rmini, tviW),
5056                             VtsID__join2(wmini, tviW) );
5057          record_race_info( acc_thr, acc_addr, szB, True/*isWrite*/,
5058                            wmini, /* Cfailed */
5059                            tviW,  /* Kfailed */
5060                            wmini  /* Cw */ );
5061          goto out;
5062       }
5063    }
5064    if (SVal__isA(svOld)) {
5065       /* writing no-access memory (sigh); leave unchanged */
5066       /* check for no pollution */
5067       tl_assert(svOld == SVal_NOACCESS);
5068       svNew = SVal_NOACCESS;
5069       goto out;
5070    }
5071    if (0) VG_(printf)("msmcwrite: bad svOld: 0x%016llx\n", svOld);
5072    tl_assert(0);
5073 
5074   out:
5075    if (CHECK_MSM) {
5076       tl_assert(is_sane_SVal_C(svNew));
5077    }
5078    if (UNLIKELY(svNew != svOld)) {
5079       tl_assert(svNew != SVal_INVALID);
5080       if (HG_(clo_history_level) >= 2
5081           && SVal__isC(svOld) && SVal__isC(svNew)) {
5082          event_map_bind( acc_addr, szB, True/*isWrite*/, acc_thr );
5083          stats__msmcwrite_change++;
5084       }
5085    }
5086    return svNew;
5087 }
5088 
5089 
5090 /////////////////////////////////////////////////////////
5091 //                                                     //
5092 // Apply core MSM to specific memory locations         //
5093 //                                                     //
5094 /////////////////////////////////////////////////////////
5095 
5096 /*------------- ZSM accesses: 8 bit sapply ------------- */
5097 
zsm_sapply08__msmcread(Thr * thr,Addr a)5098 static void zsm_sapply08__msmcread ( Thr* thr, Addr a ) {
5099    CacheLine* cl;
5100    UWord      cloff, tno, toff;
5101    SVal       svOld, svNew;
5102    UShort     descr;
5103    stats__cline_cread08s++;
5104    cl    = get_cacheline(a);
5105    cloff = get_cacheline_offset(a);
5106    tno   = get_treeno(a);
5107    toff  = get_tree_offset(a); /* == 0 .. 7 */
5108    descr = cl->descrs[tno];
5109    if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5110       SVal* tree = &cl->svals[tno << 3];
5111       cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5112       if (CHECK_ZSM)
5113          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5114    }
5115    svOld = cl->svals[cloff];
5116    svNew = msmcread( svOld, thr,a,1 );
5117    if (CHECK_ZSM)
5118       tl_assert(svNew != SVal_INVALID);
5119    cl->svals[cloff] = svNew;
5120 }
5121 
zsm_sapply08__msmcwrite(Thr * thr,Addr a)5122 static void zsm_sapply08__msmcwrite ( Thr* thr, Addr a ) {
5123    CacheLine* cl;
5124    UWord      cloff, tno, toff;
5125    SVal       svOld, svNew;
5126    UShort     descr;
5127    stats__cline_cwrite08s++;
5128    cl    = get_cacheline(a);
5129    cloff = get_cacheline_offset(a);
5130    tno   = get_treeno(a);
5131    toff  = get_tree_offset(a); /* == 0 .. 7 */
5132    descr = cl->descrs[tno];
5133    if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5134       SVal* tree = &cl->svals[tno << 3];
5135       cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5136       if (CHECK_ZSM)
5137          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5138    }
5139    svOld = cl->svals[cloff];
5140    svNew = msmcwrite( svOld, thr,a,1 );
5141    if (CHECK_ZSM)
5142       tl_assert(svNew != SVal_INVALID);
5143    cl->svals[cloff] = svNew;
5144 }
5145 
5146 /*------------- ZSM accesses: 16 bit sapply ------------- */
5147 
zsm_sapply16__msmcread(Thr * thr,Addr a)5148 static void zsm_sapply16__msmcread ( Thr* thr, Addr a ) {
5149    CacheLine* cl;
5150    UWord      cloff, tno, toff;
5151    SVal       svOld, svNew;
5152    UShort     descr;
5153    stats__cline_cread16s++;
5154    if (UNLIKELY(!aligned16(a))) goto slowcase;
5155    cl    = get_cacheline(a);
5156    cloff = get_cacheline_offset(a);
5157    tno   = get_treeno(a);
5158    toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5159    descr = cl->descrs[tno];
5160    if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5161       if (valid_value_is_below_me_16(descr, toff)) {
5162          goto slowcase;
5163       } else {
5164          SVal* tree = &cl->svals[tno << 3];
5165          cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5166       }
5167       if (CHECK_ZSM)
5168          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5169    }
5170    svOld = cl->svals[cloff];
5171    svNew = msmcread( svOld, thr,a,2 );
5172    if (CHECK_ZSM)
5173       tl_assert(svNew != SVal_INVALID);
5174    cl->svals[cloff] = svNew;
5175    return;
5176   slowcase: /* misaligned, or must go further down the tree */
5177    stats__cline_16to8splits++;
5178    zsm_sapply08__msmcread( thr, a + 0 );
5179    zsm_sapply08__msmcread( thr, a + 1 );
5180 }
5181 
zsm_sapply16__msmcwrite(Thr * thr,Addr a)5182 static void zsm_sapply16__msmcwrite ( Thr* thr, Addr a ) {
5183    CacheLine* cl;
5184    UWord      cloff, tno, toff;
5185    SVal       svOld, svNew;
5186    UShort     descr;
5187    stats__cline_cwrite16s++;
5188    if (UNLIKELY(!aligned16(a))) goto slowcase;
5189    cl    = get_cacheline(a);
5190    cloff = get_cacheline_offset(a);
5191    tno   = get_treeno(a);
5192    toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5193    descr = cl->descrs[tno];
5194    if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5195       if (valid_value_is_below_me_16(descr, toff)) {
5196          goto slowcase;
5197       } else {
5198          SVal* tree = &cl->svals[tno << 3];
5199          cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5200       }
5201       if (CHECK_ZSM)
5202          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5203    }
5204    svOld = cl->svals[cloff];
5205    svNew = msmcwrite( svOld, thr,a,2 );
5206    if (CHECK_ZSM)
5207       tl_assert(svNew != SVal_INVALID);
5208    cl->svals[cloff] = svNew;
5209    return;
5210   slowcase: /* misaligned, or must go further down the tree */
5211    stats__cline_16to8splits++;
5212    zsm_sapply08__msmcwrite( thr, a + 0 );
5213    zsm_sapply08__msmcwrite( thr, a + 1 );
5214 }
5215 
5216 /*------------- ZSM accesses: 32 bit sapply ------------- */
5217 
zsm_sapply32__msmcread(Thr * thr,Addr a)5218 static void zsm_sapply32__msmcread ( Thr* thr, Addr a ) {
5219    CacheLine* cl;
5220    UWord      cloff, tno, toff;
5221    SVal       svOld, svNew;
5222    UShort     descr;
5223    stats__cline_cread32s++;
5224    if (UNLIKELY(!aligned32(a))) goto slowcase;
5225    cl    = get_cacheline(a);
5226    cloff = get_cacheline_offset(a);
5227    tno   = get_treeno(a);
5228    toff  = get_tree_offset(a); /* == 0 or 4 */
5229    descr = cl->descrs[tno];
5230    if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5231       if (valid_value_is_above_me_32(descr, toff)) {
5232          SVal* tree = &cl->svals[tno << 3];
5233          cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5234       } else {
5235          goto slowcase;
5236       }
5237       if (CHECK_ZSM)
5238          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5239    }
5240    svOld = cl->svals[cloff];
5241    svNew = msmcread( svOld, thr,a,4 );
5242    if (CHECK_ZSM)
5243       tl_assert(svNew != SVal_INVALID);
5244    cl->svals[cloff] = svNew;
5245    return;
5246   slowcase: /* misaligned, or must go further down the tree */
5247    stats__cline_32to16splits++;
5248    zsm_sapply16__msmcread( thr, a + 0 );
5249    zsm_sapply16__msmcread( thr, a + 2 );
5250 }
5251 
zsm_sapply32__msmcwrite(Thr * thr,Addr a)5252 static void zsm_sapply32__msmcwrite ( Thr* thr, Addr a ) {
5253    CacheLine* cl;
5254    UWord      cloff, tno, toff;
5255    SVal       svOld, svNew;
5256    UShort     descr;
5257    stats__cline_cwrite32s++;
5258    if (UNLIKELY(!aligned32(a))) goto slowcase;
5259    cl    = get_cacheline(a);
5260    cloff = get_cacheline_offset(a);
5261    tno   = get_treeno(a);
5262    toff  = get_tree_offset(a); /* == 0 or 4 */
5263    descr = cl->descrs[tno];
5264    if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5265       if (valid_value_is_above_me_32(descr, toff)) {
5266          SVal* tree = &cl->svals[tno << 3];
5267          cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5268       } else {
5269          goto slowcase;
5270       }
5271       if (CHECK_ZSM)
5272          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5273    }
5274    svOld = cl->svals[cloff];
5275    svNew = msmcwrite( svOld, thr,a,4 );
5276    if (CHECK_ZSM)
5277       tl_assert(svNew != SVal_INVALID);
5278    cl->svals[cloff] = svNew;
5279    return;
5280   slowcase: /* misaligned, or must go further down the tree */
5281    stats__cline_32to16splits++;
5282    zsm_sapply16__msmcwrite( thr, a + 0 );
5283    zsm_sapply16__msmcwrite( thr, a + 2 );
5284 }
5285 
5286 /*------------- ZSM accesses: 64 bit sapply ------------- */
5287 
zsm_sapply64__msmcread(Thr * thr,Addr a)5288 static void zsm_sapply64__msmcread ( Thr* thr, Addr a ) {
5289    CacheLine* cl;
5290    UWord      cloff, tno;
5291    //UWord      toff;
5292    SVal       svOld, svNew;
5293    UShort     descr;
5294    stats__cline_cread64s++;
5295    if (UNLIKELY(!aligned64(a))) goto slowcase;
5296    cl    = get_cacheline(a);
5297    cloff = get_cacheline_offset(a);
5298    tno   = get_treeno(a);
5299    //toff  = get_tree_offset(a); /* == 0, unused */
5300    descr = cl->descrs[tno];
5301    if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5302       goto slowcase;
5303    }
5304    svOld = cl->svals[cloff];
5305    svNew = msmcread( svOld, thr,a,8 );
5306    if (CHECK_ZSM)
5307       tl_assert(svNew != SVal_INVALID);
5308    cl->svals[cloff] = svNew;
5309    return;
5310   slowcase: /* misaligned, or must go further down the tree */
5311    stats__cline_64to32splits++;
5312    zsm_sapply32__msmcread( thr, a + 0 );
5313    zsm_sapply32__msmcread( thr, a + 4 );
5314 }
5315 
zsm_sapply64__msmcwrite(Thr * thr,Addr a)5316 static void zsm_sapply64__msmcwrite ( Thr* thr, Addr a ) {
5317    CacheLine* cl;
5318    UWord      cloff, tno;
5319    //UWord      toff;
5320    SVal       svOld, svNew;
5321    UShort     descr;
5322    stats__cline_cwrite64s++;
5323    if (UNLIKELY(!aligned64(a))) goto slowcase;
5324    cl    = get_cacheline(a);
5325    cloff = get_cacheline_offset(a);
5326    tno   = get_treeno(a);
5327    //toff  = get_tree_offset(a); /* == 0, unused */
5328    descr = cl->descrs[tno];
5329    if (UNLIKELY( !(descr & TREE_DESCR_64) )) {
5330       goto slowcase;
5331    }
5332    svOld = cl->svals[cloff];
5333    svNew = msmcwrite( svOld, thr,a,8 );
5334    if (CHECK_ZSM)
5335       tl_assert(svNew != SVal_INVALID);
5336    cl->svals[cloff] = svNew;
5337    return;
5338   slowcase: /* misaligned, or must go further down the tree */
5339    stats__cline_64to32splits++;
5340    zsm_sapply32__msmcwrite( thr, a + 0 );
5341    zsm_sapply32__msmcwrite( thr, a + 4 );
5342 }
5343 
5344 /*--------------- ZSM accesses: 8 bit swrite --------------- */
5345 
5346 static
zsm_swrite08(Addr a,SVal svNew)5347 void zsm_swrite08 ( Addr a, SVal svNew ) {
5348    CacheLine* cl;
5349    UWord      cloff, tno, toff;
5350    UShort     descr;
5351    stats__cline_swrite08s++;
5352    cl    = get_cacheline(a);
5353    cloff = get_cacheline_offset(a);
5354    tno   = get_treeno(a);
5355    toff  = get_tree_offset(a); /* == 0 .. 7 */
5356    descr = cl->descrs[tno];
5357    if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5358       SVal* tree = &cl->svals[tno << 3];
5359       cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5360       if (CHECK_ZSM)
5361          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5362    }
5363    tl_assert(svNew != SVal_INVALID);
5364    cl->svals[cloff] = svNew;
5365 }
5366 
5367 /*--------------- ZSM accesses: 16 bit swrite --------------- */
5368 
5369 static
zsm_swrite16(Addr a,SVal svNew)5370 void zsm_swrite16 ( Addr a, SVal svNew ) {
5371    CacheLine* cl;
5372    UWord      cloff, tno, toff;
5373    UShort     descr;
5374    stats__cline_swrite16s++;
5375    if (UNLIKELY(!aligned16(a))) goto slowcase;
5376    cl    = get_cacheline(a);
5377    cloff = get_cacheline_offset(a);
5378    tno   = get_treeno(a);
5379    toff  = get_tree_offset(a); /* == 0, 2, 4 or 6 */
5380    descr = cl->descrs[tno];
5381    if (UNLIKELY( !(descr & (TREE_DESCR_16_0 << toff)) )) {
5382       if (valid_value_is_below_me_16(descr, toff)) {
5383          /* Writing at this level.  Need to fix up 'descr'. */
5384          cl->descrs[tno] = pullup_descr_to_16(descr, toff);
5385          /* At this point, the tree does not match cl->descr[tno] any
5386             more.  The assignments below will fix it up. */
5387       } else {
5388          /* We can't indiscriminately write on the w16 node as in the
5389             w64 case, as that might make the node inconsistent with
5390             its parent.  So first, pull down to this level. */
5391          SVal* tree = &cl->svals[tno << 3];
5392          cl->descrs[tno] = pulldown_to_16(tree, toff, descr);
5393       if (CHECK_ZSM)
5394          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5395       }
5396    }
5397    tl_assert(svNew != SVal_INVALID);
5398    cl->svals[cloff + 0] = svNew;
5399    cl->svals[cloff + 1] = SVal_INVALID;
5400    return;
5401   slowcase: /* misaligned */
5402    stats__cline_16to8splits++;
5403    zsm_swrite08( a + 0, svNew );
5404    zsm_swrite08( a + 1, svNew );
5405 }
5406 
5407 /*--------------- ZSM accesses: 32 bit swrite --------------- */
5408 
5409 static
zsm_swrite32(Addr a,SVal svNew)5410 void zsm_swrite32 ( Addr a, SVal svNew ) {
5411    CacheLine* cl;
5412    UWord      cloff, tno, toff;
5413    UShort     descr;
5414    stats__cline_swrite32s++;
5415    if (UNLIKELY(!aligned32(a))) goto slowcase;
5416    cl    = get_cacheline(a);
5417    cloff = get_cacheline_offset(a);
5418    tno   = get_treeno(a);
5419    toff  = get_tree_offset(a); /* == 0 or 4 */
5420    descr = cl->descrs[tno];
5421    if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) {
5422       if (valid_value_is_above_me_32(descr, toff)) {
5423          /* We can't indiscriminately write on the w32 node as in the
5424             w64 case, as that might make the node inconsistent with
5425             its parent.  So first, pull down to this level. */
5426          SVal* tree = &cl->svals[tno << 3];
5427          cl->descrs[tno] = pulldown_to_32(tree, toff, descr);
5428          if (CHECK_ZSM)
5429             tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
5430       } else {
5431          /* Writing at this level.  Need to fix up 'descr'. */
5432          cl->descrs[tno] = pullup_descr_to_32(descr, toff);
5433          /* At this point, the tree does not match cl->descr[tno] any
5434             more.  The assignments below will fix it up. */
5435       }
5436    }
5437    tl_assert(svNew != SVal_INVALID);
5438    cl->svals[cloff + 0] = svNew;
5439    cl->svals[cloff + 1] = SVal_INVALID;
5440    cl->svals[cloff + 2] = SVal_INVALID;
5441    cl->svals[cloff + 3] = SVal_INVALID;
5442    return;
5443   slowcase: /* misaligned */
5444    stats__cline_32to16splits++;
5445    zsm_swrite16( a + 0, svNew );
5446    zsm_swrite16( a + 2, svNew );
5447 }
5448 
5449 /*--------------- ZSM accesses: 64 bit swrite --------------- */
5450 
5451 static
zsm_swrite64(Addr a,SVal svNew)5452 void zsm_swrite64 ( Addr a, SVal svNew ) {
5453    CacheLine* cl;
5454    UWord      cloff, tno;
5455    //UWord    toff;
5456    stats__cline_swrite64s++;
5457    if (UNLIKELY(!aligned64(a))) goto slowcase;
5458    cl    = get_cacheline(a);
5459    cloff = get_cacheline_offset(a);
5460    tno   = get_treeno(a);
5461    //toff  = get_tree_offset(a); /* == 0, unused */
5462    cl->descrs[tno] = TREE_DESCR_64;
5463    tl_assert(svNew != SVal_INVALID);
5464    cl->svals[cloff + 0] = svNew;
5465    cl->svals[cloff + 1] = SVal_INVALID;
5466    cl->svals[cloff + 2] = SVal_INVALID;
5467    cl->svals[cloff + 3] = SVal_INVALID;
5468    cl->svals[cloff + 4] = SVal_INVALID;
5469    cl->svals[cloff + 5] = SVal_INVALID;
5470    cl->svals[cloff + 6] = SVal_INVALID;
5471    cl->svals[cloff + 7] = SVal_INVALID;
5472    return;
5473   slowcase: /* misaligned */
5474    stats__cline_64to32splits++;
5475    zsm_swrite32( a + 0, svNew );
5476    zsm_swrite32( a + 4, svNew );
5477 }
5478 
5479 /*------------- ZSM accesses: 8 bit sread/scopy ------------- */
5480 
5481 static
zsm_sread08(Addr a)5482 SVal zsm_sread08 ( Addr a ) {
5483    CacheLine* cl;
5484    UWord      cloff, tno, toff;
5485    UShort     descr;
5486    stats__cline_sread08s++;
5487    cl    = get_cacheline(a);
5488    cloff = get_cacheline_offset(a);
5489    tno   = get_treeno(a);
5490    toff  = get_tree_offset(a); /* == 0 .. 7 */
5491    descr = cl->descrs[tno];
5492    if (UNLIKELY( !(descr & (TREE_DESCR_8_0 << toff)) )) {
5493       SVal* tree = &cl->svals[tno << 3];
5494       cl->descrs[tno] = pulldown_to_8(tree, toff, descr);
5495    }
5496    return cl->svals[cloff];
5497 }
5498 
zsm_scopy08(Addr src,Addr dst,Bool uu_normalise)5499 static void zsm_scopy08 ( Addr src, Addr dst, Bool uu_normalise ) {
5500    SVal       sv;
5501    stats__cline_scopy08s++;
5502    sv = zsm_sread08( src );
5503    zsm_swrite08( dst, sv );
5504 }
5505 
5506 
5507 /* Block-copy states (needed for implementing realloc()).  Note this
5508    doesn't change the filtering arrangements.  The caller of
5509    zsm_scopy_range needs to attend to that. */
5510 
zsm_scopy_range(Addr src,Addr dst,SizeT len)5511 static void zsm_scopy_range ( Addr src, Addr dst, SizeT len )
5512 {
5513    SizeT i;
5514    if (len == 0)
5515       return;
5516 
5517    /* assert for non-overlappingness */
5518    tl_assert(src+len <= dst || dst+len <= src);
5519 
5520    /* To be simple, just copy byte by byte.  But so as not to wreck
5521       performance for later accesses to dst[0 .. len-1], normalise
5522       destination lines as we finish with them, and also normalise the
5523       line containing the first and last address. */
5524    for (i = 0; i < len; i++) {
5525       Bool normalise
5526          = get_cacheline_offset( dst+i+1 ) == 0 /* last in line */
5527            || i == 0       /* first in range */
5528            || i == len-1;  /* last in range */
5529       zsm_scopy08( src+i, dst+i, normalise );
5530    }
5531 }
5532 
5533 
5534 /* For setting address ranges to a given value.  Has considerable
5535    sophistication so as to avoid generating large numbers of pointless
5536    cache loads/writebacks for large ranges. */
5537 
5538 /* Do small ranges in-cache, in the obvious way. */
5539 static
zsm_sset_range_SMALL(Addr a,SizeT len,SVal svNew)5540 void zsm_sset_range_SMALL ( Addr a, SizeT len, SVal svNew )
5541 {
5542    /* fast track a couple of common cases */
5543    if (len == 4 && aligned32(a)) {
5544       zsm_swrite32( a, svNew );
5545       return;
5546    }
5547    if (len == 8 && aligned64(a)) {
5548       zsm_swrite64( a, svNew );
5549       return;
5550    }
5551 
5552    /* be completely general (but as efficient as possible) */
5553    if (len == 0) return;
5554 
5555    if (!aligned16(a) && len >= 1) {
5556       zsm_swrite08( a, svNew );
5557       a += 1;
5558       len -= 1;
5559       tl_assert(aligned16(a));
5560    }
5561    if (len == 0) return;
5562 
5563    if (!aligned32(a) && len >= 2) {
5564       zsm_swrite16( a, svNew );
5565       a += 2;
5566       len -= 2;
5567       tl_assert(aligned32(a));
5568    }
5569    if (len == 0) return;
5570 
5571    if (!aligned64(a) && len >= 4) {
5572       zsm_swrite32( a, svNew );
5573       a += 4;
5574       len -= 4;
5575       tl_assert(aligned64(a));
5576    }
5577    if (len == 0) return;
5578 
5579    if (len >= 8) {
5580       tl_assert(aligned64(a));
5581       while (len >= 8) {
5582          zsm_swrite64( a, svNew );
5583          a += 8;
5584          len -= 8;
5585       }
5586       tl_assert(aligned64(a));
5587    }
5588    if (len == 0) return;
5589 
5590    if (len >= 4)
5591       tl_assert(aligned32(a));
5592    if (len >= 4) {
5593       zsm_swrite32( a, svNew );
5594       a += 4;
5595       len -= 4;
5596    }
5597    if (len == 0) return;
5598 
5599    if (len >= 2)
5600       tl_assert(aligned16(a));
5601    if (len >= 2) {
5602       zsm_swrite16( a, svNew );
5603       a += 2;
5604       len -= 2;
5605    }
5606    if (len == 0) return;
5607 
5608    if (len >= 1) {
5609       zsm_swrite08( a, svNew );
5610       //a += 1;
5611       len -= 1;
5612    }
5613    tl_assert(len == 0);
5614 }
5615 
5616 
5617 /* If we're doing a small range, hand off to zsm_sset_range_SMALL.  But
5618    for larger ranges, try to operate directly on the out-of-cache
5619    representation, rather than dragging lines into the cache,
5620    overwriting them, and forcing them out.  This turns out to be an
5621    important performance optimisation.
5622 
5623    Note that this doesn't change the filtering arrangements.  The
5624    caller of zsm_sset_range needs to attend to that. */
5625 
zsm_sset_range(Addr a,SizeT len,SVal svNew)5626 static void zsm_sset_range ( Addr a, SizeT len, SVal svNew )
5627 {
5628    tl_assert(svNew != SVal_INVALID);
5629    stats__cache_make_New_arange += (ULong)len;
5630 
5631    if (0 && len > 500)
5632       VG_(printf)("make New      ( %#lx, %ld )\n", a, len );
5633 
5634    if (0) {
5635       static UWord n_New_in_cache = 0;
5636       static UWord n_New_not_in_cache = 0;
5637       /* tag is 'a' with the in-line offset masked out,
5638          eg a[31]..a[4] 0000 */
5639       Addr       tag = a & ~(N_LINE_ARANGE - 1);
5640       UWord      wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
5641       if (LIKELY(tag == cache_shmem.tags0[wix])) {
5642          n_New_in_cache++;
5643       } else {
5644          n_New_not_in_cache++;
5645       }
5646       if (0 == ((n_New_in_cache + n_New_not_in_cache) % 100000))
5647          VG_(printf)("shadow_mem_make_New: IN %lu OUT %lu\n",
5648                      n_New_in_cache, n_New_not_in_cache );
5649    }
5650 
5651    if (LIKELY(len < 2 * N_LINE_ARANGE)) {
5652       zsm_sset_range_SMALL( a, len, svNew );
5653    } else {
5654       Addr  before_start  = a;
5655       Addr  aligned_start = cacheline_ROUNDUP(a);
5656       Addr  after_start   = cacheline_ROUNDDN(a + len);
5657       UWord before_len    = aligned_start - before_start;
5658       UWord aligned_len   = after_start - aligned_start;
5659       UWord after_len     = a + len - after_start;
5660       tl_assert(before_start <= aligned_start);
5661       tl_assert(aligned_start <= after_start);
5662       tl_assert(before_len < N_LINE_ARANGE);
5663       tl_assert(after_len < N_LINE_ARANGE);
5664       tl_assert(get_cacheline_offset(aligned_start) == 0);
5665       if (get_cacheline_offset(a) == 0) {
5666          tl_assert(before_len == 0);
5667          tl_assert(a == aligned_start);
5668       }
5669       if (get_cacheline_offset(a+len) == 0) {
5670          tl_assert(after_len == 0);
5671          tl_assert(after_start == a+len);
5672       }
5673       if (before_len > 0) {
5674          zsm_sset_range_SMALL( before_start, before_len, svNew );
5675       }
5676       if (after_len > 0) {
5677          zsm_sset_range_SMALL( after_start, after_len, svNew );
5678       }
5679       stats__cache_make_New_inZrep += (ULong)aligned_len;
5680 
5681       while (1) {
5682          Addr tag;
5683          UWord wix;
5684          if (aligned_start >= after_start)
5685             break;
5686          tl_assert(get_cacheline_offset(aligned_start) == 0);
5687          tag = aligned_start & ~(N_LINE_ARANGE - 1);
5688          wix = (aligned_start >> N_LINE_BITS) & (N_WAY_NENT - 1);
5689          if (tag == cache_shmem.tags0[wix]) {
5690             UWord i;
5691             for (i = 0; i < N_LINE_ARANGE / 8; i++)
5692                zsm_swrite64( aligned_start + i * 8, svNew );
5693          } else {
5694             UWord i;
5695             Word zix;
5696             SecMap* sm;
5697             LineZ* lineZ;
5698             /* This line is not in the cache.  Do not force it in; instead
5699                modify it in-place. */
5700             /* find the Z line to write in and rcdec it or the
5701                associated F line. */
5702             find_Z_for_writing( &sm, &zix, tag );
5703             tl_assert(sm);
5704             tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
5705             lineZ = &sm->linesZ[zix];
5706             lineZ->dict[0] = svNew;
5707             lineZ->dict[1] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
5708             for (i = 0; i < N_LINE_ARANGE/4; i++)
5709                lineZ->ix2s[i] = 0; /* all refer to dict[0] */
5710             rcinc_LineZ(lineZ);
5711          }
5712          aligned_start += N_LINE_ARANGE;
5713          aligned_len -= N_LINE_ARANGE;
5714       }
5715       tl_assert(aligned_start == after_start);
5716       tl_assert(aligned_len == 0);
5717    }
5718 }
5719 
5720 
5721 /////////////////////////////////////////////////////////
5722 //                                                     //
5723 // Front-filtering accesses                            //
5724 //                                                     //
5725 /////////////////////////////////////////////////////////
5726 
5727 static UWord stats__f_ac = 0;
5728 static UWord stats__f_sk = 0;
5729 
5730 #if 0
5731 #  define STATS__F_SHOW \
5732      do { \
5733         if (UNLIKELY(0 == (stats__f_ac & 0xFFFFFF))) \
5734            VG_(printf)("filters: ac %lu sk %lu\n",   \
5735            stats__f_ac, stats__f_sk); \
5736      } while (0)
5737 #else
5738 #  define STATS__F_SHOW /* */
5739 #endif
5740 
zsm_sapply08_f__msmcwrite(Thr * thr,Addr a)5741 void zsm_sapply08_f__msmcwrite ( Thr* thr, Addr a ) {
5742    stats__f_ac++;
5743    STATS__F_SHOW;
5744    if (LIKELY(Filter__ok_to_skip_cwr08(thr->filter, a))) {
5745       stats__f_sk++;
5746       return;
5747    }
5748    zsm_sapply08__msmcwrite(thr, a);
5749 }
5750 
zsm_sapply16_f__msmcwrite(Thr * thr,Addr a)5751 void zsm_sapply16_f__msmcwrite ( Thr* thr, Addr a ) {
5752    stats__f_ac++;
5753    STATS__F_SHOW;
5754    if (LIKELY(Filter__ok_to_skip_cwr16(thr->filter, a))) {
5755       stats__f_sk++;
5756       return;
5757    }
5758    zsm_sapply16__msmcwrite(thr, a);
5759 }
5760 
zsm_sapply32_f__msmcwrite(Thr * thr,Addr a)5761 void zsm_sapply32_f__msmcwrite ( Thr* thr, Addr a ) {
5762    stats__f_ac++;
5763    STATS__F_SHOW;
5764    if (LIKELY(Filter__ok_to_skip_cwr32(thr->filter, a))) {
5765       stats__f_sk++;
5766       return;
5767    }
5768    zsm_sapply32__msmcwrite(thr, a);
5769 }
5770 
zsm_sapply64_f__msmcwrite(Thr * thr,Addr a)5771 void zsm_sapply64_f__msmcwrite ( Thr* thr, Addr a ) {
5772    stats__f_ac++;
5773    STATS__F_SHOW;
5774    if (LIKELY(Filter__ok_to_skip_cwr64(thr->filter, a))) {
5775       stats__f_sk++;
5776       return;
5777    }
5778    zsm_sapply64__msmcwrite(thr, a);
5779 }
5780 
zsm_sapplyNN_f__msmcwrite(Thr * thr,Addr a,SizeT len)5781 void zsm_sapplyNN_f__msmcwrite ( Thr* thr, Addr a, SizeT len )
5782 {
5783    /* fast track a couple of common cases */
5784    if (len == 4 && aligned32(a)) {
5785       zsm_sapply32_f__msmcwrite( thr, a );
5786       return;
5787    }
5788    if (len == 8 && aligned64(a)) {
5789       zsm_sapply64_f__msmcwrite( thr, a );
5790       return;
5791    }
5792 
5793    /* be completely general (but as efficient as possible) */
5794    if (len == 0) return;
5795 
5796    if (!aligned16(a) && len >= 1) {
5797       zsm_sapply08_f__msmcwrite( thr, a );
5798       a += 1;
5799       len -= 1;
5800       tl_assert(aligned16(a));
5801    }
5802    if (len == 0) return;
5803 
5804    if (!aligned32(a) && len >= 2) {
5805       zsm_sapply16_f__msmcwrite( thr, a );
5806       a += 2;
5807       len -= 2;
5808       tl_assert(aligned32(a));
5809    }
5810    if (len == 0) return;
5811 
5812    if (!aligned64(a) && len >= 4) {
5813       zsm_sapply32_f__msmcwrite( thr, a );
5814       a += 4;
5815       len -= 4;
5816       tl_assert(aligned64(a));
5817    }
5818    if (len == 0) return;
5819 
5820    if (len >= 8) {
5821       tl_assert(aligned64(a));
5822       while (len >= 8) {
5823          zsm_sapply64_f__msmcwrite( thr, a );
5824          a += 8;
5825          len -= 8;
5826       }
5827       tl_assert(aligned64(a));
5828    }
5829    if (len == 0) return;
5830 
5831    if (len >= 4)
5832       tl_assert(aligned32(a));
5833    if (len >= 4) {
5834       zsm_sapply32_f__msmcwrite( thr, a );
5835       a += 4;
5836       len -= 4;
5837    }
5838    if (len == 0) return;
5839 
5840    if (len >= 2)
5841       tl_assert(aligned16(a));
5842    if (len >= 2) {
5843       zsm_sapply16_f__msmcwrite( thr, a );
5844       a += 2;
5845       len -= 2;
5846    }
5847    if (len == 0) return;
5848 
5849    if (len >= 1) {
5850       zsm_sapply08_f__msmcwrite( thr, a );
5851       //a += 1;
5852       len -= 1;
5853    }
5854    tl_assert(len == 0);
5855 }
5856 
zsm_sapply08_f__msmcread(Thr * thr,Addr a)5857 void zsm_sapply08_f__msmcread ( Thr* thr, Addr a ) {
5858    stats__f_ac++;
5859    STATS__F_SHOW;
5860    if (LIKELY(Filter__ok_to_skip_crd08(thr->filter, a))) {
5861       stats__f_sk++;
5862       return;
5863    }
5864    zsm_sapply08__msmcread(thr, a);
5865 }
5866 
zsm_sapply16_f__msmcread(Thr * thr,Addr a)5867 void zsm_sapply16_f__msmcread ( Thr* thr, Addr a ) {
5868    stats__f_ac++;
5869    STATS__F_SHOW;
5870    if (LIKELY(Filter__ok_to_skip_crd16(thr->filter, a))) {
5871       stats__f_sk++;
5872       return;
5873    }
5874    zsm_sapply16__msmcread(thr, a);
5875 }
5876 
zsm_sapply32_f__msmcread(Thr * thr,Addr a)5877 void zsm_sapply32_f__msmcread ( Thr* thr, Addr a ) {
5878    stats__f_ac++;
5879    STATS__F_SHOW;
5880    if (LIKELY(Filter__ok_to_skip_crd32(thr->filter, a))) {
5881       stats__f_sk++;
5882       return;
5883    }
5884    zsm_sapply32__msmcread(thr, a);
5885 }
5886 
zsm_sapply64_f__msmcread(Thr * thr,Addr a)5887 void zsm_sapply64_f__msmcread ( Thr* thr, Addr a ) {
5888    stats__f_ac++;
5889    STATS__F_SHOW;
5890    if (LIKELY(Filter__ok_to_skip_crd64(thr->filter, a))) {
5891       stats__f_sk++;
5892       return;
5893    }
5894    zsm_sapply64__msmcread(thr, a);
5895 }
5896 
zsm_sapplyNN_f__msmcread(Thr * thr,Addr a,SizeT len)5897 void zsm_sapplyNN_f__msmcread ( Thr* thr, Addr a, SizeT len )
5898 {
5899    /* fast track a couple of common cases */
5900    if (len == 4 && aligned32(a)) {
5901       zsm_sapply32_f__msmcread( thr, a );
5902       return;
5903    }
5904    if (len == 8 && aligned64(a)) {
5905       zsm_sapply64_f__msmcread( thr, a );
5906       return;
5907    }
5908 
5909    /* be completely general (but as efficient as possible) */
5910    if (len == 0) return;
5911 
5912    if (!aligned16(a) && len >= 1) {
5913       zsm_sapply08_f__msmcread( thr, a );
5914       a += 1;
5915       len -= 1;
5916       tl_assert(aligned16(a));
5917    }
5918    if (len == 0) return;
5919 
5920    if (!aligned32(a) && len >= 2) {
5921       zsm_sapply16_f__msmcread( thr, a );
5922       a += 2;
5923       len -= 2;
5924       tl_assert(aligned32(a));
5925    }
5926    if (len == 0) return;
5927 
5928    if (!aligned64(a) && len >= 4) {
5929       zsm_sapply32_f__msmcread( thr, a );
5930       a += 4;
5931       len -= 4;
5932       tl_assert(aligned64(a));
5933    }
5934    if (len == 0) return;
5935 
5936    if (len >= 8) {
5937       tl_assert(aligned64(a));
5938       while (len >= 8) {
5939          zsm_sapply64_f__msmcread( thr, a );
5940          a += 8;
5941          len -= 8;
5942       }
5943       tl_assert(aligned64(a));
5944    }
5945    if (len == 0) return;
5946 
5947    if (len >= 4)
5948       tl_assert(aligned32(a));
5949    if (len >= 4) {
5950       zsm_sapply32_f__msmcread( thr, a );
5951       a += 4;
5952       len -= 4;
5953    }
5954    if (len == 0) return;
5955 
5956    if (len >= 2)
5957       tl_assert(aligned16(a));
5958    if (len >= 2) {
5959       zsm_sapply16_f__msmcread( thr, a );
5960       a += 2;
5961       len -= 2;
5962    }
5963    if (len == 0) return;
5964 
5965    if (len >= 1) {
5966       zsm_sapply08_f__msmcread( thr, a );
5967       //a += 1;
5968       len -= 1;
5969    }
5970    tl_assert(len == 0);
5971 }
5972 
libhb_Thr_resumes(Thr * thr)5973 void libhb_Thr_resumes ( Thr* thr )
5974 {
5975    if (0) VG_(printf)("resume %p\n", thr);
5976    tl_assert(thr);
5977    tl_assert(!thr->llexit_done);
5978    Filter__clear(thr->filter, "libhb_Thr_resumes");
5979    /* A kludge, but .. if this thread doesn't have any marker stacks
5980       at all, get one right now.  This is easier than figuring out
5981       exactly when at thread startup we can and can't take a stack
5982       snapshot. */
5983    if (HG_(clo_history_level) == 1) {
5984       tl_assert(thr->local_Kws_n_stacks);
5985       if (VG_(sizeXA)( thr->local_Kws_n_stacks ) == 0)
5986          note_local_Kw_n_stack_for(thr);
5987    }
5988 }
5989 
5990 
5991 /////////////////////////////////////////////////////////
5992 //                                                     //
5993 // Synchronisation objects                             //
5994 //                                                     //
5995 /////////////////////////////////////////////////////////
5996 
5997 /* A double linked list of all the SO's. */
5998 SO* admin_SO = NULL;
5999 
SO__Alloc(void)6000 static SO* SO__Alloc ( void )
6001 {
6002    SO* so = HG_(zalloc)( "libhb.SO__Alloc.1", sizeof(SO) );
6003    so->viR   = VtsID_INVALID;
6004    so->viW   = VtsID_INVALID;
6005    so->magic = SO_MAGIC;
6006    /* Add to double linked list */
6007    if (admin_SO) {
6008       tl_assert(admin_SO->admin_prev == NULL);
6009       admin_SO->admin_prev = so;
6010       so->admin_next = admin_SO;
6011    } else {
6012       so->admin_next = NULL;
6013    }
6014    so->admin_prev = NULL;
6015    admin_SO = so;
6016    /* */
6017    return so;
6018 }
6019 
SO__Dealloc(SO * so)6020 static void SO__Dealloc ( SO* so )
6021 {
6022    tl_assert(so);
6023    tl_assert(so->magic == SO_MAGIC);
6024    if (so->viR == VtsID_INVALID) {
6025       tl_assert(so->viW == VtsID_INVALID);
6026    } else {
6027       tl_assert(so->viW != VtsID_INVALID);
6028       VtsID__rcdec(so->viR);
6029       VtsID__rcdec(so->viW);
6030    }
6031    so->magic = 0;
6032    /* Del from double linked list */
6033    if (so->admin_prev)
6034       so->admin_prev->admin_next = so->admin_next;
6035    if (so->admin_next)
6036       so->admin_next->admin_prev = so->admin_prev;
6037    if (so == admin_SO)
6038       admin_SO = so->admin_next;
6039    /* */
6040    HG_(free)( so );
6041 }
6042 
6043 
6044 /////////////////////////////////////////////////////////
6045 //                                                     //
6046 // Top Level API                                       //
6047 //                                                     //
6048 /////////////////////////////////////////////////////////
6049 
show_thread_state(HChar * str,Thr * t)6050 static void show_thread_state ( HChar* str, Thr* t )
6051 {
6052    if (1) return;
6053    if (t->viR == t->viW) {
6054       VG_(printf)("thr \"%s\" %p has vi* %u==", str, t, t->viR );
6055       VtsID__pp( t->viR );
6056       VG_(printf)("%s","\n");
6057    } else {
6058       VG_(printf)("thr \"%s\" %p has viR %u==", str, t, t->viR );
6059       VtsID__pp( t->viR );
6060       VG_(printf)(" viW %u==", t->viW);
6061       VtsID__pp( t->viW );
6062       VG_(printf)("%s","\n");
6063    }
6064 }
6065 
6066 
libhb_init(void (* get_stacktrace)(Thr *,Addr *,UWord),ExeContext * (* get_EC)(Thr *))6067 Thr* libhb_init (
6068         void        (*get_stacktrace)( Thr*, Addr*, UWord ),
6069         ExeContext* (*get_EC)( Thr* )
6070      )
6071 {
6072    Thr*  thr;
6073    VtsID vi;
6074 
6075    // We will have to have to store a large number of these,
6076    // so make sure they're the size we expect them to be.
6077    tl_assert(sizeof(ScalarTS) == 8);
6078 
6079    /* because first 1024 unusable */
6080    tl_assert(SCALARTS_N_THRBITS >= 11);
6081    /* so as to fit in a UInt w/ 3 bits to spare (see defn of
6082       Thr_n_RCEC). */
6083    tl_assert(SCALARTS_N_THRBITS <= 29);
6084 
6085    /* Need to be sure that Thr_n_RCEC is 2 words (64-bit) or 3 words
6086       (32-bit).  It's not correctness-critical, but there are a lot of
6087       them, so it's important from a space viewpoint.  Unfortunately
6088       we simply can't pack it into 2 words on a 32-bit target. */
6089    if (sizeof(UWord) == 8) {
6090       tl_assert(sizeof(Thr_n_RCEC) == 16);
6091    } else {
6092       tl_assert(sizeof(Thr_n_RCEC) == 12);
6093    }
6094 
6095    /* Word sets really are 32 bits.  Even on a 64 bit target. */
6096    tl_assert(sizeof(WordSetID) == 4);
6097    tl_assert(sizeof(WordSet) == sizeof(WordSetID));
6098 
6099    tl_assert(get_stacktrace);
6100    tl_assert(get_EC);
6101    main_get_stacktrace   = get_stacktrace;
6102    main_get_EC           = get_EC;
6103 
6104    // No need to initialise hg_wordfm.
6105    // No need to initialise hg_wordset.
6106 
6107    /* Allocated once and never deallocated.  Used as a temporary in
6108       VTS singleton, tick and join operations. */
6109    temp_max_sized_VTS = VTS__new( "libhb.libhb_init.1", ThrID_MAX_VALID );
6110    temp_max_sized_VTS->id = VtsID_INVALID;
6111    verydead_thread_table_init();
6112    vts_set_init();
6113    vts_tab_init();
6114    event_map_init();
6115    VtsID__invalidate_caches();
6116 
6117    // initialise shadow memory
6118    zsm_init( SVal__rcinc, SVal__rcdec );
6119 
6120    thr = Thr__new();
6121    vi  = VtsID__mk_Singleton( thr, 1 );
6122    thr->viR = vi;
6123    thr->viW = vi;
6124    VtsID__rcinc(thr->viR);
6125    VtsID__rcinc(thr->viW);
6126 
6127    show_thread_state("  root", thr);
6128    return thr;
6129 }
6130 
6131 
libhb_create(Thr * parent)6132 Thr* libhb_create ( Thr* parent )
6133 {
6134    /* The child's VTSs are copies of the parent's VTSs, but ticked at
6135       the child's index.  Since the child's index is guaranteed
6136       unique, it has never been seen before, so the implicit value
6137       before the tick is zero and after that is one. */
6138    Thr* child = Thr__new();
6139 
6140    child->viR = VtsID__tick( parent->viR, child );
6141    child->viW = VtsID__tick( parent->viW, child );
6142    Filter__clear(child->filter, "libhb_create(child)");
6143    VtsID__rcinc(child->viR);
6144    VtsID__rcinc(child->viW);
6145    /* We need to do note_local_Kw_n_stack_for( child ), but it's too
6146       early for that - it may not have a valid TId yet.  So, let
6147       libhb_Thr_resumes pick it up the first time the thread runs. */
6148 
6149    tl_assert(VtsID__indexAt( child->viR, child ) == 1);
6150    tl_assert(VtsID__indexAt( child->viW, child ) == 1);
6151 
6152    /* and the parent has to move along too */
6153    VtsID__rcdec(parent->viR);
6154    VtsID__rcdec(parent->viW);
6155    parent->viR = VtsID__tick( parent->viR, parent );
6156    parent->viW = VtsID__tick( parent->viW, parent );
6157    Filter__clear(parent->filter, "libhb_create(parent)");
6158    VtsID__rcinc(parent->viR);
6159    VtsID__rcinc(parent->viW);
6160    note_local_Kw_n_stack_for( parent );
6161 
6162    show_thread_state(" child", child);
6163    show_thread_state("parent", parent);
6164 
6165    return child;
6166 }
6167 
6168 /* Shut down the library, and print stats (in fact that's _all_
6169    this is for. */
libhb_shutdown(Bool show_stats)6170 void libhb_shutdown ( Bool show_stats )
6171 {
6172    if (show_stats) {
6173       VG_(printf)("%s","<<< BEGIN libhb stats >>>\n");
6174       VG_(printf)(" secmaps: %'10lu allocd (%'12lu g-a-range)\n",
6175                   stats__secmaps_allocd,
6176                   stats__secmap_ga_space_covered);
6177       VG_(printf)("  linesZ: %'10lu allocd (%'12lu bytes occupied)\n",
6178                   stats__secmap_linesZ_allocd,
6179                   stats__secmap_linesZ_bytes);
6180       VG_(printf)("  linesF: %'10lu allocd (%'12lu bytes occupied)\n",
6181                   stats__secmap_linesF_allocd,
6182                   stats__secmap_linesF_bytes);
6183       VG_(printf)(" secmaps: %'10lu iterator steppings\n",
6184                   stats__secmap_iterator_steppings);
6185       VG_(printf)(" secmaps: %'10lu searches (%'12lu slow)\n",
6186                   stats__secmaps_search, stats__secmaps_search_slow);
6187 
6188       VG_(printf)("%s","\n");
6189       VG_(printf)("   cache: %'lu totrefs (%'lu misses)\n",
6190                   stats__cache_totrefs, stats__cache_totmisses );
6191       VG_(printf)("   cache: %'14lu Z-fetch,    %'14lu F-fetch\n",
6192                   stats__cache_Z_fetches, stats__cache_F_fetches );
6193       VG_(printf)("   cache: %'14lu Z-wback,    %'14lu F-wback\n",
6194                   stats__cache_Z_wbacks, stats__cache_F_wbacks );
6195       VG_(printf)("   cache: %'14lu invals,     %'14lu flushes\n",
6196                   stats__cache_invals, stats__cache_flushes );
6197       VG_(printf)("   cache: %'14llu arange_New  %'14llu direct-to-Zreps\n",
6198                   stats__cache_make_New_arange,
6199                   stats__cache_make_New_inZrep);
6200 
6201       VG_(printf)("%s","\n");
6202       VG_(printf)("   cline: %'10lu normalises\n",
6203                   stats__cline_normalises );
6204       VG_(printf)("   cline: c rds 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6205                   stats__cline_cread64s,
6206                   stats__cline_cread32s,
6207                   stats__cline_cread16s,
6208                   stats__cline_cread08s );
6209       VG_(printf)("   cline: c wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6210                   stats__cline_cwrite64s,
6211                   stats__cline_cwrite32s,
6212                   stats__cline_cwrite16s,
6213                   stats__cline_cwrite08s );
6214       VG_(printf)("   cline: s wrs 8/4/2/1: %'13lu %'13lu %'13lu %'13lu\n",
6215                   stats__cline_swrite64s,
6216                   stats__cline_swrite32s,
6217                   stats__cline_swrite16s,
6218                   stats__cline_swrite08s );
6219       VG_(printf)("   cline: s rd1s %'lu, s copy1s %'lu\n",
6220                   stats__cline_sread08s, stats__cline_scopy08s );
6221       VG_(printf)("   cline:    splits: 8to4 %'12lu    4to2 %'12lu    2to1 %'12lu\n",
6222                  stats__cline_64to32splits,
6223                  stats__cline_32to16splits,
6224                  stats__cline_16to8splits );
6225       VG_(printf)("   cline: pulldowns: 8to4 %'12lu    4to2 %'12lu    2to1 %'12lu\n",
6226                  stats__cline_64to32pulldown,
6227                  stats__cline_32to16pulldown,
6228                  stats__cline_16to8pulldown );
6229       if (0)
6230       VG_(printf)("   cline: sizeof(CacheLineZ) %ld, covers %ld bytes of arange\n",
6231                   (Word)sizeof(LineZ), (Word)N_LINE_ARANGE);
6232 
6233       VG_(printf)("%s","\n");
6234 
6235       VG_(printf)("   libhb: %'13llu msmcread  (%'llu dragovers)\n",
6236                   stats__msmcread, stats__msmcread_change);
6237       VG_(printf)("   libhb: %'13llu msmcwrite (%'llu dragovers)\n",
6238                   stats__msmcwrite, stats__msmcwrite_change);
6239       VG_(printf)("   libhb: %'13llu cmpLEQ queries (%'llu misses)\n",
6240                   stats__cmpLEQ_queries, stats__cmpLEQ_misses);
6241       VG_(printf)("   libhb: %'13llu join2  queries (%'llu misses)\n",
6242                   stats__join2_queries, stats__join2_misses);
6243 
6244       VG_(printf)("%s","\n");
6245       VG_(printf)( "   libhb: VTSops: tick %'lu,  join %'lu,  cmpLEQ %'lu\n",
6246                    stats__vts__tick, stats__vts__join,  stats__vts__cmpLEQ );
6247       VG_(printf)( "   libhb: VTSops: cmp_structural %'lu (%'lu slow)\n",
6248                    stats__vts__cmp_structural, stats__vts__cmp_structural_slow );
6249       VG_(printf)( "   libhb: VTSset: find__or__clone_and_add %'lu (%'lu allocd)\n",
6250                    stats__vts_set__focaa, stats__vts_set__focaa_a );
6251       VG_(printf)( "   libhb: VTSops: indexAt_SLOW %'lu\n",
6252                    stats__vts__indexat_slow );
6253 
6254       VG_(printf)("%s","\n");
6255       VG_(printf)(
6256          "   libhb: %ld entries in vts_table (approximately %lu bytes)\n",
6257          VG_(sizeXA)( vts_tab ), VG_(sizeXA)( vts_tab ) * sizeof(VtsTE)
6258       );
6259       VG_(printf)( "   libhb: %lu entries in vts_set\n",
6260                    VG_(sizeFM)( vts_set ) );
6261 
6262       VG_(printf)("%s","\n");
6263       VG_(printf)( "   libhb: ctxt__rcdec: 1=%lu(%lu eq), 2=%lu, 3=%lu\n",
6264                    stats__ctxt_rcdec1, stats__ctxt_rcdec1_eq,
6265                    stats__ctxt_rcdec2,
6266                    stats__ctxt_rcdec3 );
6267       VG_(printf)( "   libhb: ctxt__rcdec: calls %lu, discards %lu\n",
6268                    stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
6269       VG_(printf)( "   libhb: contextTab: %lu slots, %lu max ents\n",
6270                    (UWord)N_RCEC_TAB,
6271                    stats__ctxt_tab_curr );
6272       VG_(printf)( "   libhb: contextTab: %lu queries, %lu cmps\n",
6273                    stats__ctxt_tab_qs,
6274                    stats__ctxt_tab_cmps );
6275 #if 0
6276       VG_(printf)("sizeof(AvlNode)     = %lu\n", sizeof(AvlNode));
6277       VG_(printf)("sizeof(WordBag)     = %lu\n", sizeof(WordBag));
6278       VG_(printf)("sizeof(MaybeWord)   = %lu\n", sizeof(MaybeWord));
6279       VG_(printf)("sizeof(CacheLine)   = %lu\n", sizeof(CacheLine));
6280       VG_(printf)("sizeof(LineZ)       = %lu\n", sizeof(LineZ));
6281       VG_(printf)("sizeof(LineF)       = %lu\n", sizeof(LineF));
6282       VG_(printf)("sizeof(SecMap)      = %lu\n", sizeof(SecMap));
6283       VG_(printf)("sizeof(Cache)       = %lu\n", sizeof(Cache));
6284       VG_(printf)("sizeof(SMCacheEnt)  = %lu\n", sizeof(SMCacheEnt));
6285       VG_(printf)("sizeof(CountedSVal) = %lu\n", sizeof(CountedSVal));
6286       VG_(printf)("sizeof(VTS)         = %lu\n", sizeof(VTS));
6287       VG_(printf)("sizeof(ScalarTS)    = %lu\n", sizeof(ScalarTS));
6288       VG_(printf)("sizeof(VtsTE)       = %lu\n", sizeof(VtsTE));
6289       VG_(printf)("sizeof(MSMInfo)     = %lu\n", sizeof(MSMInfo));
6290 
6291       VG_(printf)("sizeof(struct _XArray)     = %lu\n", sizeof(struct _XArray));
6292       VG_(printf)("sizeof(struct _WordFM)     = %lu\n", sizeof(struct _WordFM));
6293       VG_(printf)("sizeof(struct _Thr)     = %lu\n", sizeof(struct _Thr));
6294       VG_(printf)("sizeof(struct _SO)     = %lu\n", sizeof(struct _SO));
6295 #endif
6296 
6297       VG_(printf)("%s","<<< END libhb stats >>>\n");
6298       VG_(printf)("%s","\n");
6299 
6300    }
6301 }
6302 
6303 /* Receive notification that a thread has low level exited.  The
6304    significance here is that we do not expect to see any more memory
6305    references from it. */
libhb_async_exit(Thr * thr)6306 void libhb_async_exit ( Thr* thr )
6307 {
6308    tl_assert(thr);
6309    tl_assert(!thr->llexit_done);
6310    thr->llexit_done = True;
6311 
6312    /* free up Filter and local_Kws_n_stacks (well, actually not the
6313       latter ..) */
6314    tl_assert(thr->filter);
6315    HG_(free)(thr->filter);
6316    thr->filter = NULL;
6317 
6318    /* Tell the VTS mechanism this thread has exited, so it can
6319       participate in VTS pruning.  Note this can only happen if the
6320       thread has both ll_exited and has been joined with. */
6321    if (thr->joinedwith_done)
6322       VTS__declare_thread_very_dead(thr);
6323 
6324    /* Another space-accuracy tradeoff.  Do we want to be able to show
6325       H1 history for conflicts in threads which have since exited?  If
6326       yes, then we better not free up thr->local_Kws_n_stacks.  The
6327       downside is a potential per-thread leak of up to
6328       N_KWs_N_STACKs_PER_THREAD * sizeof(ULong_n_EC) * whatever the
6329       XArray average overcommit factor is (1.5 I'd guess). */
6330    // hence:
6331    // VG_(deleteXA)(thr->local_Kws_n_stacks);
6332    // thr->local_Kws_n_stacks = NULL;
6333 }
6334 
6335 /* Receive notification that a thread has been joined with.  The
6336    significance here is that we do not expect to see any further
6337    references to its vector clocks (Thr::viR and Thr::viW). */
libhb_joinedwith_done(Thr * thr)6338 void libhb_joinedwith_done ( Thr* thr )
6339 {
6340    tl_assert(thr);
6341    /* Caller must ensure that this is only ever called once per Thr. */
6342    tl_assert(!thr->joinedwith_done);
6343    thr->joinedwith_done = True;
6344    if (thr->llexit_done)
6345       VTS__declare_thread_very_dead(thr);
6346 }
6347 
6348 
6349 /* Both Segs and SOs point to VTSs.  However, there is no sharing, so
6350    a Seg that points at a VTS is its one-and-only owner, and ditto for
6351    a SO that points at a VTS. */
6352 
libhb_so_alloc(void)6353 SO* libhb_so_alloc ( void )
6354 {
6355    return SO__Alloc();
6356 }
6357 
libhb_so_dealloc(SO * so)6358 void libhb_so_dealloc ( SO* so )
6359 {
6360    tl_assert(so);
6361    tl_assert(so->magic == SO_MAGIC);
6362    SO__Dealloc(so);
6363 }
6364 
6365 /* See comments in libhb.h for details on the meaning of
6366    strong vs weak sends and strong vs weak receives. */
libhb_so_send(Thr * thr,SO * so,Bool strong_send)6367 void libhb_so_send ( Thr* thr, SO* so, Bool strong_send )
6368 {
6369    /* Copy the VTSs from 'thr' into the sync object, and then move
6370       the thread along one step. */
6371 
6372    tl_assert(so);
6373    tl_assert(so->magic == SO_MAGIC);
6374 
6375    /* stay sane .. a thread's read-clock must always lead or be the
6376       same as its write-clock */
6377    { Bool leq = VtsID__cmpLEQ(thr->viW, thr->viR);
6378      tl_assert(leq);
6379    }
6380 
6381    /* since we're overwriting the VtsIDs in the SO, we need to drop
6382       any references made by the previous contents thereof */
6383    if (so->viR == VtsID_INVALID) {
6384       tl_assert(so->viW == VtsID_INVALID);
6385       so->viR = thr->viR;
6386       so->viW = thr->viW;
6387       VtsID__rcinc(so->viR);
6388       VtsID__rcinc(so->viW);
6389    } else {
6390       /* In a strong send, we dump any previous VC in the SO and
6391          install the sending thread's VC instead.  For a weak send we
6392          must join2 with what's already there. */
6393       tl_assert(so->viW != VtsID_INVALID);
6394       VtsID__rcdec(so->viR);
6395       VtsID__rcdec(so->viW);
6396       so->viR = strong_send ? thr->viR : VtsID__join2( so->viR, thr->viR );
6397       so->viW = strong_send ? thr->viW : VtsID__join2( so->viW, thr->viW );
6398       VtsID__rcinc(so->viR);
6399       VtsID__rcinc(so->viW);
6400    }
6401 
6402    /* move both parent clocks along */
6403    VtsID__rcdec(thr->viR);
6404    VtsID__rcdec(thr->viW);
6405    thr->viR = VtsID__tick( thr->viR, thr );
6406    thr->viW = VtsID__tick( thr->viW, thr );
6407    if (!thr->llexit_done) {
6408       Filter__clear(thr->filter, "libhb_so_send");
6409       note_local_Kw_n_stack_for(thr);
6410    }
6411    VtsID__rcinc(thr->viR);
6412    VtsID__rcinc(thr->viW);
6413 
6414    if (strong_send)
6415       show_thread_state("s-send", thr);
6416    else
6417       show_thread_state("w-send", thr);
6418 }
6419 
libhb_so_recv(Thr * thr,SO * so,Bool strong_recv)6420 void libhb_so_recv ( Thr* thr, SO* so, Bool strong_recv )
6421 {
6422    tl_assert(so);
6423    tl_assert(so->magic == SO_MAGIC);
6424 
6425    if (so->viR != VtsID_INVALID) {
6426       tl_assert(so->viW != VtsID_INVALID);
6427 
6428       /* Weak receive (basically, an R-acquisition of a R-W lock).
6429          This advances the read-clock of the receiver, but not the
6430          write-clock. */
6431       VtsID__rcdec(thr->viR);
6432       thr->viR = VtsID__join2( thr->viR, so->viR );
6433       VtsID__rcinc(thr->viR);
6434 
6435       /* At one point (r10589) it seemed safest to tick the clocks for
6436          the receiving thread after the join.  But on reflection, I
6437          wonder if that might cause it to 'overtake' constraints,
6438          which could lead to missing races.  So, back out that part of
6439          r10589. */
6440       //VtsID__rcdec(thr->viR);
6441       //thr->viR = VtsID__tick( thr->viR, thr );
6442       //VtsID__rcinc(thr->viR);
6443 
6444       /* For a strong receive, we also advance the receiver's write
6445          clock, which means the receive as a whole is essentially
6446          equivalent to a W-acquisition of a R-W lock. */
6447       if (strong_recv) {
6448          VtsID__rcdec(thr->viW);
6449          thr->viW = VtsID__join2( thr->viW, so->viW );
6450          VtsID__rcinc(thr->viW);
6451 
6452          /* See comment just above, re r10589. */
6453          //VtsID__rcdec(thr->viW);
6454          //thr->viW = VtsID__tick( thr->viW, thr );
6455          //VtsID__rcinc(thr->viW);
6456       }
6457 
6458       if (thr->filter)
6459          Filter__clear(thr->filter, "libhb_so_recv");
6460       note_local_Kw_n_stack_for(thr);
6461 
6462       if (strong_recv)
6463          show_thread_state("s-recv", thr);
6464       else
6465          show_thread_state("w-recv", thr);
6466 
6467    } else {
6468       tl_assert(so->viW == VtsID_INVALID);
6469       /* Deal with degenerate case: 'so' has no vts, so there has been
6470          no message posted to it.  Just ignore this case. */
6471       show_thread_state("d-recv", thr);
6472    }
6473 }
6474 
libhb_so_everSent(SO * so)6475 Bool libhb_so_everSent ( SO* so )
6476 {
6477    if (so->viR == VtsID_INVALID) {
6478       tl_assert(so->viW == VtsID_INVALID);
6479       return False;
6480    } else {
6481       tl_assert(so->viW != VtsID_INVALID);
6482       return True;
6483    }
6484 }
6485 
6486 #define XXX1 0 // 0x67a106c
6487 #define XXX2 0
6488 
TRACEME(Addr a,SizeT szB)6489 static inline Bool TRACEME(Addr a, SizeT szB) {
6490    if (XXX1 && a <= XXX1 && XXX1 <= a+szB) return True;
6491    if (XXX2 && a <= XXX2 && XXX2 <= a+szB) return True;
6492    return False;
6493 }
trace(Thr * thr,Addr a,SizeT szB,HChar * s)6494 static void trace ( Thr* thr, Addr a, SizeT szB, HChar* s ) {
6495   SVal sv = zsm_sread08(a);
6496   VG_(printf)("thr %p (%#lx,%lu) %s: 0x%016llx ", thr,a,szB,s,sv);
6497   show_thread_state("", thr);
6498   VG_(printf)("%s","\n");
6499 }
6500 
libhb_srange_new(Thr * thr,Addr a,SizeT szB)6501 void libhb_srange_new ( Thr* thr, Addr a, SizeT szB )
6502 {
6503    SVal sv = SVal__mkC(thr->viW, thr->viW);
6504    tl_assert(is_sane_SVal_C(sv));
6505    if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-before");
6506    zsm_sset_range( a, szB, sv );
6507    Filter__clear_range( thr->filter, a, szB );
6508    if (0 && TRACEME(a,szB)) trace(thr,a,szB,"nw-after ");
6509 }
6510 
libhb_srange_noaccess_NoFX(Thr * thr,Addr a,SizeT szB)6511 void libhb_srange_noaccess_NoFX ( Thr* thr, Addr a, SizeT szB )
6512 {
6513    /* do nothing */
6514 }
6515 
libhb_srange_noaccess_AHAE(Thr * thr,Addr a,SizeT szB)6516 void libhb_srange_noaccess_AHAE ( Thr* thr, Addr a, SizeT szB )
6517 {
6518    /* This really does put the requested range in NoAccess.  It's
6519       expensive though. */
6520    SVal sv = SVal_NOACCESS;
6521    tl_assert(is_sane_SVal_C(sv));
6522    zsm_sset_range( a, szB, sv );
6523    Filter__clear_range( thr->filter, a, szB );
6524 }
6525 
libhb_srange_untrack(Thr * thr,Addr a,SizeT szB)6526 void libhb_srange_untrack ( Thr* thr, Addr a, SizeT szB )
6527 {
6528    SVal sv = SVal_NOACCESS;
6529    tl_assert(is_sane_SVal_C(sv));
6530    if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-before");
6531    zsm_sset_range( a, szB, sv );
6532    Filter__clear_range( thr->filter, a, szB );
6533    if (0 && TRACEME(a,szB)) trace(thr,a,szB,"untrack-after ");
6534 }
6535 
libhb_get_Thr_hgthread(Thr * thr)6536 Thread* libhb_get_Thr_hgthread ( Thr* thr ) {
6537    tl_assert(thr);
6538    return thr->hgthread;
6539 }
6540 
libhb_set_Thr_hgthread(Thr * thr,Thread * hgthread)6541 void libhb_set_Thr_hgthread ( Thr* thr, Thread* hgthread ) {
6542    tl_assert(thr);
6543    thr->hgthread = hgthread;
6544 }
6545 
libhb_copy_shadow_state(Thr * thr,Addr src,Addr dst,SizeT len)6546 void libhb_copy_shadow_state ( Thr* thr, Addr src, Addr dst, SizeT len )
6547 {
6548    zsm_scopy_range(src, dst, len);
6549    Filter__clear_range( thr->filter, dst, len );
6550 }
6551 
libhb_maybe_GC(void)6552 void libhb_maybe_GC ( void )
6553 {
6554    event_map_maybe_GC();
6555    /* If there are still freelist entries available, no need for a
6556       GC. */
6557    if (vts_tab_freelist != VtsID_INVALID)
6558       return;
6559    /* So all the table entries are full, and we're having to expand
6560       the table.  But did we hit the threshhold point yet? */
6561    if (VG_(sizeXA)( vts_tab ) < vts_next_GC_at)
6562       return;
6563    vts_tab__do_GC( False/*don't show stats*/ );
6564 }
6565 
6566 
6567 /////////////////////////////////////////////////////////////////
6568 /////////////////////////////////////////////////////////////////
6569 //                                                             //
6570 // SECTION END main library                                    //
6571 //                                                             //
6572 /////////////////////////////////////////////////////////////////
6573 /////////////////////////////////////////////////////////////////
6574 
6575 /*--------------------------------------------------------------------*/
6576 /*--- end                                             libhb_main.c ---*/
6577 /*--------------------------------------------------------------------*/
6578