//===-- tsan_clock.h --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#ifndef TSAN_CLOCK_H
#define TSAN_CLOCK_H

#include "tsan_defs.h"
#include "tsan_dense_alloc.h"

namespace __tsan {

typedef DenseSlabAlloc<ClockBlock, 1<<16, 1<<10> ClockAlloc;
typedef DenseSlabAllocCache ClockCache;

// The clock that lives in sync variables (mutexes, atomics, etc).
class SyncClock {
 public:
  SyncClock();
  ~SyncClock();

  uptr size() const;

  // These are used only in tests.
  u64 get(unsigned tid) const;
  u64 get_clean(unsigned tid) const;

  void Resize(ClockCache *c, uptr nclk);
  void Reset(ClockCache *c);

  void DebugDump(int(*printf)(const char *s, ...));

  // Clock element iterator.
  // Note: it iterates only over the table without regard to dirty entries.
  class Iter {
   public:
    explicit Iter(SyncClock* parent);
    Iter& operator++();
    bool operator!=(const Iter& other);
    ClockElem &operator*();

   private:
    SyncClock *parent_;
    // [pos_, end_) is the current contiguous range of clock elements.
    ClockElem *pos_;
    ClockElem *end_;
    int block_;  // Index of the current second-level block.

    NOINLINE void Next();
  };

  Iter begin();
  Iter end();
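
  // Usage sketch (illustrative only): begin()/end() make the clock usable
  // with range-for, walking the raw table elements. ClockElem and its epoch
  // field come from tsan_defs.h; `sc` is a hypothetical SyncClock:
  //   u64 max_epoch = 0;
  //   for (ClockElem &e : sc)
  //     max_epoch = max(max_epoch, (u64)e.epoch);
  // Active dirty entries are not visited and must be consulted separately.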

 private:
  friend class ThreadClock;
  friend class Iter;
  static const uptr kDirtyTids = 2;

  struct Dirty {
    u64 epoch : kClkBits;
    u64 tid : 64 - kClkBits;  // kInvalidId if not active
  };

  unsigned release_store_tid_;
  unsigned release_store_reused_;
  Dirty dirty_[kDirtyTids];
  // If size_ is 0, tab_ is nullptr.
  // If size_ <= 64 (kClockCount), tab_ points to an array with
  // 64 ClockElem's (ClockBlock::clock).
  // Otherwise, tab_ points to an array with up to 127 u32 elements,
  // each pointing to a second-level 512-byte block with 64 ClockElem's.
  // Unused space in the first-level ClockBlock is used to store additional
  // clock elements.
  // The last u32 element in the first-level ClockBlock is always used as
  // a reference counter.
  //
  // See the following scheme for details.
  // All memory blocks are 512 bytes (allocated from ClockAlloc).
  // Clock (clk) elements are 64 bits.
  // Idx and ref are 32 bits.
  //
  // tab_
  //    |
  //    \/
  //    +----------------------------------------------------+
  //    | clk128 | clk129 | ...unused... | idx1 | idx0 | ref |
  //    +----------------------------------------------------+
  //                                        |      |
  //                                        |      \/
  //                                        |      +----------------+
  //                                        |      | clk0 ... clk63 |
  //                                        |      +----------------+
  //                                        \/
  //                                        +------------------+
  //                                        | clk64 ... clk127 |
  //                                        +------------------+
  //
  // Note: dirty entries, if active, always override what's stored in the clock.
  ClockBlock *tab_;
  u32 tab_idx_;
  u16 size_;
  u16 blocks_;  // Number of second-level blocks.
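
  // Indexing sketch (illustrative; elem() in tsan_clock.cpp is the
  // authoritative implementation). Under the layout above, roughly:
  //   uptr block = tid / ClockBlock::kClockCount;
  //   if (block < blocks_)  // Element lives in a second-level block.
  //     return block_for(get_block(block))->clock[tid % ClockBlock::kClockCount];
  //   // Otherwise the element is stored inline in the first-level block
  //   // (clk128, clk129, ... in the scheme above).
  //   return tab_->clock[tid - blocks_ * ClockBlock::kClockCount];
  // Here block_for() stands in for mapping an allocator index to its block.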

  void Unshare(ClockCache *c);
  bool IsShared() const;
  bool Cachable() const;
  void ResetImpl();
  void FlushDirty();
  uptr capacity() const;
  u32 get_block(uptr bi) const;
  void append_block(u32 idx);
  ClockElem &elem(unsigned tid) const;
};

// The clock that lives in threads.
class ThreadClock {
 public:
  typedef DenseSlabAllocCache Cache;

  explicit ThreadClock(unsigned tid, unsigned reused = 0);

  u64 get(unsigned tid) const;
  void set(ClockCache *c, unsigned tid, u64 v);
  void set(u64 v);
  void tick();
  uptr size() const;

  void acquire(ClockCache *c, SyncClock *src);
  void releaseStoreAcquire(ClockCache *c, SyncClock *src);
  void release(ClockCache *c, SyncClock *dst);
  void acq_rel(ClockCache *c, SyncClock *dst);
  void ReleaseStore(ClockCache *c, SyncClock *dst);
  void ResetCached(ClockCache *c);
  void NoteGlobalAcquire(u64 v);
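
  // Typical pairing (an illustrative sketch, not a required protocol): on a
  // mutex unlock the owner releases its vector time into the mutex's
  // SyncClock, and the next lock owner acquires it back:
  //   owner_clock->release(cache, &mutex_sync);  // at unlock
  //   next_clock->acquire(cache, &mutex_sync);   // at the following lock
  // This establishes a happens-before edge between the two critical sections.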

  void DebugReset();
  void DebugDump(int(*printf)(const char *s, ...));

 private:
  static const uptr kDirtyTids = SyncClock::kDirtyTids;
  // Index of the thread associated with the clock ("current thread").
  const unsigned tid_;
  const unsigned reused_;  // tid_ reuse count.
  // Current thread time when it acquired something from other threads.
  u64 last_acquire_;

  // Last time another thread has done a global acquire of this thread's clock.
  // It helps to avoid the problem described in:
  // https://github.com/golang/go/issues/39186
  // See test/tsan/java_finalizer2.cpp for a regression test.
  // Note that the failure is _extremely_ hard to hit, so if you are trying
  // to reproduce it, you may want to run something like:
  // $ go get golang.org/x/tools/cmd/stress
  // $ stress -p=64 ./a.out
  //
  // The crux of the problem is roughly as follows.
  // A number of O(1) optimizations in the clock algorithm assume proper
  // transitive cumulative propagation of clock values. The AcquireGlobal
  // operation may produce an inconsistent non-linearizable view of
  // thread clocks. Namely, it may acquire a later value from a thread
  // with a higher ID, but fail to acquire an earlier value from a thread
  // with a lower ID. If a thread that executed AcquireGlobal then releases
  // to a sync clock, it will spoil the sync clock with the inconsistent
  // values. If another thread later releases to the sync clock, the optimized
  // algorithm may break.
  //
  // The exact sequence of events that leads to the failure:
  // - thread 1 executes AcquireGlobal
  // - thread 1 acquires value 1 for thread 2
  // - thread 2 increments clock to 2
  // - thread 2 releases to sync object 1
  // - thread 3 at time 1
  // - thread 3 acquires from sync object 1
  // - thread 3 increments clock to 2
  // - thread 1 acquires value 2 for thread 3
  // - thread 1 releases to sync object 2
  // - sync object 2 clock has 1 for thread 2 and 2 for thread 3
  // - thread 3 releases to sync object 2
  // - thread 3 sees value 2 in the clock for itself
  //   and decides that it has already released to the clock
  //   and did not acquire anything from other threads after that
  //   (the last_acquire_ check in release operation)
  // - thread 3 does not update the value for thread 2 in the clock from 1 to 2
  // - thread 4 acquires from sync object 2
  // - thread 4 detects a false race with thread 2
  //   as it should have been synchronized with thread 2 up to time 2,
  //   but because of the broken clock it is now synchronized only up to time 1
  //
  // The global_acquire_ value helps to prevent this scenario.
  // Namely, thread 3 will not trust any of its own clock values up to
  // global_acquire_ for the purposes of the last_acquire_ optimization.
  atomic_uint64_t global_acquire_;
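
  // A sketch of the resulting check (roughly what HasAcquiredAfterRelease
  // below implements; see tsan_clock.cpp for the authoritative version):
  //   u64 my_epoch = dst->elem(tid_).epoch;
  //   return my_epoch <= last_acquire_ ||
  //          my_epoch <= atomic_load_relaxed(&global_acquire_);
  // A true result disables the O(1) release fast path, so own epochs up to
  // global_acquire_ are never used to skip a full release.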

  // Cached SyncClock (without dirty entries and release_store_tid_).
  // We reuse it for subsequent store-release operations without intervening
  // acquire operations. Since it is shared (and thus constant), the clock
  // value for the current thread is then stored in dirty entries in the
  // SyncClock. We hold a reference to the table while it is cached here.
  u32 cached_idx_;
  u16 cached_size_;
  u16 cached_blocks_;

  // Number of active elements in the clk_ table (the rest is zeros).
  uptr nclk_;
  u64 clk_[kMaxTidInClock];  // Fixed size vector clock.

  bool IsAlreadyAcquired(const SyncClock *src) const;
  bool HasAcquiredAfterRelease(const SyncClock *dst) const;
  void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;
};
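
// A minimal end-to-end sketch of how these pieces compose (illustrative
// only; `cache` is a hypothetical ClockCache owned by the caller, and real
// TSan drives these calls from the runtime, not like this):
//   ThreadClock t1(/*tid=*/1), t2(/*tid=*/2);
//   SyncClock s;
//   t1.tick();               // Thread 1 performs some work.
//   t1.release(&cache, &s);  // E.g. thread 1 unlocks a mutex guarded by s.
//   t2.acquire(&cache, &s);  // Thread 2 locks it: now t2.get(1) >= t1.get(1),
//                            // i.e. thread 2 happens-after thread 1.
//   s.Reset(&cache);         // Return the clock's blocks to the cache.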

ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {
  DCHECK_LT(tid, kMaxTidInClock);
  return clk_[tid];
}

ALWAYS_INLINE void ThreadClock::set(u64 v) {
  DCHECK_GE(v, clk_[tid_]);
  clk_[tid_] = v;
}

ALWAYS_INLINE void ThreadClock::tick() {
  clk_[tid_]++;
}

ALWAYS_INLINE uptr ThreadClock::size() const {
  return nclk_;
}

ALWAYS_INLINE void ThreadClock::NoteGlobalAcquire(u64 v) {
  // Here we rely on the fact that AcquireGlobal is protected by
  // ThreadRegistryLock, thus only one thread at a time executes it
  // and values passed to this function should not go backwards.
  CHECK_LE(atomic_load_relaxed(&global_acquire_), v);
  atomic_store_relaxed(&global_acquire_, v);
}

ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {
  return Iter(this);
}

ALWAYS_INLINE SyncClock::Iter SyncClock::end() {
  return Iter(nullptr);
}

ALWAYS_INLINE uptr SyncClock::size() const {
  return size_;
}

ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent)
    : parent_(parent)
    , pos_(nullptr)
    , end_(nullptr)
    , block_(-1) {
  if (parent)
    Next();
}

ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() {
  pos_++;
  if (UNLIKELY(pos_ >= end_))
    Next();
  return *this;
}

ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) {
  return parent_ != other.parent_;
}

ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {
  return *pos_;
}
}  // namespace __tsan

#endif  // TSAN_CLOCK_H