• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- tsan_clock.cc -----------------------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file is a part of ThreadSanitizer (TSan), a race detector.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "tsan_clock.h"
14 #include "tsan_rtl.h"
15 
16 // SyncClock and ThreadClock implement vector clocks for sync variables
17 // (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
18 // ThreadClock contains fixed-size vector clock for maximum number of threads.
19 // SyncClock contains growable vector clock for currently necessary number of
20 // threads.
21 // Together they implement very simple model of operations, namely:
22 //
23 //   void ThreadClock::acquire(const SyncClock *src) {
24 //     for (int i = 0; i < kMaxThreads; i++)
25 //       clock[i] = max(clock[i], src->clock[i]);
26 //   }
27 //
28 //   void ThreadClock::release(SyncClock *dst) const {
29 //     for (int i = 0; i < kMaxThreads; i++)
30 //       dst->clock[i] = max(dst->clock[i], clock[i]);
31 //   }
32 //
33 //   void ThreadClock::ReleaseStore(SyncClock *dst) const {
34 //     for (int i = 0; i < kMaxThreads; i++)
35 //       dst->clock[i] = clock[i];
36 //   }
37 //
38 //   void ThreadClock::acq_rel(SyncClock *dst) {
39 //     acquire(dst);
40 //     release(dst);
41 //   }
42 //
43 // Conformance to this model is extensively verified in tsan_clock_test.cc.
44 // However, the implementation is significantly more complex. The complexity
45 // allows to implement important classes of use cases in O(1) instead of O(N).
46 //
47 // The use cases are:
48 // 1. Singleton/once atomic that has a single release-store operation followed
49 //    by zillions of acquire-loads (the acquire-load is O(1)).
50 // 2. Thread-local mutex (both lock and unlock can be O(1)).
51 // 3. Leaf mutex (unlock is O(1)).
52 // 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
53 // 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is in
55 // read-only phase, these reads will be O(1); if it later switches to read/write
56 // phase, the implementation will correctly handle that by switching to O(N).
57 //
58 // Thread-safety note: all const operations on SyncClock's are conducted under
59 // a shared lock; all non-const operations on SyncClock's are conducted under
60 // an exclusive lock; ThreadClock's are private to respective threads and so
61 // do not need any protection.
62 //
63 // Description of ThreadClock state:
64 // clk_ - fixed size vector clock.
65 // nclk_ - effective size of the vector clock (the rest is zeros).
// tid_ - index of the thread associated with the clock ("current thread").
67 // last_acquire_ - current thread time when it acquired something from
68 //   other threads.
69 //
70 // Description of SyncClock state:
71 // clk_ - variable size vector clock, low kClkBits hold timestamp,
72 //   the remaining bits hold "acquired" flag (the actual value is thread's
73 //   reused counter);
//   if acquired == thr->reused_, then the respective thread has already
75 //   acquired this clock (except possibly dirty_tids_).
// dirty_tids_ - holds up to two indices in the vector clock that other threads
77 //   need to acquire regardless of "acquired" flag value;
78 // release_store_tid_ - denotes that the clock state is a result of
79 //   release-store operation by the thread with release_store_tid_ index.
80 // release_store_reused_ - reuse count of release_store_tid_.
81 
82 // We don't have ThreadState in these methods, so this is an ugly hack that
83 // works only in C++.
84 #ifndef TSAN_GO
85 # define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
86 #else
87 # define CPP_STAT_INC(typ) (void)0
88 #endif
89 
90 namespace __tsan {
91 
92 const unsigned kInvalidTid = (unsigned)-1;
93 
// Constructs the thread-private vector clock for thread 'tid'.
// 'reused' is the slot reuse count; it is stored incremented by one because
// a zero 'reused' stamp in a SyncClock element means "not acquired".
ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1) {  // 0 has special meaning
  CHECK_LT(tid, kMaxTidInClock);
  // reused_ must survive the truncation to the bits SyncClock stores it in.
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  last_acquire_ = 0;
  internal_memset(clk_, 0, sizeof(clk_));
  clk_[tid_].reused = reused_;
}
104 
// Merges src into this thread's clock: clk_[i] = max(clk_[i], src->clk_[i]).
// Fast paths avoid the O(N) scan when src is empty, or when this thread has
// already acquired src since src's last release (tracked via the 'reused'
// stamp plus the dirty_tids_ entries).
void ThreadClock::acquire(const SyncClock *src) {
  DCHECK(nclk_ <= kMaxTid);
  DCHECK(src->clk_.Size() <= kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->clk_.Size();
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  // Check if we've already acquired src after the last release operation on src
  bool acquired = false;
  if (nclk > tid_) {
    CPP_STAT_INC(StatClockAcquireLarge);
    if (src->clk_[tid_].reused == reused_) {
      // Already acquired: only the dirty entries can carry anything new,
      // so this path is O(kDirtyTids) instead of O(N).
      CPP_STAT_INC(StatClockAcquireRepeat);
      for (unsigned i = 0; i < kDirtyTids; i++) {
        unsigned tid = src->dirty_tids_[i];
        if (tid != kInvalidTid) {
          u64 epoch = src->clk_[tid].epoch;
          if (clk_[tid].epoch < epoch) {
            clk_[tid].epoch = epoch;
            acquired = true;
          }
        }
      }
      if (acquired) {
        CPP_STAT_INC(StatClockAcquiredSomething);
        last_acquire_ = clk_[tid_].epoch;
      }
      return;
    }
  }

  // O(N) acquire.
  CPP_STAT_INC(StatClockAcquireFull);
  nclk_ = max(nclk_, nclk);
  for (uptr i = 0; i < nclk; i++) {
    u64 epoch = src->clk_[i].epoch;
    if (clk_[i].epoch < epoch) {
      clk_[i].epoch = epoch;
      acquired = true;
    }
  }

  // Remember that this thread has acquired this clock.
  // NOTE(review): writes through the const pointer; presumably src's
  // element accessor permits mutating the 'reused' stamp — confirm in header.
  if (nclk > tid_)
    src->clk_[tid_].reused = reused_;

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_].epoch;
  }
}
161 
// Merges this thread's clock into dst:
//   dst->clk_[i] = max(dst->clk_[i], clk_[i]).
// If this thread has not acquired anything since its last release into dst,
// only the current thread's own element needs updating (O(1) fast path).
void ThreadClock::release(SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->clk_.Size(), kMaxTid);

  if (dst->clk_.Size() == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->clk_.Size() < nclk_) {
    CPP_STAT_INC(StatClockReleaseResize);
    dst->clk_.Resize(nclk_);
  }

  // Check if we had not acquired anything from other threads
  // since the last release on dst. If so, we need to update
  // only dst->clk_[tid_].
  if (dst->clk_[tid_].epoch > last_acquire_) {
    UpdateCurrentThread(dst);
    // dst is no longer a pure release-store by this thread (if it ever was).
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  // First, remember whether we've acquired dst.
  // Must be computed before the loop below clears the 'reused' stamps.
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_.
  for (uptr i = 0; i < nclk_; i++) {
    dst->clk_[i].epoch = max(dst->clk_[i].epoch, clk_[i].epoch);
    dst->clk_[i].reused = 0;
  }
  // Clear 'acquired' flag in the remaining elements.
  if (nclk_ < dst->clk_.Size())
    CPP_STAT_INC(StatClockReleaseClearTail);
  for (uptr i = nclk_; i < dst->clk_.Size(); i++)
    dst->clk_[i].reused = 0;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->clk_[tid_].reused = reused_;
}
216 
// Overwrites dst with this thread's clock: dst->clk_[i] = clk_[i].
// If dst already holds a release-store by this thread (same tid and reuse
// count) and nothing was acquired since, only the current thread's element
// is updated (O(1) fast path).
void ThreadClock::ReleaseStore(SyncClock *dst) const {
  DCHECK(nclk_ <= kMaxTid);
  DCHECK(dst->clk_.Size() <= kMaxTid);
  CPP_STAT_INC(StatClockStore);

  // Check if we need to resize dst.
  if (dst->clk_.Size() < nclk_) {
    CPP_STAT_INC(StatClockStoreResize);
    dst->clk_.Resize(nclk_);
  }

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      dst->clk_[tid_].epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  for (uptr i = 0; i < nclk_; i++) {
    dst->clk_[i].epoch = clk_[i].epoch;
    dst->clk_[i].reused = 0;
  }
  // Clear the tail of dst->clk_.
  if (nclk_ < dst->clk_.Size()) {
    internal_memset(&dst->clk_[nclk_], 0,
        (dst->clk_.Size() - nclk_) * sizeof(dst->clk_[0]));
    CPP_STAT_INC(StatClockStoreTail);
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
  // Record this clock as the result of a release-store by this thread,
  // enabling the O(1) fast path above on subsequent stores.
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in future.
  dst->clk_[tid_].reused = reused_;
}
255 
// Combined acquire-release. After acquire() this thread's clock dominates
// dst element-wise, so the release half can be a plain overwrite
// (ReleaseStore) instead of an element-wise max.
void ThreadClock::acq_rel(SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(dst);
  ReleaseStore(dst);
}
261 
262 // Updates only single element related to the current thread in dst->clk_.
// Updates only single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
  // Update the threads time, but preserve 'acquired' flag.
  dst->clk_[tid_].epoch = clk_[tid_].epoch;

  // Mark tid_ dirty so other threads re-read this element even if their
  // 'acquired' stamp is still current.
  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dst->dirty_tids_[i] == tid_) {
      // Already recorded as dirty — nothing more to do.
      CPP_STAT_INC(StatClockReleaseFast1);
      return;
    }
    if (dst->dirty_tids_[i] == kInvalidTid) {
      // Claim a vacant dirty slot.
      CPP_STAT_INC(StatClockReleaseFast2);
      dst->dirty_tids_[i] = tid_;
      return;
    }
  }
  // No free dirty slots: reset all 'acquired' flags, O(N),
  // forcing every thread to do a full acquire next time.
  CPP_STAT_INC(StatClockReleaseSlow);
  for (uptr i = 0; i < dst->clk_.Size(); i++) {
    dst->clk_[i].reused = 0;
  }
  for (unsigned i = 0; i < kDirtyTids; i++)
    dst->dirty_tids_[i] = kInvalidTid;
}
286 
287 // Checks whether the current threads has already acquired src.
IsAlreadyAcquired(const SyncClock * src) const288 bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
289   if (src->clk_[tid_].reused != reused_)
290     return false;
291   for (unsigned i = 0; i < kDirtyTids; i++) {
292     unsigned tid = src->dirty_tids_[i];
293     if (tid != kInvalidTid) {
294       if (clk_[tid].epoch < src->clk_[tid].epoch)
295         return false;
296     }
297   }
298   return true;
299 }
300 
301 // Sets a single element in the vector clock.
302 // This function is called only from weird places like AcquireGlobal.
set(unsigned tid,u64 v)303 void ThreadClock::set(unsigned tid, u64 v) {
304   DCHECK_LT(tid, kMaxTid);
305   DCHECK_GE(v, clk_[tid].epoch);
306   clk_[tid].epoch = v;
307   if (nclk_ <= tid)
308     nclk_ = tid + 1;
309   last_acquire_ = clk_[tid_].epoch;
310 }
311 
DebugDump(int (* printf)(const char * s,...))312 void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
313   printf("clock=[");
314   for (uptr i = 0; i < nclk_; i++)
315     printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
316   printf("] reused=[");
317   for (uptr i = 0; i < nclk_; i++)
318     printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
319   printf("] tid=%u/%u last_acq=%llu",
320       tid_, reused_, last_acquire_);
321 }
322 
SyncClock()323 SyncClock::SyncClock()
324     : clk_(MBlockClock) {
325   release_store_tid_ = kInvalidTid;
326   release_store_reused_ = 0;
327   for (uptr i = 0; i < kDirtyTids; i++)
328     dirty_tids_[i] = kInvalidTid;
329 }
330 
// Returns the clock to its pristine state and releases its storage.
void SyncClock::Reset() {
  clk_.Reset();  // frees the underlying vector memory
  Zero();        // then clears the remaining scalar state
}
335 
Zero()336 void SyncClock::Zero() {
337   clk_.Resize(0);
338   release_store_tid_ = kInvalidTid;
339   release_store_reused_ = 0;
340   for (uptr i = 0; i < kDirtyTids; i++)
341     dirty_tids_[i] = kInvalidTid;
342 }
343 
DebugDump(int (* printf)(const char * s,...))344 void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
345   printf("clock=[");
346   for (uptr i = 0; i < clk_.Size(); i++)
347     printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
348   printf("] reused=[");
349   for (uptr i = 0; i < clk_.Size(); i++)
350     printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
351   printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
352       release_store_tid_, release_store_reused_,
353       dirty_tids_[0], dirty_tids_[1]);
354 }
355 }  // namespace __tsan
356