//===-- tsan_clock.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
12 #include "tsan_clock.h"
13 #include "tsan_rtl.h"
14 #include "sanitizer_common/sanitizer_placement_new.h"
15 
// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
// ThreadClock contains a fixed-size vector clock for the maximum number of
// threads. SyncClock contains a growable vector clock for the currently
// necessary number of threads.
// Together they implement a very simple model of operations, namely:
//
//   void ThreadClock::acquire(const SyncClock *src) {
//     for (int i = 0; i < kMaxThreads; i++)
//       clock[i] = max(clock[i], src->clock[i]);
//   }
//
//   void ThreadClock::release(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = max(dst->clock[i], clock[i]);
//   }
//
//   void ThreadClock::releaseStoreAcquire(SyncClock *sc) {
//     for (int i = 0; i < kMaxThreads; i++) {
//       tmp = clock[i];
//       clock[i] = max(clock[i], sc->clock[i]);
//       sc->clock[i] = tmp;
//     }
//   }
//
//   void ThreadClock::ReleaseStore(SyncClock *dst) const {
//     for (int i = 0; i < kMaxThreads; i++)
//       dst->clock[i] = clock[i];
//   }
//
//   void ThreadClock::acq_rel(SyncClock *dst) {
//     acquire(dst);
//     release(dst);
//   }
//
// Conformance to this model is extensively verified in tsan_clock_test.cpp.
// However, the implementation is significantly more complex. The complexity
// makes it possible to implement important classes of use cases in O(1)
// instead of O(N).
//
// The use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is in
// a read-only phase, these reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by switching
// to O(N).
//
// Thread-safety note: all const operations on SyncClock's are conducted under
// a shared lock; all non-const operations on SyncClock's are conducted under
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold "acquired" flag (the actual value is thread's
//   reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly for dirty elements).
// dirty_ - holds up to two indices in the vector clock that other threads
//   need to acquire regardless of "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.

// Statistics helper for methods that are not handed a ThreadState: outside of
// Go builds it charges the counter to cur_thread(); in Go builds it expands to
// a no-op.
#if SANITIZER_GO
# define CPP_STAT_INC(typ) (void)0
#else
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#endif

91 namespace __tsan {
92 
ref_ptr(ClockBlock * cb)93 static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
94   return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);
95 }
96 
97 // Drop reference to the first level block idx.
UnrefClockBlock(ClockCache * c,u32 idx,uptr blocks)98 static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
99   ClockBlock *cb = ctx->clock_alloc.Map(idx);
100   atomic_uint32_t *ref = ref_ptr(cb);
101   u32 v = atomic_load(ref, memory_order_acquire);
102   for (;;) {
103     CHECK_GT(v, 0);
104     if (v == 1)
105       break;
106     if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))
107       return;
108   }
109   // First level block owns second level blocks, so them as well.
110   for (uptr i = 0; i < blocks; i++)
111     ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);
112   ctx->clock_alloc.Free(c, idx);
113 }
114 
ThreadClock(unsigned tid,unsigned reused)115 ThreadClock::ThreadClock(unsigned tid, unsigned reused)
116     : tid_(tid)
117     , reused_(reused + 1)  // 0 has special meaning
118     , last_acquire_()
119     , global_acquire_()
120     , cached_idx_()
121     , cached_size_()
122     , cached_blocks_() {
123   CHECK_LT(tid, kMaxTidInClock);
124   CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
125   nclk_ = tid_ + 1;
126   internal_memset(clk_, 0, sizeof(clk_));
127 }
128 
ResetCached(ClockCache * c)129 void ThreadClock::ResetCached(ClockCache *c) {
130   if (cached_idx_) {
131     UnrefClockBlock(c, cached_idx_, cached_blocks_);
132     cached_idx_ = 0;
133     cached_size_ = 0;
134     cached_blocks_ = 0;
135   }
136 }
137 
acquire(ClockCache * c,SyncClock * src)138 void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
139   DCHECK_LE(nclk_, kMaxTid);
140   DCHECK_LE(src->size_, kMaxTid);
141   CPP_STAT_INC(StatClockAcquire);
142 
143   // Check if it's empty -> no need to do anything.
144   const uptr nclk = src->size_;
145   if (nclk == 0) {
146     CPP_STAT_INC(StatClockAcquireEmpty);
147     return;
148   }
149 
150   bool acquired = false;
151   for (unsigned i = 0; i < kDirtyTids; i++) {
152     SyncClock::Dirty dirty = src->dirty_[i];
153     unsigned tid = dirty.tid;
154     if (tid != kInvalidTid) {
155       if (clk_[tid] < dirty.epoch) {
156         clk_[tid] = dirty.epoch;
157         acquired = true;
158       }
159     }
160   }
161 
162   // Check if we've already acquired src after the last release operation on src
163   if (tid_ >= nclk || src->elem(tid_).reused != reused_) {
164     // O(N) acquire.
165     CPP_STAT_INC(StatClockAcquireFull);
166     nclk_ = max(nclk_, nclk);
167     u64 *dst_pos = &clk_[0];
168     for (ClockElem &src_elem : *src) {
169       u64 epoch = src_elem.epoch;
170       if (*dst_pos < epoch) {
171         *dst_pos = epoch;
172         acquired = true;
173       }
174       dst_pos++;
175     }
176 
177     // Remember that this thread has acquired this clock.
178     if (nclk > tid_)
179       src->elem(tid_).reused = reused_;
180   }
181 
182   if (acquired) {
183     CPP_STAT_INC(StatClockAcquiredSomething);
184     last_acquire_ = clk_[tid_];
185     ResetCached(c);
186   }
187 }
188 
releaseStoreAcquire(ClockCache * c,SyncClock * sc)189 void ThreadClock::releaseStoreAcquire(ClockCache *c, SyncClock *sc) {
190   DCHECK_LE(nclk_, kMaxTid);
191   DCHECK_LE(sc->size_, kMaxTid);
192 
193   if (sc->size_ == 0) {
194     // ReleaseStore will correctly set release_store_tid_,
195     // which can be important for future operations.
196     ReleaseStore(c, sc);
197     return;
198   }
199 
200   nclk_ = max(nclk_, (uptr) sc->size_);
201 
202   // Check if we need to resize sc.
203   if (sc->size_ < nclk_)
204     sc->Resize(c, nclk_);
205 
206   bool acquired = false;
207 
208   sc->Unshare(c);
209   // Update sc->clk_.
210   sc->FlushDirty();
211   uptr i = 0;
212   for (ClockElem &ce : *sc) {
213     u64 tmp = clk_[i];
214     if (clk_[i] < ce.epoch) {
215       clk_[i] = ce.epoch;
216       acquired = true;
217     }
218     ce.epoch = tmp;
219     ce.reused = 0;
220     i++;
221   }
222   sc->release_store_tid_ = kInvalidTid;
223   sc->release_store_reused_ = 0;
224 
225   if (acquired) {
226     CPP_STAT_INC(StatClockAcquiredSomething);
227     last_acquire_ = clk_[tid_];
228     ResetCached(c);
229   }
230 }
231 
release(ClockCache * c,SyncClock * dst)232 void ThreadClock::release(ClockCache *c, SyncClock *dst) {
233   DCHECK_LE(nclk_, kMaxTid);
234   DCHECK_LE(dst->size_, kMaxTid);
235 
236   if (dst->size_ == 0) {
237     // ReleaseStore will correctly set release_store_tid_,
238     // which can be important for future operations.
239     ReleaseStore(c, dst);
240     return;
241   }
242 
243   CPP_STAT_INC(StatClockRelease);
244   // Check if we need to resize dst.
245   if (dst->size_ < nclk_)
246     dst->Resize(c, nclk_);
247 
248   // Check if we had not acquired anything from other threads
249   // since the last release on dst. If so, we need to update
250   // only dst->elem(tid_).
251   if (!HasAcquiredAfterRelease(dst)) {
252     UpdateCurrentThread(c, dst);
253     if (dst->release_store_tid_ != tid_ ||
254         dst->release_store_reused_ != reused_)
255       dst->release_store_tid_ = kInvalidTid;
256     return;
257   }
258 
259   // O(N) release.
260   CPP_STAT_INC(StatClockReleaseFull);
261   dst->Unshare(c);
262   // First, remember whether we've acquired dst.
263   bool acquired = IsAlreadyAcquired(dst);
264   if (acquired)
265     CPP_STAT_INC(StatClockReleaseAcquired);
266   // Update dst->clk_.
267   dst->FlushDirty();
268   uptr i = 0;
269   for (ClockElem &ce : *dst) {
270     ce.epoch = max(ce.epoch, clk_[i]);
271     ce.reused = 0;
272     i++;
273   }
274   // Clear 'acquired' flag in the remaining elements.
275   if (nclk_ < dst->size_)
276     CPP_STAT_INC(StatClockReleaseClearTail);
277   dst->release_store_tid_ = kInvalidTid;
278   dst->release_store_reused_ = 0;
279   // If we've acquired dst, remember this fact,
280   // so that we don't need to acquire it on next acquire.
281   if (acquired)
282     dst->elem(tid_).reused = reused_;
283 }
284 
ReleaseStore(ClockCache * c,SyncClock * dst)285 void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
286   DCHECK_LE(nclk_, kMaxTid);
287   DCHECK_LE(dst->size_, kMaxTid);
288   CPP_STAT_INC(StatClockStore);
289 
290   if (dst->size_ == 0 && cached_idx_ != 0) {
291     // Reuse the cached clock.
292     // Note: we could reuse/cache the cached clock in more cases:
293     // we could update the existing clock and cache it, or replace it with the
294     // currently cached clock and release the old one. And for a shared
295     // existing clock, we could replace it with the currently cached;
296     // or unshare, update and cache. But, for simplicity, we currnetly reuse
297     // cached clock only when the target clock is empty.
298     dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
299     dst->tab_idx_ = cached_idx_;
300     dst->size_ = cached_size_;
301     dst->blocks_ = cached_blocks_;
302     CHECK_EQ(dst->dirty_[0].tid, kInvalidTid);
303     // The cached clock is shared (immutable),
304     // so this is where we store the current clock.
305     dst->dirty_[0].tid = tid_;
306     dst->dirty_[0].epoch = clk_[tid_];
307     dst->release_store_tid_ = tid_;
308     dst->release_store_reused_ = reused_;
309     // Rememeber that we don't need to acquire it in future.
310     dst->elem(tid_).reused = reused_;
311     // Grab a reference.
312     atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
313     return;
314   }
315 
316   // Check if we need to resize dst.
317   if (dst->size_ < nclk_)
318     dst->Resize(c, nclk_);
319 
320   if (dst->release_store_tid_ == tid_ &&
321       dst->release_store_reused_ == reused_ &&
322       !HasAcquiredAfterRelease(dst)) {
323     CPP_STAT_INC(StatClockStoreFast);
324     UpdateCurrentThread(c, dst);
325     return;
326   }
327 
328   // O(N) release-store.
329   CPP_STAT_INC(StatClockStoreFull);
330   dst->Unshare(c);
331   // Note: dst can be larger than this ThreadClock.
332   // This is fine since clk_ beyond size is all zeros.
333   uptr i = 0;
334   for (ClockElem &ce : *dst) {
335     ce.epoch = clk_[i];
336     ce.reused = 0;
337     i++;
338   }
339   for (uptr i = 0; i < kDirtyTids; i++)
340     dst->dirty_[i].tid = kInvalidTid;
341   dst->release_store_tid_ = tid_;
342   dst->release_store_reused_ = reused_;
343   // Rememeber that we don't need to acquire it in future.
344   dst->elem(tid_).reused = reused_;
345 
346   // If the resulting clock is cachable, cache it for future release operations.
347   // The clock is always cachable if we released to an empty sync object.
348   if (cached_idx_ == 0 && dst->Cachable()) {
349     // Grab a reference to the ClockBlock.
350     atomic_uint32_t *ref = ref_ptr(dst->tab_);
351     if (atomic_load(ref, memory_order_acquire) == 1)
352       atomic_store_relaxed(ref, 2);
353     else
354       atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
355     cached_idx_ = dst->tab_idx_;
356     cached_size_ = dst->size_;
357     cached_blocks_ = dst->blocks_;
358   }
359 }
360 
acq_rel(ClockCache * c,SyncClock * dst)361 void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
362   CPP_STAT_INC(StatClockAcquireRelease);
363   acquire(c, dst);
364   ReleaseStore(c, dst);
365 }
366 
367 // Updates only single element related to the current thread in dst->clk_.
UpdateCurrentThread(ClockCache * c,SyncClock * dst) const368 void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
369   // Update the threads time, but preserve 'acquired' flag.
370   for (unsigned i = 0; i < kDirtyTids; i++) {
371     SyncClock::Dirty *dirty = &dst->dirty_[i];
372     const unsigned tid = dirty->tid;
373     if (tid == tid_ || tid == kInvalidTid) {
374       CPP_STAT_INC(StatClockReleaseFast);
375       dirty->tid = tid_;
376       dirty->epoch = clk_[tid_];
377       return;
378     }
379   }
380   // Reset all 'acquired' flags, O(N).
381   // We are going to touch dst elements, so we need to unshare it.
382   dst->Unshare(c);
383   CPP_STAT_INC(StatClockReleaseSlow);
384   dst->elem(tid_).epoch = clk_[tid_];
385   for (uptr i = 0; i < dst->size_; i++)
386     dst->elem(i).reused = 0;
387   dst->FlushDirty();
388 }
389 
390 // Checks whether the current thread has already acquired src.
IsAlreadyAcquired(const SyncClock * src) const391 bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
392   if (src->elem(tid_).reused != reused_)
393     return false;
394   for (unsigned i = 0; i < kDirtyTids; i++) {
395     SyncClock::Dirty dirty = src->dirty_[i];
396     if (dirty.tid != kInvalidTid) {
397       if (clk_[dirty.tid] < dirty.epoch)
398         return false;
399     }
400   }
401   return true;
402 }
403 
404 // Checks whether the current thread has acquired anything
405 // from other clocks after releasing to dst (directly or indirectly).
HasAcquiredAfterRelease(const SyncClock * dst) const406 bool ThreadClock::HasAcquiredAfterRelease(const SyncClock *dst) const {
407   const u64 my_epoch = dst->elem(tid_).epoch;
408   return my_epoch <= last_acquire_ ||
409       my_epoch <= atomic_load_relaxed(&global_acquire_);
410 }
411 
412 // Sets a single element in the vector clock.
413 // This function is called only from weird places like AcquireGlobal.
set(ClockCache * c,unsigned tid,u64 v)414 void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
415   DCHECK_LT(tid, kMaxTid);
416   DCHECK_GE(v, clk_[tid]);
417   clk_[tid] = v;
418   if (nclk_ <= tid)
419     nclk_ = tid + 1;
420   last_acquire_ = clk_[tid_];
421   ResetCached(c);
422 }
423 
DebugDump(int (* printf)(const char * s,...))424 void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
425   printf("clock=[");
426   for (uptr i = 0; i < nclk_; i++)
427     printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
428   printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
429 }
430 
SyncClock()431 SyncClock::SyncClock() {
432   ResetImpl();
433 }
434 
~SyncClock()435 SyncClock::~SyncClock() {
436   // Reset must be called before dtor.
437   CHECK_EQ(size_, 0);
438   CHECK_EQ(blocks_, 0);
439   CHECK_EQ(tab_, 0);
440   CHECK_EQ(tab_idx_, 0);
441 }
442 
Reset(ClockCache * c)443 void SyncClock::Reset(ClockCache *c) {
444   if (size_)
445     UnrefClockBlock(c, tab_idx_, blocks_);
446   ResetImpl();
447 }
448 
ResetImpl()449 void SyncClock::ResetImpl() {
450   tab_ = 0;
451   tab_idx_ = 0;
452   size_ = 0;
453   blocks_ = 0;
454   release_store_tid_ = kInvalidTid;
455   release_store_reused_ = 0;
456   for (uptr i = 0; i < kDirtyTids; i++)
457     dirty_[i].tid = kInvalidTid;
458 }
459 
Resize(ClockCache * c,uptr nclk)460 void SyncClock::Resize(ClockCache *c, uptr nclk) {
461   CPP_STAT_INC(StatClockReleaseResize);
462   Unshare(c);
463   if (nclk <= capacity()) {
464     // Memory is already allocated, just increase the size.
465     size_ = nclk;
466     return;
467   }
468   if (size_ == 0) {
469     // Grow from 0 to one-level table.
470     CHECK_EQ(size_, 0);
471     CHECK_EQ(blocks_, 0);
472     CHECK_EQ(tab_, 0);
473     CHECK_EQ(tab_idx_, 0);
474     tab_idx_ = ctx->clock_alloc.Alloc(c);
475     tab_ = ctx->clock_alloc.Map(tab_idx_);
476     internal_memset(tab_, 0, sizeof(*tab_));
477     atomic_store_relaxed(ref_ptr(tab_), 1);
478     size_ = 1;
479   } else if (size_ > blocks_ * ClockBlock::kClockCount) {
480     u32 idx = ctx->clock_alloc.Alloc(c);
481     ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
482     uptr top = size_ - blocks_ * ClockBlock::kClockCount;
483     CHECK_LT(top, ClockBlock::kClockCount);
484     const uptr move = top * sizeof(tab_->clock[0]);
485     internal_memcpy(&new_cb->clock[0], tab_->clock, move);
486     internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
487     internal_memset(tab_->clock, 0, move);
488     append_block(idx);
489   }
490   // At this point we have first level table allocated and all clock elements
491   // are evacuated from it to a second level block.
492   // Add second level tables as necessary.
493   while (nclk > capacity()) {
494     u32 idx = ctx->clock_alloc.Alloc(c);
495     ClockBlock *cb = ctx->clock_alloc.Map(idx);
496     internal_memset(cb, 0, sizeof(*cb));
497     append_block(idx);
498   }
499   size_ = nclk;
500 }
501 
502 // Flushes all dirty elements into the main clock array.
FlushDirty()503 void SyncClock::FlushDirty() {
504   for (unsigned i = 0; i < kDirtyTids; i++) {
505     Dirty *dirty = &dirty_[i];
506     if (dirty->tid != kInvalidTid) {
507       CHECK_LT(dirty->tid, size_);
508       elem(dirty->tid).epoch = dirty->epoch;
509       dirty->tid = kInvalidTid;
510     }
511   }
512 }
513 
IsShared() const514 bool SyncClock::IsShared() const {
515   if (size_ == 0)
516     return false;
517   atomic_uint32_t *ref = ref_ptr(tab_);
518   u32 v = atomic_load(ref, memory_order_acquire);
519   CHECK_GT(v, 0);
520   return v > 1;
521 }
522 
523 // Unshares the current clock if it's shared.
524 // Shared clocks are immutable, so they need to be unshared before any updates.
525 // Note: this does not apply to dirty entries as they are not shared.
Unshare(ClockCache * c)526 void SyncClock::Unshare(ClockCache *c) {
527   if (!IsShared())
528     return;
529   // First, copy current state into old.
530   SyncClock old;
531   old.tab_ = tab_;
532   old.tab_idx_ = tab_idx_;
533   old.size_ = size_;
534   old.blocks_ = blocks_;
535   old.release_store_tid_ = release_store_tid_;
536   old.release_store_reused_ = release_store_reused_;
537   for (unsigned i = 0; i < kDirtyTids; i++)
538     old.dirty_[i] = dirty_[i];
539   // Then, clear current object.
540   ResetImpl();
541   // Allocate brand new clock in the current object.
542   Resize(c, old.size_);
543   // Now copy state back into this object.
544   Iter old_iter(&old);
545   for (ClockElem &ce : *this) {
546     ce = *old_iter;
547     ++old_iter;
548   }
549   release_store_tid_ = old.release_store_tid_;
550   release_store_reused_ = old.release_store_reused_;
551   for (unsigned i = 0; i < kDirtyTids; i++)
552     dirty_[i] = old.dirty_[i];
553   // Drop reference to old and delete if necessary.
554   old.Reset(c);
555 }
556 
557 // Can we cache this clock for future release operations?
Cachable() const558 ALWAYS_INLINE bool SyncClock::Cachable() const {
559   if (size_ == 0)
560     return false;
561   for (unsigned i = 0; i < kDirtyTids; i++) {
562     if (dirty_[i].tid != kInvalidTid)
563       return false;
564   }
565   return atomic_load_relaxed(ref_ptr(tab_)) == 1;
566 }
567 
568 // elem linearizes the two-level structure into linear array.
569 // Note: this is used only for one time accesses, vector operations use
570 // the iterator as it is much faster.
elem(unsigned tid) const571 ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
572   DCHECK_LT(tid, size_);
573   const uptr block = tid / ClockBlock::kClockCount;
574   DCHECK_LE(block, blocks_);
575   tid %= ClockBlock::kClockCount;
576   if (block == blocks_)
577     return tab_->clock[tid];
578   u32 idx = get_block(block);
579   ClockBlock *cb = ctx->clock_alloc.Map(idx);
580   return cb->clock[tid];
581 }
582 
capacity() const583 ALWAYS_INLINE uptr SyncClock::capacity() const {
584   if (size_ == 0)
585     return 0;
586   uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
587   // How many clock elements we can fit into the first level block.
588   // +1 for ref counter.
589   uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;
590   return blocks_ * ClockBlock::kClockCount + top;
591 }
592 
get_block(uptr bi) const593 ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
594   DCHECK(size_);
595   DCHECK_LT(bi, blocks_);
596   return tab_->table[ClockBlock::kBlockIdx - bi];
597 }
598 
append_block(u32 idx)599 ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
600   uptr bi = blocks_++;
601   CHECK_EQ(get_block(bi), 0);
602   tab_->table[ClockBlock::kBlockIdx - bi] = idx;
603 }
604 
605 // Used only by tests.
get(unsigned tid) const606 u64 SyncClock::get(unsigned tid) const {
607   for (unsigned i = 0; i < kDirtyTids; i++) {
608     Dirty dirty = dirty_[i];
609     if (dirty.tid == tid)
610       return dirty.epoch;
611   }
612   return elem(tid).epoch;
613 }
614 
615 // Used only by Iter test.
get_clean(unsigned tid) const616 u64 SyncClock::get_clean(unsigned tid) const {
617   return elem(tid).epoch;
618 }
619 
DebugDump(int (* printf)(const char * s,...))620 void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
621   printf("clock=[");
622   for (uptr i = 0; i < size_; i++)
623     printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
624   printf("] reused=[");
625   for (uptr i = 0; i < size_; i++)
626     printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
627   printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
628       release_store_tid_, release_store_reused_,
629       dirty_[0].tid, dirty_[0].epoch,
630       dirty_[1].tid, dirty_[1].epoch);
631 }
632 
Next()633 void SyncClock::Iter::Next() {
634   // Finished with the current block, move on to the next one.
635   block_++;
636   if (block_ < parent_->blocks_) {
637     // Iterate over the next second level block.
638     u32 idx = parent_->get_block(block_);
639     ClockBlock *cb = ctx->clock_alloc.Map(idx);
640     pos_ = &cb->clock[0];
641     end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
642         ClockBlock::kClockCount);
643     return;
644   }
645   if (block_ == parent_->blocks_ &&
646       parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
647     // Iterate over elements in the first level block.
648     pos_ = &parent_->tab_->clock[0];
649     end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
650         ClockBlock::kClockCount);
651     return;
652   }
653   parent_ = nullptr;  // denotes end
654 }
655 }  // namespace __tsan
656