//===-- tsan_rtl.h ----------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Main internal TSan header file.
//
// Ground rules:
//   - C++ run-time should not be used (static CTORs, RTTI, exceptions, static
//     function-scope locals)
//   - All functions/classes/etc reside in namespace __tsan, except for those
//     declared in tsan_interface.h.
//   - Platform-specific files should be used instead of ifdefs (*).
//   - No system headers included in header files (*).
//   - Platform-specific headers included only into platform-specific files (*).
//
//   (*) Except when inlining is critical for performance.
//===----------------------------------------------------------------------===//

#ifndef TSAN_RTL_H
#define TSAN_RTL_H

#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_allocator64.h"
#include "tsan_clock.h"
#include "tsan_defs.h"
#include "tsan_flags.h"
#include "tsan_sync.h"
#include "tsan_trace.h"
#include "tsan_vector.h"
#include "tsan_report.h"

namespace __tsan {

// Descriptor of user's memory block.
struct MBlock {
  Mutex mtx;
  uptr size;
  u32 alloc_tid;
  u32 alloc_stack_id;
  SyncVar *head;
};

#ifndef TSAN_GO
#if defined(TSAN_COMPAT_SHADOW) && TSAN_COMPAT_SHADOW
const uptr kAllocatorSpace = 0x7d0000000000ULL;
#else
const uptr kAllocatorSpace = 0x7d0000000000ULL;
#endif
const uptr kAllocatorSize  = 0x10000000000ULL;  // 1T.

typedef SizeClassAllocator64<kAllocatorSpace, kAllocatorSize, sizeof(MBlock),
    DefaultSizeClassMap> PrimaryAllocator;
typedef SizeClassAllocatorLocalCache<PrimaryAllocator::kNumClasses,
    PrimaryAllocator> AllocatorCache;
typedef LargeMmapAllocator SecondaryAllocator;
typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
    SecondaryAllocator> Allocator;
Allocator *allocator();
#endif
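
// A minimal usage sketch of the allocator stack above (illustrative only;
// the real allocation path lives in tsan_mman.cc). It assumes the
// sanitizer_common CombinedAllocator interface, where Allocate/Deallocate
// take a per-thread cache so that the fast path is lock-free:
//
//   ThreadState *thr = cur_thread();
//   void *p = allocator()->Allocate(&thr->alloc_cache, size, /*alignment=*/8);
//   ...
//   allocator()->Deallocate(&thr->alloc_cache, p);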

void TsanPrintf(const char *format, ...);

// FastState (from most significant bit):
//   unused          : 1
//   tid             : kTidBits
//   epoch           : kClkBits
//   unused          : -
//   ignore_bit      : 1
class FastState {
 public:
  FastState(u64 tid, u64 epoch) {
    x_ = tid << kTidShift;
    x_ |= epoch << kClkShift;
    DCHECK(tid == this->tid());
    DCHECK(epoch == this->epoch());
  }

  explicit FastState(u64 x)
      : x_(x) {
  }

  u64 raw() const {
    return x_;
  }

  u64 tid() const {
    u64 res = x_ >> kTidShift;
    return res;
  }

  u64 epoch() const {
    u64 res = (x_ << (kTidBits + 1)) >> (64 - kClkBits);
    return res;
  }

  void IncrementEpoch() {
    u64 old_epoch = epoch();
    x_ += 1 << kClkShift;
    DCHECK_EQ(old_epoch + 1, epoch());
    (void)old_epoch;
  }

  void SetIgnoreBit() { x_ |= kIgnoreBit; }
  void ClearIgnoreBit() { x_ &= ~kIgnoreBit; }
  bool GetIgnoreBit() const { return x_ & kIgnoreBit; }

 private:
  friend class Shadow;
  static const int kTidShift = 64 - kTidBits - 1;
  static const int kClkShift = kTidShift - kClkBits;
  static const u64 kIgnoreBit = 1ull;
  static const u64 kFreedBit = 1ull << 63;
  u64 x_;
};
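
// Usage sketch for FastState (illustrative only): pack a (tid, epoch) pair,
// read it back, then advance the epoch by one.
//
//   FastState s(/*tid=*/5, /*epoch=*/100);
//   DCHECK_EQ(s.tid(), 5);
//   DCHECK_EQ(s.epoch(), 100);
//   s.IncrementEpoch();  // epoch() is now 101; tid() is unchanged.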

// Shadow (from most significant bit):
//   freed           : 1
//   tid             : kTidBits
//   epoch           : kClkBits
//   is_write        : 1
//   size_log        : 2
//   addr0           : 3
class Shadow : public FastState {
 public:
  explicit Shadow(u64 x) : FastState(x) { }

  explicit Shadow(const FastState &s) : FastState(s.x_) { }

  void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) {
    DCHECK_EQ(x_ & 31, 0);
    DCHECK_LE(addr0, 7);
    DCHECK_LE(kAccessSizeLog, 3);
    x_ |= (kAccessSizeLog << 3) | addr0;
    DCHECK_EQ(kAccessSizeLog, size_log());
    DCHECK_EQ(addr0, this->addr0());
  }

  void SetWrite(unsigned kAccessIsWrite) {
    DCHECK_EQ(x_ & 32, 0);
    if (kAccessIsWrite)
      x_ |= 32;
    DCHECK_EQ(kAccessIsWrite, is_write());
  }

  bool IsZero() const { return x_ == 0; }

  static inline bool TidsAreEqual(const Shadow s1, const Shadow s2) {
    u64 shifted_xor = (s1.x_ ^ s2.x_) >> kTidShift;
    DCHECK_EQ(shifted_xor == 0, s1.tid() == s2.tid());
    return shifted_xor == 0;
  }

  static inline bool Addr0AndSizeAreEqual(const Shadow s1, const Shadow s2) {
    u64 masked_xor = (s1.x_ ^ s2.x_) & 31;
    return masked_xor == 0;
  }

  static inline bool TwoRangesIntersect(Shadow s1, Shadow s2,
      unsigned kS2AccessSize) {
    bool res = false;
    u64 diff = s1.addr0() - s2.addr0();
    if ((s64)diff < 0) {  // s1.addr0 < s2.addr0  // NOLINT
      // if (s1.addr0() + size1) > s2.addr0()) return true;
      if (s1.size() > -diff)
        res = true;
    } else {
      // if (s2.addr0() + kS2AccessSize > s1.addr0()) return true;
      if (kS2AccessSize > diff)
        res = true;
    }
    DCHECK_EQ(res, TwoRangesIntersectSLOW(s1, s2));
    DCHECK_EQ(res, TwoRangesIntersectSLOW(s2, s1));
    return res;
  }

  // The idea behind the offset is as follows.
  // Consider that we have 8 bool's contained within a single 8-byte block
  // (mapped to a single shadow "cell"). Now consider that we write to the
  // bools from a single thread (which we consider the common case).
  // W/o offsetting, each access will have to scan 4 shadow values on average
  // to find the corresponding shadow value for the bool.
  // With offsetting, we start scanning the shadow at the offset, so that
  // each access hits the necessary shadow straight off (at least in the
  // expected optimistic case).
  // This logic works seamlessly for any layout of user data. For example,
  // if user data is {int, short, char, char}, then accesses to the int are
  // offsetted to 0, to the short to 4, to the 1st char to 6 and to the 2nd
  // char to 7. Hopefully, accesses from a single thread won't need to scan
  // all 8 shadow values.
  unsigned ComputeSearchOffset() {
    return x_ & 7;
  }
  u64 addr0() const { return x_ & 7; }
  u64 size() const { return 1ull << size_log(); }
  bool is_write() const { return x_ & 32; }

  // The idea behind the freed bit is as follows.
  // When the memory is freed (or otherwise made inaccessible), we write to
  // the shadow values with the tid/epoch related to the free and with the
  // freed bit set. During memory access processing the freed bit is treated
  // as the msb of the tid. So any access races with a shadow value that has
  // the freed bit set (it is as if it were a write from a thread with which
  // we have never synchronized before). This allows us to detect accesses to
  // freed memory w/o additional overhead in memory access processing, and at
  // the same time to restore the tid/epoch of the free.
  void MarkAsFreed() {
    x_ |= kFreedBit;
  }

  bool GetFreedAndReset() {
    bool res = x_ & kFreedBit;
    x_ &= ~kFreedBit;
    return res;
  }

 private:
  u64 size_log() const { return (x_ >> 3) & 3; }

  static bool TwoRangesIntersectSLOW(const Shadow s1, const Shadow s2) {
    if (s1.addr0() == s2.addr0()) return true;
    if (s1.addr0() < s2.addr0() && s1.addr0() + s1.size() > s2.addr0())
      return true;
    if (s2.addr0() < s1.addr0() && s2.addr0() + s2.size() > s1.addr0())
      return true;
    return false;
  }
};
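
// Encoding sketch for Shadow (illustrative only): a 4-byte write to the
// second half of an 8-byte cell (addr0=4, size_log=2) by thread 5 at
// epoch 100.
//
//   Shadow s(FastState(/*tid=*/5, /*epoch=*/100));
//   s.SetAddr0AndSizeLog(/*addr0=*/4, /*kAccessSizeLog=*/2);
//   s.SetWrite(true);
//   // s now covers bytes [4, 8) of the cell; e.g. a 2-byte access with
//   // addr0=6 satisfies TwoRangesIntersect() against it.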

// Freed memory.
// As if an 8-byte write by thread 0xff..f at epoch 0xff..f, races with
// everything.
const u64 kShadowFreed = 0xfffffffffffffff8ull;

struct SignalContext;

// This struct is stored in TLS.
struct ThreadState {
  FastState fast_state;
  // Synch epoch represents the thread's epoch before the last synchronization
  // action. It allows us to reduce the number of shadow state updates.
  // For example, fast_synch_epoch=100, last write to addr X was at epoch=150,
  // if we are processing a write to X from the same thread at epoch=200,
  // we do nothing, because both writes happen in the same 'synch epoch'.
  // That is, if another memory access does not race with the former write,
  // it does not race with the latter as well.
  // QUESTION: can we squeeze this into ThreadState::Fast?
  // E.g. ThreadState::Fast is 44 bits, 32 are taken by synch_epoch and 12 are
  // taken by the epoch between synchs.
  // This way we can save one load from tls.
  u64 fast_synch_epoch;
  // This is a slow path flag. On fast path, fast_state.GetIgnoreBit() is read.
  // We do not distinguish between ignoring reads and writes
  // for better performance.
  int ignore_reads_and_writes;
  uptr *shadow_stack_pos;
  u64 *racy_shadow_addr;
  u64 racy_state[2];
  Trace trace;
#ifndef TSAN_GO
  // C/C++ uses an embedded shadow stack of fixed size.
  uptr shadow_stack[kShadowStackSize];
#else
  // Go uses a satellite shadow stack with dynamic size.
  uptr *shadow_stack;
  uptr *shadow_stack_end;
#endif
  ThreadClock clock;
#ifndef TSAN_GO
  AllocatorCache alloc_cache;
#endif
  u64 stat[StatCnt];
  const int tid;
  const int unique_id;
  int in_rtl;
  bool is_alive;
  const uptr stk_addr;
  const uptr stk_size;
  const uptr tls_addr;
  const uptr tls_size;

  DeadlockDetector deadlock_detector;

  bool in_signal_handler;
  SignalContext *signal_ctx;

#ifndef TSAN_GO
  u32 last_sleep_stack_id;
  ThreadClock last_sleep_clock;
#endif

  // Set in regions of runtime that must be signal-safe and fork-safe.
  // If set, malloc must not be called.
  int nomalloc;

  explicit ThreadState(Context *ctx, int tid, int unique_id, u64 epoch,
                       uptr stk_addr, uptr stk_size,
                       uptr tls_addr, uptr tls_size);
};

Context *CTX();

#ifndef TSAN_GO
extern THREADLOCAL char cur_thread_placeholder[];
INLINE ThreadState *cur_thread() {
  return reinterpret_cast<ThreadState *>(&cur_thread_placeholder);
}
#endif

enum ThreadStatus {
  ThreadStatusInvalid,   // Non-existent thread, data is invalid.
  ThreadStatusCreated,   // Created but not yet running.
  ThreadStatusRunning,   // The thread is currently running.
  ThreadStatusFinished,  // Joinable thread is finished but not yet joined.
  ThreadStatusDead,      // Joined, but some info (trace) is still alive.
};

// Info about a thread that is held for some time after its termination.
struct ThreadDeadInfo {
  Trace trace;
};

struct ThreadContext {
  const int tid;
  int unique_id;  // Non-rolling thread id.
  uptr user_id;  // Some opaque user thread id (e.g. pthread_t).
  ThreadState *thr;
  ThreadStatus status;
  bool detached;
  int reuse_count;
  SyncClock sync;
  // Epoch at which the thread had started.
  // If we see an event from the thread stamped by an older epoch,
  // the event is from a dead thread that shared the tid with this thread.
  u64 epoch0;
  u64 epoch1;
  StackTrace creation_stack;
  ThreadDeadInfo *dead_info;
  ThreadContext *dead_next;  // In dead thread list.

  explicit ThreadContext(int tid);
};

struct RacyStacks {
  MD5Hash hash[2];
  bool operator==(const RacyStacks &other) const {
    if (hash[0] == other.hash[0] && hash[1] == other.hash[1])
      return true;
    if (hash[0] == other.hash[1] && hash[1] == other.hash[0])
      return true;
    return false;
  }
};

struct RacyAddress {
  uptr addr_min;
  uptr addr_max;
};

struct Context {
  Context();

  bool initialized;

  SyncTab synctab;

  Mutex report_mtx;
  int nreported;
  int nmissed_expected;

  Mutex thread_mtx;
  unsigned thread_seq;
  unsigned unique_thread_seq;
  int alive_threads;
  int max_alive_threads;
  ThreadContext *threads[kMaxTid];
  int dead_list_size;
  ThreadContext *dead_list_head;
  ThreadContext *dead_list_tail;

  Vector<RacyStacks> racy_stacks;
  Vector<RacyAddress> racy_addresses;

  Flags flags;

  u64 stat[StatCnt];
  u64 int_alloc_cnt[MBlockTypeCount];
  u64 int_alloc_siz[MBlockTypeCount];
};

class ScopedInRtl {
 public:
  ScopedInRtl();
  ~ScopedInRtl();
 private:
  ThreadState *thr_;
  int in_rtl_;
  int errno_;
};

class ScopedReport {
 public:
  explicit ScopedReport(ReportType typ);
  ~ScopedReport();

  void AddStack(const StackTrace *stack);
  void AddMemoryAccess(uptr addr, Shadow s, const StackTrace *stack);
  void AddThread(const ThreadContext *tctx);
  void AddMutex(const SyncVar *s);
  void AddLocation(uptr addr, uptr size);
  void AddSleep(u32 stack_id);

  const ReportDesc *GetReport() const;

 private:
  Context *ctx_;
  ReportDesc *rep_;

  ScopedReport(const ScopedReport&);
  void operator = (const ScopedReport&);
};

void RestoreStack(int tid, const u64 epoch, StackTrace *stk);

void StatAggregate(u64 *dst, u64 *src);
void StatOutput(u64 *stat);
void ALWAYS_INLINE INLINE StatInc(ThreadState *thr, StatType typ, u64 n = 1) {
  if (kCollectStats)
    thr->stat[typ] += n;
}

void InitializeShadowMemory();
void InitializeInterceptors();
void InitializeDynamicAnnotations();

void ReportRace(ThreadState *thr);
bool OutputReport(const ScopedReport &srep,
                  const ReportStack *suppress_stack = 0);
bool IsExpectedReport(uptr addr, uptr size);

#if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 1
# define DPrintf TsanPrintf
#else
# define DPrintf(...)
#endif

#if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 2
# define DPrintf2 TsanPrintf
#else
# define DPrintf2(...)
#endif

u32 CurrentStackId(ThreadState *thr, uptr pc);
void PrintCurrentStack(ThreadState *thr, uptr pc);

void Initialize(ThreadState *thr);
int Finalize(ThreadState *thr);

void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite);
void MemoryAccessImpl(ThreadState *thr, uptr addr,
    int kAccessSizeLog, bool kAccessIsWrite, FastState fast_state,
    u64 *shadow_mem, Shadow cur);
void MemoryRead1Byte(ThreadState *thr, uptr pc, uptr addr);
void MemoryWrite1Byte(ThreadState *thr, uptr pc, uptr addr);
void MemoryRead8Byte(ThreadState *thr, uptr pc, uptr addr);
void MemoryWrite8Byte(ThreadState *thr, uptr pc, uptr addr);
void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr,
                       uptr size, bool is_write);
void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size);
void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size);
void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size);
void IgnoreCtl(ThreadState *thr, bool write, bool begin);

void FuncEntry(ThreadState *thr, uptr pc);
void FuncExit(ThreadState *thr);

int ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached);
void ThreadStart(ThreadState *thr, int tid);
void ThreadFinish(ThreadState *thr);
int ThreadTid(ThreadState *thr, uptr pc, uptr uid);
void ThreadJoin(ThreadState *thr, uptr pc, int tid);
void ThreadDetach(ThreadState *thr, uptr pc, int tid);
void ThreadFinalize(ThreadState *thr);
void ThreadFinalizerGoroutine(ThreadState *thr);

void MutexCreate(ThreadState *thr, uptr pc, uptr addr,
                 bool rw, bool recursive, bool linker_init);
void MutexDestroy(ThreadState *thr, uptr pc, uptr addr);
void MutexLock(ThreadState *thr, uptr pc, uptr addr);
void MutexUnlock(ThreadState *thr, uptr pc, uptr addr);
void MutexReadLock(ThreadState *thr, uptr pc, uptr addr);
void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr);
void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr);

void Acquire(ThreadState *thr, uptr pc, uptr addr);
void Release(ThreadState *thr, uptr pc, uptr addr);
void ReleaseStore(ThreadState *thr, uptr pc, uptr addr);
void AfterSleep(ThreadState *thr, uptr pc);

// The hacky call uses a custom calling convention and an assembly thunk.
// It is considerably faster than a normal call for the caller
// if it is not executed (it is intended for slow paths from hot functions).
// The trick is that the call preserves all registers and the compiler
// does not treat it as a call.
// If it does not work for you, use a normal call.
#if TSAN_DEBUG == 0
// The caller may not create the stack frame for itself at all,
// so we create a reserve stack frame for it (1024b must be enough).
#define HACKY_CALL(f) \
  __asm__ __volatile__("sub $1024, %%rsp;" \
                       "/*.cfi_adjust_cfa_offset 1024;*/" \
                       "call " #f "_thunk;" \
                       "add $1024, %%rsp;" \
                       "/*.cfi_adjust_cfa_offset -1024;*/" \
                       ::: "memory", "cc");
#else
#define HACKY_CALL(f) f()
#endif
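
// Illustrative use of HACKY_CALL (see TraceAddEvent below): the target f must
// have a matching assembly thunk named <f>_thunk that preserves all registers
// before reaching the real function, since the compiler is not told this is a
// call.
//
//   HACKY_CALL(__tsan_trace_switch);  // expands to the asm sequence above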

void TraceSwitch(ThreadState *thr);

extern "C" void __tsan_trace_switch();
void ALWAYS_INLINE INLINE TraceAddEvent(ThreadState *thr, u64 epoch,
                                        EventType typ, uptr addr) {
  StatInc(thr, StatEvents);
  if (UNLIKELY((epoch % kTracePartSize) == 0)) {
#ifndef TSAN_GO
    HACKY_CALL(__tsan_trace_switch);
#else
    TraceSwitch(thr);
#endif
  }
  Event *evp = &thr->trace.events[epoch % kTraceSize];
  Event ev = (u64)addr | ((u64)typ << 61);
  *evp = ev;
}
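
// Decoding sketch for the event encoding above (illustrative only): an Event
// packs the type into the top 3 bits and the address/pc into the low 61 bits,
// so decoding is:
//
//   Event ev = (u64)addr | ((u64)typ << 61);
//   EventType typ2 = (EventType)(ev >> 61);
//   uptr addr2 = (uptr)(ev & ((1ull << 61) - 1));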

}  // namespace __tsan

#endif  // TSAN_RTL_H