// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_ALLOCATOR_DISPATCHER_TLS_H_
#define BASE_ALLOCATOR_DISPATCHER_TLS_H_

#include "build/build_config.h"

#if BUILDFLAG(IS_POSIX)  // the current allocation mechanism (mmap) and TLS
                         // support (pthread) are both defined by POSIX
#define USE_LOCAL_TLS_EMULATION() true
#else
#define USE_LOCAL_TLS_EMULATION() false
#endif

#if USE_LOCAL_TLS_EMULATION()
#include <algorithm>
#include <atomic>
#include <memory>
#include <mutex>

#include "base/allocator/partition_allocator/src/partition_alloc/partition_alloc_constants.h"
#include "base/base_export.h"
#include "base/check.h"
#include "base/compiler_specific.h"
#include "base/strings/string_piece.h"

#include <pthread.h>

#if HAS_FEATURE(thread_sanitizer)
#define DISABLE_TSAN_INSTRUMENTATION __attribute__((no_sanitize("thread")))
#else
#define DISABLE_TSAN_INSTRUMENTATION
#endif

#define STR_HELPER(x) #x
#define STR(x) STR_HELPER(x)

// Verify that a condition holds and cancel the process in case it doesn't. The
// functionality is similar to RAW_CHECK but includes more information in the
// logged messages. It is non-allocating to prevent recursion.
#define TLS_RAW_CHECK(error_message, condition) \
  TLS_RAW_CHECK_IMPL(error_message, condition, __FILE__, __LINE__)

#define TLS_RAW_CHECK_IMPL(error_message, condition, file, line)        \
  do {                                                                   \
    if (!(condition)) {                                                  \
      constexpr const char* message =                                    \
          "TLS System: " error_message " Failed condition '" #condition  \
          "' in (" file "@" STR(line) ").\n";                            \
      ::logging::RawCheckFailure(message);                               \
    }                                                                    \
  } while (0)
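
// Illustrative example (|pointer| and the allocation function are made-up
// names for exposition):
//
//   void* pointer = some_raw_allocation_function(size);
//   TLS_RAW_CHECK("Failed to allocate.", pointer != nullptr);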

namespace base::debug {
struct CrashKeyString;
}

namespace base::allocator::dispatcher {
namespace internal {

// Allocate memory using POSIX's mmap and munmap functionality. The allocator
// implements the allocator interface required by ThreadLocalStorage.
struct BASE_EXPORT MMapAllocator {
  // The minimum size of a memory chunk when allocating. Even for chunks with
  // fewer bytes, at least AllocationChunkSize bytes are allocated. For mmap,
  // this is usually the page size of the system.
  // For various OS-CPU combinations, partition_alloc::PartitionPageSize() is
  // not constexpr. Hence, we cannot use it and define the value locally.
#if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR) && \
    PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR
  constexpr static size_t AllocationChunkSize =
      partition_alloc::PartitionPageSize();
#elif BUILDFLAG(IS_APPLE)
  constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
  constexpr static size_t AllocationChunkSize = 16384;
#else
  constexpr static size_t AllocationChunkSize = 4096;
#endif

  // Allocate size_in_bytes bytes of raw memory. Return nullptr if allocation
  // fails.
  void* AllocateMemory(size_t size_in_bytes);
  // Free the raw memory pointed to by pointer_to_allocated. Returns a boolean
  // value indicating if the free was successful.
  bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
};
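
// Usage sketch (illustrative only; real call sites must terminate the process
// on allocation failure, see the interface notes on ThreadLocalStorage below):
//
//   MMapAllocator allocator;
//   void* memory =
//       allocator.AllocateMemory(MMapAllocator::AllocationChunkSize);
//   if (memory != nullptr) {
//     // ... construct objects in |memory| via placement new ...
//     allocator.FreeMemoryForTesting(memory,
//                                    MMapAllocator::AllocationChunkSize);
//   }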

// The allocator used by default for the thread local storage.
using DefaultAllocator = MMapAllocator;

using OnThreadTerminationFunction = void (*)(void*);

// The TLS system used by default for the thread local storage. It stores and
// retrieves thread specific data pointers.
class BASE_EXPORT PThreadTLSSystem {
 public:
  PThreadTLSSystem();

  PThreadTLSSystem(const PThreadTLSSystem&) = delete;
  PThreadTLSSystem(PThreadTLSSystem&&);
  PThreadTLSSystem& operator=(const PThreadTLSSystem&) = delete;
  PThreadTLSSystem& operator=(PThreadTLSSystem&&);

  // Initialize the TLS system to store a data set for different threads.
  // @param thread_termination_function An optional function which will be
  // invoked upon termination of a thread.
  bool Setup(OnThreadTerminationFunction thread_termination_function,
             const base::StringPiece instance_id);
  // Tear down the TLS system. After completing tear down, the thread
  // termination function passed to Setup will not be invoked anymore.
  bool TearDownForTesting();

  // Get the pointer to the data associated with the current thread. Returns
  // nullptr if the TLS system is not initialized or no data was set before.
  void* GetThreadSpecificData();
  // Set the pointer to the data associated with the current thread. Return
  // true if stored successfully, false otherwise.
  bool SetThreadSpecificData(void* data);

 private:
  base::debug::CrashKeyString* crash_key_ = nullptr;
  pthread_key_t data_access_key_ = 0;
#if DCHECK_IS_ON()
  // From POSIX standard at https://www.open-std.org/jtc1/sc22/open/n4217.pdf:
  // The effect of calling pthread_getspecific() or pthread_setspecific() with
  // a key value not obtained from pthread_key_create() or after key has been
  // deleted with pthread_key_delete() is undefined.
  //
  // Unfortunately, POSIX doesn't define a special value of pthread_key_t
  // indicating an invalid key which would allow us to detect accesses outside
  // of initialized state. Hence, to prevent us from drifting into the evil
  // realm of undefined behaviour we store whether we're somewhere between
  // Setup and Teardown.
  std::atomic_bool initialized_{false};
#endif
};
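
// Usage sketch (illustrative only; all return values should be checked and
// |data| is a made-up example pointer):
//
//   PThreadTLSSystem tls_system;
//   tls_system.Setup(nullptr, "example_instance");
//   tls_system.SetThreadSpecificData(data);
//   void* stored = tls_system.GetThreadSpecificData();  // == |data|
//   tls_system.TearDownForTesting();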

using DefaultTLSSystem = PThreadTLSSystem;

// In some scenarios, most notably when testing, the allocator and TLS system
// passed to |ThreadLocalStorage| are not copyable and have to be wrapped, e.g.
// in a std::reference_wrapper. |dereference| is a small helper to retrieve the
// underlying value.
template <typename T>
T& dereference(T& ref) {
  return ref;
}

template <typename T>
T& dereference(std::reference_wrapper<T>& ref) {
  // std::reference_wrapper requires a valid reference for construction,
  // therefore, there is no need to check here.
  return ref.get();
}
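
// Example (illustrative): both a plain object and a wrapped reference resolve
// to the underlying object.
//
//   MMapAllocator allocator;
//   std::reference_wrapper<MMapAllocator> wrapper(allocator);
//   MMapAllocator& a = dereference(allocator);  // the object itself
//   MMapAllocator& b = dereference(wrapper);    // the wrapped object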

// Store thread local data. The data is organized in chunks, where each chunk
// holds |ItemsPerChunk| items. Each item may be free or used.
//
// When a thread requests data, the chunks are searched for a free data item,
// which is registered for this thread and marked as |used|. Further requests
// by this thread will then always return the same item. When a thread
// terminates, the item will be reset and returned to the pool of free items.
//
// Upon construction, the first chunk is created. If a thread requests data and
// there is no free item available, another chunk is created. Upon destruction,
// all memory is freed. Pointers to data items become invalid!
//
// Constructor and destructor are not thread safe.
//
// @tparam PayloadType The item type to be stored.
// @tparam AllocatorType The allocator being used. An allocator must provide
// the following interface:
//   void* AllocateMemory(size_t size_in_bytes); // Allocate size_in_bytes
//     bytes of raw memory.
//   void FreeMemory(void* pointer_to_allocated, size_t size_in_bytes); // Free
//     the raw memory pointed to by pointer_to_allocated.
//   Any failure in allocation or free must terminate the process.
// @tparam TLSSystemType The TLS system being used. A TLS system must provide
// the following interface:
//   bool Setup(OnThreadTerminationFunction thread_termination_function);
//   bool Destroy();
//   void* GetThreadSpecificData();
//   bool SetThreadSpecificData(void* data);
// @tparam AllocationChunkSize The minimum size of a memory chunk that the
// allocator can handle. We size each chunk so that it uses as much of this
// amount of memory as possible.
// @tparam IsDestructibleForTesting For testing purposes we allow the
// destructor to perform clean up upon destruction. Otherwise, using the
// destructor will result in a compilation failure.
template <typename PayloadType,
          typename AllocatorType,
          typename TLSSystemType,
          size_t AllocationChunkSize,
          bool IsDestructibleForTesting>
struct ThreadLocalStorage {
  explicit ThreadLocalStorage(const base::StringPiece instance_id)
      : root_(AllocateAndInitializeChunk()) {
    Initialize(instance_id);
  }

  // Create a new instance of |ThreadLocalStorage| using the passed allocator
  // and TLS system. This initializes the underlying TLS system and creates the
  // first chunk of data.
  ThreadLocalStorage(const base::StringPiece instance_id,
                     AllocatorType allocator,
                     TLSSystemType tls_system)
      : allocator_(std::move(allocator)),
        tls_system_(std::move(tls_system)),
        root_(AllocateAndInitializeChunk()) {
    Initialize(instance_id);
  }

  // Delete an instance of |ThreadLocalStorage| and all the data chunks
  // created.
  ~ThreadLocalStorage() {
    if constexpr (IsDestructibleForTesting) {
      TearDownForTesting();
    } else if constexpr (!IsDestructibleForTesting) {
      static_assert(
          IsDestructibleForTesting,
          "ThreadLocalStorage cannot be destructed outside of test code.");
    }
  }

  // Explicitly prevent all forms of copy/move construction/assignment. For an
  // exact copy of ThreadLocalStorage we would need to copy the mapping of
  // thread to item, which we can't do at the moment. In addition, our atomic
  // members do not support moving out of the box.
  ThreadLocalStorage(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage(ThreadLocalStorage&& other) = delete;
  ThreadLocalStorage& operator=(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage& operator=(ThreadLocalStorage&&) = delete;

  // Get the data item for the current thread. If no data is registered so far,
  // find a free item in the chunks and register it for the current thread.
  PayloadType* GetThreadLocalData() {
    auto& tls_system = dereference(tls_system_);

    auto* slot = static_cast<SingleSlot*>(tls_system.GetThreadSpecificData());

    if (UNLIKELY(slot == nullptr)) {
      slot = FindAndAllocateFreeSlot(root_.load(std::memory_order_relaxed));

      // We might be called in the course of handling a memory allocation, so
      // we do not use CHECK since it might allocate and cause a recursion.
      TLS_RAW_CHECK("Failed to set thread specific data.",
                    tls_system.SetThreadSpecificData(slot));

      // Reset the content to wipe out any previous data.
      Reset(slot->item);
    }

    return &(slot->item);
  }

 private:
  // Encapsulate the payload item and some administrative data.
  struct SingleSlot {
    PayloadType item;
#if !defined(__cpp_lib_atomic_value_initialization) || \
    __cpp_lib_atomic_value_initialization < 201911L
    std::atomic_flag is_used = ATOMIC_FLAG_INIT;
#else
    std::atomic_flag is_used;
#endif
  };

  template <size_t NumberOfItems>
  struct ChunkT {
    SingleSlot slots[NumberOfItems];
    // Pointer to the next chunk.
    std::atomic<ChunkT*> next_chunk = nullptr;
    // Helper flag to ensure we create the next chunk only once in a multi
    // threaded environment.
    std::once_flag create_next_chunk_flag;
  };

  template <size_t LowerNumberOfItems,
            size_t UpperNumberOfItems,
            size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItemsBinSearch() {
    if constexpr (LowerNumberOfItems == UpperNumberOfItems) {
      return LowerNumberOfItems;
    }

    constexpr size_t CurrentNumberOfItems =
        (UpperNumberOfItems - LowerNumberOfItems) / 2 + LowerNumberOfItems;

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems>) > NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, CurrentNumberOfItems, NumberOfBytes>();
    }

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems + 1>) < NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          CurrentNumberOfItems + 1, UpperNumberOfItems, NumberOfBytes>();
    }

    return CurrentNumberOfItems;
  }

  // Calculate the maximum number of items we can store in one chunk without
  // the size of the chunk exceeding NumberOfBytes. To avoid things like
  // alignment and packing tampering with the calculation, instead of
  // calculating the correct number of items we use the sizeof-operator on
  // ChunkT to search for the correct size. Unfortunately, the number of
  // recursions is limited by the compiler. Therefore, we use a binary search
  // instead of a simple linear search.
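  //
  // Illustrative sketch of the invariant (assuming a chunk of
  // MinimumNumberOfItems items fits into NumberOfBytes at all): the search
  // narrows the range
  //   [MinimumNumberOfItems, NumberOfBytes / sizeof(PayloadType) + 1]
  // until it arrives at an N with sizeof(ChunkT<N>) <= NumberOfBytes, where
  // one more item would no longer stay below NumberOfBytes, i.e. the chunk
  // fills the allocation as completely as possible despite padding and the
  // per-chunk bookkeeping members.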
  template <size_t MinimumNumberOfItems, size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItems() {
    if constexpr (sizeof(ChunkT<MinimumNumberOfItems>) < NumberOfBytes) {
      constexpr size_t LowerNumberOfItems = MinimumNumberOfItems;
      constexpr size_t UpperNumberOfItems =
          NumberOfBytes / sizeof(PayloadType) + 1;
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, UpperNumberOfItems, NumberOfBytes>();
    }

    return MinimumNumberOfItems;
  }

 public:
  // The minimum number of items per chunk. It should be high enough to
  // accommodate most items in the root chunk whilst not wasting too much
  // space on unnecessary items.
  static constexpr size_t MinimumNumberOfItemsPerChunk = 75;
  // The effective number of items per chunk. We use the AllocationChunkSize
  // as a hint to calculate the effective number of items so we occupy one of
  // these memory chunks to the maximum extent possible.
  static constexpr size_t ItemsPerChunk =
      CalculateEffectiveNumberOfItems<MinimumNumberOfItemsPerChunk,
                                      AllocationChunkSize>();

 private:
  using Chunk = ChunkT<ItemsPerChunk>;

  static_assert(ItemsPerChunk >= MinimumNumberOfItemsPerChunk);

  // Mark an item's slot ready for reuse. This function is used as thread
  // termination function in the TLS system. We do not destroy anything at
  // this point but simply mark the slot as unused.
  static void MarkSlotAsFree(void* data) {
    // We always store SingleSlots in the TLS system. Therefore, we cast to
    // SingleSlot and reset the is_used flag.
    auto* const slot = static_cast<SingleSlot*>(data);

    // We might be called in the course of handling a memory allocation.
    // Therefore, do not use CHECK since it might allocate and cause a
    // recursion.
    TLS_RAW_CHECK("Received an invalid slot.",
                  slot && slot->is_used.test_and_set());

    slot->is_used.clear(std::memory_order_relaxed);
  }

  // Perform common initialization during construction of an instance.
  void Initialize(const base::StringPiece instance_id) {
    // The constructor must be called outside of the allocation path.
    // Therefore, it is safe to verify with CHECK.

    // By passing MarkSlotAsFree as thread_termination_function we ensure the
    // slot/item assigned to the finished thread is returned to the pool of
    // unused items.
    CHECK(dereference(tls_system_).Setup(&MarkSlotAsFree, instance_id));
  }

  Chunk* AllocateAndInitializeChunk() {
    void* const uninitialized_memory =
        dereference(allocator_).AllocateMemory(sizeof(Chunk));

    // We might be called in the course of handling a memory allocation, so we
    // do not use CHECK since it might allocate and cause a recursion.
    TLS_RAW_CHECK("Failed to allocate memory for new chunk.",
                  uninitialized_memory != nullptr);

    return new (uninitialized_memory) Chunk{};
  }

  void FreeAndDeallocateChunkForTesting(Chunk* chunk_to_erase) {
    chunk_to_erase->~Chunk();

    // FreeAndDeallocateChunkForTesting must be called outside of the
    // allocation path. Therefore, it is safe to verify with CHECK.
    CHECK(dereference(allocator_)
              .FreeMemoryForTesting(chunk_to_erase, sizeof(Chunk)));
  }

  // Find a free slot in the passed chunk, reserve it and return it to the
  // caller. If no free slot can be found, head on to the next chunk. If the
  // next chunk doesn't exist, create it.
  SingleSlot* FindAndAllocateFreeSlot(Chunk* const chunk) {
    SingleSlot* const slot = std::find_if_not(
        std::begin(chunk->slots), std::end(chunk->slots),
        [](SingleSlot& candidate_slot) {
          return candidate_slot.is_used.test_and_set(
              std::memory_order_relaxed);
        });

    // So we found a slot. Happily return it to the caller.
    if (slot != std::end(chunk->slots)) {
      return slot;
    }

    // Ok, there are no more free slots in this chunk. First, ensure the next
    // chunk is valid and create one if necessary.
    std::call_once(chunk->create_next_chunk_flag, [&] {
      // From https://eel.is/c++draft/thread.once.callonce#3
      //
      // Synchronization: For any given once_flag: all active executions occur
      // in a total order; completion of an active execution synchronizes with
      // the start of the next one in this total order; and the returning
      // execution synchronizes with the return from all passive executions.
      //
      // Therefore, a relaxed store suffices here; call_once synchronizes with
      // other threads.
      chunk->next_chunk.store(AllocateAndInitializeChunk(),
                              std::memory_order_relaxed);
    });

    return FindAndAllocateFreeSlot(chunk->next_chunk);
  }

  template <bool IsDestructibleForTestingP = IsDestructibleForTesting>
  typename std::enable_if<IsDestructibleForTestingP>::type
  TearDownForTesting() {
    // The destructor must be called outside of the allocation path. Therefore,
    // it is safe to verify with CHECK.

    // All accessing threads must be terminated by now. For additional
    // security we tear down the TLS system first. This way we ensure that
    // MarkSlotAsFree is not called anymore and we have no accesses from the
    // TLS system's side.
    CHECK(dereference(tls_system_).TearDownForTesting());

    // Delete all data chunks.
    for (auto* chunk = root_.load(); chunk != nullptr;) {
      auto* next_chunk = chunk->next_chunk.load();
      FreeAndDeallocateChunkForTesting(chunk);
      chunk = next_chunk;
    }
  }

  // Reset a single item to its default value.
  // Since items are re-used, they may be accessed from different threads,
  // causing TSan to trigger. Therefore, the reset is exempt from TSan
  // instrumentation.
  DISABLE_TSAN_INSTRUMENTATION void Reset(PayloadType& item) { item = {}; }

  AllocatorType allocator_;
  TLSSystemType tls_system_;
  std::atomic<Chunk*> const root_;
};

}  // namespace internal

// The ThreadLocalStorage visible to the user. This uses the internal default
// allocator and TLS system.
template <typename StorageType,
          typename AllocatorType = internal::DefaultAllocator,
          typename TLSSystemType = internal::DefaultTLSSystem,
          size_t AllocationChunkSize = AllocatorType::AllocationChunkSize,
          bool IsDestructibleForTesting = false>
using ThreadLocalStorage =
    internal::ThreadLocalStorage<StorageType,
                                 AllocatorType,
                                 TLSSystemType,
                                 AllocationChunkSize,
                                 IsDestructibleForTesting>;
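
// Example usage (illustrative only; |ThreadData| is a made-up payload type):
//
//   struct ThreadData {
//     size_t allocation_count = 0;
//   };
//
//   ThreadLocalStorage<ThreadData> storage("example_instance");
//
//   // On any thread, also concurrently:
//   ThreadData* const data = storage.GetThreadLocalData();
//   ++data->allocation_count;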

}  // namespace base::allocator::dispatcher

#undef TLS_RAW_CHECK_IMPL
#undef TLS_RAW_CHECK
#undef STR
#undef STR_HELPER

#endif  // USE_LOCAL_TLS_EMULATION()
#endif  // BASE_ALLOCATOR_DISPATCHER_TLS_H_