// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_ALLOCATOR_DISPATCHER_TLS_H_
#define BASE_ALLOCATOR_DISPATCHER_TLS_H_

#include <string_view>

#include "build/build_config.h"

#if BUILDFLAG(IS_POSIX)  // the current allocation mechanism (mmap) and TLS
                         // support (pthread) are both defined by POSIX
#define USE_LOCAL_TLS_EMULATION() true
#else
#define USE_LOCAL_TLS_EMULATION() false
#endif

#if USE_LOCAL_TLS_EMULATION()
#include <algorithm>
#include <atomic>
#include <functional>
#include <memory>
#include <mutex>
#include <type_traits>

#include "base/base_export.h"
#include "base/check.h"
#include "base/compiler_specific.h"

#if PA_BUILDFLAG(USE_PARTITION_ALLOC)
#include "partition_alloc/partition_alloc_constants.h"  // nogncheck
#endif

#include <pthread.h>

#if HAS_FEATURE(thread_sanitizer)
#define DISABLE_TSAN_INSTRUMENTATION __attribute__((no_sanitize("thread")))
#else
#define DISABLE_TSAN_INSTRUMENTATION
#endif

#define STR_HELPER(x) #x
#define STR(x) STR_HELPER(x)

// Verify that a condition holds and terminate the process in case it doesn't.
// The functionality is similar to RAW_CHECK but includes more information in
// the logged messages. It is non-allocating to prevent recursion.
#define TLS_RAW_CHECK(error_message, condition) \
  TLS_RAW_CHECK_IMPL(error_message, condition, __FILE__, __LINE__)

#define TLS_RAW_CHECK_IMPL(error_message, condition, file, line)        \
  do {                                                                  \
    if (!(condition)) {                                                 \
      constexpr const char* message =                                   \
          "TLS System: " error_message " Failed condition '" #condition \
          "' in (" file "@" STR(line) ").\n";                           \
      ::logging::RawCheckFailure(message);                              \
    }                                                                   \
  } while (0)
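
// For illustration, a hypothetical use of TLS_RAW_CHECK (a sketch;
// |mmap_allocator| and |buffer| are made-up names, not part of this header):
//
//   void* buffer = mmap_allocator.AllocateMemory(4096);
//   TLS_RAW_CHECK("Failed to allocate chunk.", buffer != nullptr);
//
// On failure this logs something like "TLS System: Failed to allocate chunk.
// Failed condition 'buffer != nullptr' in (<file>@<line>)." via
// ::logging::RawCheckFailure, without allocating.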

namespace base::debug {
struct CrashKeyString;
}

namespace base::allocator::dispatcher {
namespace internal {

// Allocate memory using POSIX's mmap and munmap functionality. The allocator
// implements the allocator interface required by ThreadLocalStorage.
struct BASE_EXPORT MMapAllocator {
// The minimum size of a memory chunk when allocating. Even for requests of
// fewer bytes, at least AllocationChunkSize bytes are allocated. For mmap,
// this is usually the page size of the system.
// For various OS-CPU combinations, partition_alloc::PartitionPageSize() is not
// constexpr. Hence, we cannot use this value but define it locally.
#if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR) && \
    PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR
  constexpr static size_t AllocationChunkSize =
      partition_alloc::PartitionPageSize();
#elif BUILDFLAG(IS_APPLE)
  constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_ANDROID) && defined(ARCH_CPU_64_BITS)
  constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
  constexpr static size_t AllocationChunkSize = 16384;
#else
  constexpr static size_t AllocationChunkSize = 4096;
#endif

  // Allocate size_in_bytes bytes of raw memory. Return nullptr if allocation
  // fails.
  void* AllocateMemory(size_t size_in_bytes);
  // Free the raw memory pointed to by pointer_to_allocated. Returns a boolean
  // value indicating if the free was successful.
  bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
};
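
// A minimal sketch of how ThreadLocalStorage exercises this interface
// (illustrative only; |chunk_size| is a made-up local):
//
//   MMapAllocator allocator;
//   constexpr size_t chunk_size = MMapAllocator::AllocationChunkSize;
//   void* memory = allocator.AllocateMemory(chunk_size);
//   if (memory != nullptr) {
//     // ... placement-new a data chunk into |memory|, use it ...
//     allocator.FreeMemoryForTesting(memory, chunk_size);
//   }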

// The allocator used by default for the thread local storage.
using DefaultAllocator = MMapAllocator;

using OnThreadTerminationFunction = void (*)(void*);

// The TLS system used by default for the thread local storage. It stores and
// retrieves thread specific data pointers.
class BASE_EXPORT PThreadTLSSystem {
 public:
  PThreadTLSSystem();

  PThreadTLSSystem(const PThreadTLSSystem&) = delete;
  PThreadTLSSystem(PThreadTLSSystem&&);
  PThreadTLSSystem& operator=(const PThreadTLSSystem&) = delete;
  PThreadTLSSystem& operator=(PThreadTLSSystem&&);

  // Initialize the TLS system to store a data set for different threads.
  // @param thread_termination_function An optional function which will be
  // invoked upon termination of a thread.
  // @param instance_id An identifier for this instance, used for diagnostics
  // (e.g. crash keys).
  bool Setup(OnThreadTerminationFunction thread_termination_function,
             std::string_view instance_id);
  // Tear down the TLS system. After completing tear down, the thread
  // termination function passed to Setup will not be invoked anymore.
  bool TearDownForTesting();

  // Get the pointer to the data associated with the current thread. Returns
  // nullptr if the TLS system is not initialized or no data was set before.
  void* GetThreadSpecificData();
  // Set the pointer to the data associated with the current thread. Return
  // true if stored successfully, false otherwise.
  bool SetThreadSpecificData(void* data);

 private:
  base::debug::CrashKeyString* crash_key_ = nullptr;
  pthread_key_t data_access_key_ = 0;
#if DCHECK_IS_ON()
  // From the POSIX standard at https://www.open-std.org/jtc1/sc22/open/n4217.pdf:
  // The effect of calling pthread_getspecific() or pthread_setspecific() with
  // a key value not obtained from pthread_key_create() or after key has been
  // deleted with pthread_key_delete() is undefined.
  //
  // Unfortunately, POSIX doesn't define a special value of pthread_key_t
  // indicating an invalid key which would allow us to detect accesses outside
  // of initialized state. Hence, to prevent us from drifting into the evil
  // realm of undefined behaviour, we store whether we're somewhere between
  // Setup and TearDown.
  std::atomic_bool initialized_{false};
#endif
};
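
// A sketch of the expected calling sequence (illustrative; |OnThreadExit| and
// |some_pointer| are hypothetical names, not part of this header):
//
//   PThreadTLSSystem tls;
//   CHECK(tls.Setup(&OnThreadExit, "my_instance"));
//   tls.SetThreadSpecificData(some_pointer);
//   void* data = tls.GetThreadSpecificData();  // == some_pointer
//   CHECK(tls.TearDownForTesting());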

using DefaultTLSSystem = PThreadTLSSystem;

// In some scenarios, most notably when testing, the allocator and TLS system
// passed to |ThreadLocalStorage| are not copyable and have to be wrapped, i.e.
// using std::reference_wrapper. |dereference| is a small helper to retrieve
// the underlying value.
template <typename T>
T& dereference(T& ref) {
  return ref;
}

template <typename T>
T& dereference(std::reference_wrapper<T>& ref) {
  // std::reference_wrapper requires a valid reference for construction,
  // therefore there is no need to check here.
  return ref.get();
}
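
// Example (a sketch for testing scenarios): wrapping a non-copyable allocator
// in std::reference_wrapper and unwrapping it via |dereference|:
//
//   MMapAllocator allocator;
//   std::reference_wrapper<MMapAllocator> wrapped(allocator);
//   MMapAllocator& unwrapped = dereference(wrapped);      // same object
//   MMapAllocator& passthrough = dereference(allocator);  // identity overload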

// Store thread local data. The data is organized in chunks, where each chunk
// holds |ItemsPerChunk| items. Each item may be free or used.
//
// When a thread requests data, the chunks are searched for a free data item,
// which is registered for this thread and marked as |used|. Further requests
// by this thread will then always return the same item. When a thread
// terminates, the item will be reset and returned to the pool of free items.
//
// Upon construction, the first chunk is created. If a thread requests data and
// there is no free item available, another chunk is created. Upon destruction,
// all memory is freed. Pointers to data items become invalid!
//
// Constructor and destructor are not thread safe.
//
// @tparam PayloadType The item type to be stored.
// @tparam AllocatorType The allocator being used. An allocator must provide
// the following interface:
//  void* AllocateMemory(size_t size_in_bytes); // Allocate size_in_bytes bytes
//  of raw memory.
//  void FreeMemory(void* pointer_to_allocated, size_t size_in_bytes); // Free
//  the raw memory pointed to by pointer_to_allocated.
// Any failure in allocation or free must terminate the process.
// @tparam TLSSystemType The TLS system being used. A TLS system must provide
// the following interface:
//  bool Setup(OnThreadTerminationFunction thread_termination_function,
//             std::string_view instance_id);
//  bool TearDownForTesting();
//  void* GetThreadSpecificData();
//  bool SetThreadSpecificData(void* data);
// @tparam AllocationChunkSize The minimum size of a memory chunk that the
// allocator can handle. We try to size the chunks so that each chunk fills
// this size as fully as possible.
// @tparam IsDestructibleForTesting For testing purposes we allow the
// destructor to perform clean up upon destruction. Otherwise, using the
// destructor will result in a compilation failure.
template <typename PayloadType,
          typename AllocatorType,
          typename TLSSystemType,
          size_t AllocationChunkSize,
          bool IsDestructibleForTesting>
struct ThreadLocalStorage {
  explicit ThreadLocalStorage(std::string_view instance_id)
      : root_(AllocateAndInitializeChunk()) {
    Initialize(instance_id);
  }

  // Create a new instance of |ThreadLocalStorage| using the passed allocator
  // and TLS system. This initializes the underlying TLS system and creates the
  // first chunk of data.
  ThreadLocalStorage(std::string_view instance_id,
                     AllocatorType allocator,
                     TLSSystemType tls_system)
      : allocator_(std::move(allocator)),
        tls_system_(std::move(tls_system)),
        root_(AllocateAndInitializeChunk()) {
    Initialize(instance_id);
  }

  // Destroys an instance of |ThreadLocalStorage| and deletes all the data
  // chunks created.
  ~ThreadLocalStorage() {
    if constexpr (IsDestructibleForTesting) {
      TearDownForTesting();
    } else if constexpr (!IsDestructibleForTesting) {
      static_assert(
          IsDestructibleForTesting,
          "ThreadLocalStorage cannot be destructed outside of test code.");
    }
  }

  // Explicitly prevent all forms of Copy/Move construction/assignment. For an
  // exact copy of ThreadLocalStorage we would need to copy the mapping of
  // thread to item, which we can't do at the moment. Besides, our atomic
  // members do not support moving out of the box.
  ThreadLocalStorage(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage(ThreadLocalStorage&& other) = delete;
  ThreadLocalStorage& operator=(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage& operator=(ThreadLocalStorage&&) = delete;

  // Get the data item for the current thread. If no data is registered so far,
  // find a free item in the chunks and register it for the current thread.
  PayloadType* GetThreadLocalData() {
    auto& tls_system = dereference(tls_system_);

    auto* slot = static_cast<SingleSlot*>(tls_system.GetThreadSpecificData());

    if (slot == nullptr) [[unlikely]] {
      slot = FindAndAllocateFreeSlot(root_.load(std::memory_order_relaxed));

      // We might be called in the course of handling a memory allocation. We
      // do not use CHECK since it might allocate and cause a recursion.
      TLS_RAW_CHECK("Failed to set thread specific data.",
                    tls_system.SetThreadSpecificData(slot));

      // Reset the content to wipe out any previous data.
      Reset(slot->item);
    }

    return &(slot->item);
  }

 private:
  // Encapsulate the payload item and some administrative data.
  struct SingleSlot {
    PayloadType item;
#if !defined(__cpp_lib_atomic_value_initialization) || \
    __cpp_lib_atomic_value_initialization < 201911L
    std::atomic_flag is_used = ATOMIC_FLAG_INIT;
#else
    std::atomic_flag is_used;
#endif
  };

  template <size_t NumberOfItems>
  struct ChunkT {
    SingleSlot slots[NumberOfItems];
    // Pointer to the next chunk.
    std::atomic<ChunkT*> next_chunk = nullptr;
    // Helper flag to ensure we create the next chunk only once in a
    // multithreaded environment.
    std::once_flag create_next_chunk_flag;
  };

  template <size_t LowerNumberOfItems,
            size_t UpperNumberOfItems,
            size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItemsBinSearch() {
    if constexpr (LowerNumberOfItems == UpperNumberOfItems) {
      return LowerNumberOfItems;
    }

    constexpr size_t CurrentNumberOfItems =
        (UpperNumberOfItems - LowerNumberOfItems) / 2 + LowerNumberOfItems;

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems>) > NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, CurrentNumberOfItems, NumberOfBytes>();
    }

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems + 1>) < NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          CurrentNumberOfItems + 1, UpperNumberOfItems, NumberOfBytes>();
    }

    return CurrentNumberOfItems;
  }

  // Calculate the maximum number of items we can store in one chunk without
  // the size of the chunk exceeding NumberOfBytes. To avoid things like
  // alignment and packing tampering with the calculation, instead of
  // computing the number of items directly, we use the sizeof operator on
  // ChunkT to search for the correct size. Unfortunately, the number of
  // recursions is limited by the compiler. Therefore, we use a binary search
  // instead of a simple linear search.
  template <size_t MinimumNumberOfItems, size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItems() {
    if constexpr (sizeof(ChunkT<MinimumNumberOfItems>) < NumberOfBytes) {
      constexpr size_t LowerNumberOfItems = MinimumNumberOfItems;
      constexpr size_t UpperNumberOfItems =
          NumberOfBytes / sizeof(PayloadType) + 1;
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, UpperNumberOfItems, NumberOfBytes>();
    }

    return MinimumNumberOfItems;
  }

 public:
  // The minimum number of items per chunk. It should be high enough to
  // accommodate most items in the root chunk whilst not wasting too much
  // space on unnecessary items.
  static constexpr size_t MinimumNumberOfItemsPerChunk = 75;
  // The effective number of items per chunk. We use the AllocationChunkSize
  // as a hint to calculate the effective number of items so we occupy one of
  // these memory chunks to the maximum extent possible.
  static constexpr size_t ItemsPerChunk =
      CalculateEffectiveNumberOfItems<MinimumNumberOfItemsPerChunk,
                                      AllocationChunkSize>();
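
  // Illustration of the invariant the search establishes (a sketch; the
  // exact count depends on sizeof(PayloadType), padding and alignment):
  // whenever the minimum chunk already fits into AllocationChunkSize, the
  // binary search picks ItemsPerChunk such that
  //
  //   sizeof(ChunkT<ItemsPerChunk>) <= AllocationChunkSize and
  //   sizeof(ChunkT<ItemsPerChunk + 1>) >= AllocationChunkSize,
  //
  // i.e. adding one more slot would grow the chunk to at least the
  // allocation granularity of the allocator.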

 private:
  using Chunk = ChunkT<ItemsPerChunk>;

  static_assert(ItemsPerChunk >= MinimumNumberOfItemsPerChunk);

  // Mark an item's slot ready for reuse. This function is used as the thread
  // termination function in the TLS system. We do not destroy anything at
  // this point but simply mark the slot as unused.
  static void MarkSlotAsFree(void* data) {
    // We always store SingleSlots in the TLS system. Therefore, we cast to
    // SingleSlot and reset the is_used flag.
    auto* const slot = static_cast<SingleSlot*>(data);

    // We might be called in the course of handling a memory allocation.
    // Therefore, do not use CHECK since it might allocate and cause a
    // recursion.
    TLS_RAW_CHECK("Received an invalid slot.",
                  slot && slot->is_used.test_and_set());

    slot->is_used.clear(std::memory_order_relaxed);
  }

  // Perform common initialization during construction of an instance.
  void Initialize(std::string_view instance_id) {
    // The constructor must be called outside of the allocation path.
    // Therefore, it is safe to verify with CHECK.

    // By passing MarkSlotAsFree as thread_termination_function we ensure the
    // slot/item assigned to the finished thread will be returned to the pool
    // of unused items.
    CHECK(dereference(tls_system_).Setup(&MarkSlotAsFree, instance_id));
  }

  Chunk* AllocateAndInitializeChunk() {
    void* const uninitialized_memory =
        dereference(allocator_).AllocateMemory(sizeof(Chunk));

    // We might be called in the course of handling a memory allocation. We do
    // not use CHECK since it might allocate and cause a recursion.
    TLS_RAW_CHECK("Failed to allocate memory for new chunk.",
                  uninitialized_memory != nullptr);

    return new (uninitialized_memory) Chunk{};
  }

  void FreeAndDeallocateChunkForTesting(Chunk* chunk_to_erase) {
    chunk_to_erase->~Chunk();

    // FreeAndDeallocateChunkForTesting must be called outside of the
    // allocation path. Therefore, it is safe to verify with CHECK.
    CHECK(dereference(allocator_)
              .FreeMemoryForTesting(chunk_to_erase, sizeof(Chunk)));
  }

  // Find a free slot in the passed chunk, reserve it and return it to the
  // caller. If no free slot can be found, move on to the next chunk. If the
  // next chunk doesn't exist, create it.
  SingleSlot* FindAndAllocateFreeSlot(Chunk* const chunk) {
    SingleSlot* const slot = std::find_if_not(
        std::begin(chunk->slots), std::end(chunk->slots),
        [](SingleSlot& candidate_slot) {
          return candidate_slot.is_used.test_and_set(std::memory_order_relaxed);
        });

    // So we found a slot. Happily return it to the caller.
    if (slot != std::end(chunk->slots)) {
      return slot;
    }

    // Ok, there are no more free slots in this chunk. First, ensure the next
    // chunk is valid and create one if necessary.
    std::call_once(chunk->create_next_chunk_flag, [&] {
      // From https://eel.is/c++draft/thread.once.callonce#3
      //
      // Synchronization: For any given once_flag: all active executions occur
      // in a total order; completion of an active execution synchronizes with
      // the start of the next one in this total order; and the returning
      // execution synchronizes with the return from all passive executions.
      //
      // Therefore, we only do a relaxed store here; call_once synchronizes
      // with other threads.
      chunk->next_chunk.store(AllocateAndInitializeChunk(),
                              std::memory_order_relaxed);
    });

    return FindAndAllocateFreeSlot(chunk->next_chunk);
  }

  template <bool IsDestructibleForTestingP = IsDestructibleForTesting>
  typename std::enable_if<IsDestructibleForTestingP>::type
  TearDownForTesting() {
    // The destructor must be called outside of the allocation path. Therefore,
    // it is safe to verify with CHECK.

    // All accessing threads must be terminated by now. For additional safety
    // we tear down the TLS system first. This way we ensure that
    // MarkSlotAsFree is not called anymore and we have no accesses from the
    // TLS system's side.
    CHECK(dereference(tls_system_).TearDownForTesting());

    // Delete all data chunks.
    for (auto* chunk = root_.load(); chunk != nullptr;) {
      auto* next_chunk = chunk->next_chunk.load();
      FreeAndDeallocateChunkForTesting(chunk);
      chunk = next_chunk;
    }
  }

  // Reset a single item to its default value.
  // Since items are re-used, they may be accessed from different threads,
  // causing TSan to trigger. Therefore, the reset is exempt from TSan
  // instrumentation.
  DISABLE_TSAN_INSTRUMENTATION void Reset(PayloadType& item) { item = {}; }

  AllocatorType allocator_;
  TLSSystemType tls_system_;
  std::atomic<Chunk*> const root_;
};

}  // namespace internal

// The ThreadLocalStorage visible to the user. This uses the internal default
// allocator and TLS system.
template <typename StorageType,
          typename AllocatorType = internal::DefaultAllocator,
          typename TLSSystemType = internal::DefaultTLSSystem,
          size_t AllocationChunkSize = AllocatorType::AllocationChunkSize,
          bool IsDestructibleForTesting = false>
using ThreadLocalStorage =
    internal::ThreadLocalStorage<StorageType,
                                 AllocatorType,
                                 TLSSystemType,
                                 AllocationChunkSize,
                                 IsDestructibleForTesting>;
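
// Example usage (a sketch; |ThreadData| is a hypothetical payload type):
//
//   struct ThreadData { size_t allocation_count = 0; };
//
//   ThreadLocalStorage<ThreadData> storage("allocation_dispatcher_tls");
//   ThreadData* data = storage.GetThreadLocalData();
//   ++data->allocation_count;
//
// Each thread receives its own |ThreadData| item; when a thread terminates,
// its slot is returned to the pool for reuse by later threads.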

}  // namespace base::allocator::dispatcher

#undef TLS_RAW_CHECK_IMPL
#undef TLS_RAW_CHECK
#undef STR
#undef STR_HELPER

#endif  // USE_LOCAL_TLS_EMULATION()
#endif  // BASE_ALLOCATOR_DISPATCHER_TLS_H_