// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_ALLOCATOR_DISPATCHER_TLS_H_
#define BASE_ALLOCATOR_DISPATCHER_TLS_H_

#include "build/build_config.h"

#if BUILDFLAG(IS_POSIX)  // the current allocation mechanism (mmap) and TLS
                         // support (pthread) are both defined by POSIX
#define USE_LOCAL_TLS_EMULATION() true
#else
#define USE_LOCAL_TLS_EMULATION() false
#endif

#if USE_LOCAL_TLS_EMULATION()
#include <algorithm>
#include <atomic>
#include <memory>
#include <mutex>
#include <new>
#include <type_traits>

#include "base/allocator/partition_allocator/partition_alloc_constants.h"
#include "base/base_export.h"
#include "base/check.h"
#include "base/compiler_specific.h"

#include <pthread.h>

#if HAS_FEATURE(thread_sanitizer)
#define DISABLE_TSAN_INSTRUMENTATION __attribute__((no_sanitize("thread")))
#else
#define DISABLE_TSAN_INSTRUMENTATION
#endif

namespace base::allocator::dispatcher {
namespace internal {

// Allocate memory using POSIX' mmap and munmap functionality. The allocator
// implements the allocator interface required by ThreadLocalStorage.
struct BASE_EXPORT MMapAllocator {
// The minimum size of a memory chunk when allocating. Even for chunks with
// fewer bytes, at least AllocationChunkSize bytes are allocated. For mmap,
// this is usually the page size of the system.
// For various OS-CPU combinations, partition_alloc::PartitionPageSize() is
// not constexpr. Hence, we cannot use this value but define it locally.
#if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR) && \
    PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR
  constexpr static size_t AllocationChunkSize =
      partition_alloc::PartitionPageSize();
#elif BUILDFLAG(IS_APPLE)
  constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
  constexpr static size_t AllocationChunkSize = 16384;
#else
  constexpr static size_t AllocationChunkSize = 4096;
#endif

  // Allocate size_in_bytes bytes of raw memory. Return nullptr if the
  // allocation fails.
  void* AllocateMemory(size_t size_in_bytes);
  // Free the raw memory pointed to by pointer_to_allocated. Returns a boolean
  // value indicating if the free was successful.
  bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
};
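
// The member functions above are defined out of line. A minimal sketch of
// how they might be implemented in terms of mmap/munmap, for illustration
// only (the real definitions live in the corresponding .cc file):
//
//   void* MMapAllocator::AllocateMemory(size_t size_in_bytes) {
//     void* const memory =
//         mmap(nullptr, size_in_bytes, PROT_READ | PROT_WRITE,
//              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
//     return (memory != MAP_FAILED) ? memory : nullptr;
//   }
//
//   bool MMapAllocator::FreeMemoryForTesting(void* pointer_to_allocated,
//                                            size_t size_in_bytes) {
//     return munmap(pointer_to_allocated, size_in_bytes) == 0;
//   }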

// The allocator used by default for the thread local storage.
using DefaultAllocator = MMapAllocator;

using OnThreadTerminationFunction = void (*)(void*);

// The TLS system used by default for the thread local storage. It stores and
// retrieves thread specific data pointers.
struct BASE_EXPORT PThreadTLSSystem {
  // Initialize the TLS system to store a data set for different threads.
  // @param thread_termination_function An optional function which will be
  // invoked upon termination of a thread.
  bool Setup(OnThreadTerminationFunction thread_termination_function);
  // Tear down the TLS system. After completing tear down, the thread
  // termination function passed to Setup will not be invoked anymore.
  bool TearDownForTesting();

  // Get the pointer to the data associated with the current thread. Returns
  // nullptr if the TLS system is not initialized or no data was set before.
  void* GetThreadSpecificData();
  // Set the pointer to the data associated with the current thread. Returns
  // true if stored successfully, false otherwise.
  bool SetThreadSpecificData(void* data);

 private:
  pthread_key_t data_access_key_ = 0;
#if DCHECK_IS_ON()
  // From the POSIX standard at
  // https://www.open-std.org/jtc1/sc22/open/n4217.pdf:
  // The effect of calling pthread_getspecific() or pthread_setspecific() with
  // a key value not obtained from pthread_key_create() or after key has been
  // deleted with pthread_key_delete() is undefined.
  //
  // Unfortunately, POSIX doesn't define a special value of pthread_key_t
  // indicating an invalid key which would allow us to detect accesses outside
  // of the initialized state. Hence, to prevent us from drifting into the
  // evil realm of undefined behaviour, we store whether we're somewhere
  // between Setup and TearDown.
  std::atomic_bool initialized_{false};
#endif
};
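
// A minimal sketch of how the interface above maps onto the pthread API
// (again illustrative only; the real definitions live in the corresponding
// .cc file):
//
//   bool PThreadTLSSystem::Setup(
//       OnThreadTerminationFunction thread_termination_function) {
//     // The pthread runtime invokes the destructor function on thread exit
//     // for every key whose associated value is non-null.
//     return pthread_key_create(&data_access_key_,
//                               thread_termination_function) == 0;
//   }
//
//   void* PThreadTLSSystem::GetThreadSpecificData() {
//     return pthread_getspecific(data_access_key_);
//   }
//
//   bool PThreadTLSSystem::SetThreadSpecificData(void* data) {
//     return pthread_setspecific(data_access_key_, data) == 0;
//   }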

using DefaultTLSSystem = PThreadTLSSystem;

// In some scenarios, most notably when testing, the allocator and TLS system
// passed to |ThreadLocalStorage| are not copyable and have to be wrapped,
// e.g. using std::reference_wrapper. |dereference| is a small helper to
// retrieve the underlying value.
template <typename T>
T& dereference(T& ref) {
  return ref;
}

template <typename T>
T& dereference(std::reference_wrapper<T>& ref) {
  // std::reference_wrapper requires a valid reference for construction,
  // therefore there is no need to check here.
  return ref.get();
}
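
// For example, both calls below yield a reference to the same underlying
// allocator:
//
//   MMapAllocator allocator;
//   auto wrapped = std::ref(allocator);
//   dereference(allocator);  // MMapAllocator& via the generic overload.
//   dereference(wrapped);    // MMapAllocator& via the unwrapping overload.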

// Store thread local data. The data is organized in chunks, where each chunk
// holds |ItemsPerChunk| items. Each item may be free or used.
//
// When a thread requests data, the chunks are searched for a free data item,
// which is registered for this thread and marked as |used|. Further requests
// by this thread will then always return the same item. When a thread
// terminates, the item is reset and returned to the pool of free items.
//
// Upon construction, the first chunk is created. If a thread requests data
// and there is no free item available, another chunk is created. Upon
// destruction, all memory is freed. Pointers to data items become invalid!
//
// Constructor and destructor are not thread safe.
//
// @tparam PayloadType The item type to be stored.
// @tparam AllocatorType The allocator being used. An allocator must provide
// the following interface:
//  void* AllocateMemory(size_t size_in_bytes); // Allocate size_in_bytes
//  bytes of raw memory.
//  bool FreeMemoryForTesting(void* pointer_to_allocated, size_t
//  size_in_bytes); // Free the raw memory pointed to by pointer_to_allocated.
// Any failure in allocation or free must terminate the process.
// @tparam TLSSystemType The TLS system being used. A TLS system must provide
// the following interface:
//  bool Setup(OnThreadTerminationFunction thread_termination_function);
//  bool TearDownForTesting();
//  void* GetThreadSpecificData();
//  bool SetThreadSpecificData(void* data);
// @tparam AllocationChunkSize The minimum size of a memory chunk that the
// allocator can handle. We try to size the chunks so that each chunk uses
// this size to the maximum extent possible.
// @tparam IsDestructibleForTesting For testing purposes we allow the
// destructor to perform clean up upon destruction. Otherwise, using the
// destructor will result in a compilation failure.
template <typename PayloadType,
          typename AllocatorType,
          typename TLSSystemType,
          size_t AllocationChunkSize,
          bool IsDestructibleForTesting>
struct ThreadLocalStorage {
  ThreadLocalStorage() : root_(AllocateAndInitializeChunk()) { Initialize(); }

  // Create a new instance of |ThreadLocalStorage| using the passed allocator
  // and TLS system. This initializes the underlying TLS system and creates
  // the first chunk of data.
  ThreadLocalStorage(AllocatorType allocator, TLSSystemType tls_system)
      : allocator_(std::move(allocator)),
        tls_system_(std::move(tls_system)),
        root_(AllocateAndInitializeChunk()) {
    Initialize();
  }

  // Deletes an instance of |ThreadLocalStorage| and deletes all the data
  // chunks created.
  ~ThreadLocalStorage() {
    if constexpr (IsDestructibleForTesting) {
      TearDownForTesting();
    } else if constexpr (!IsDestructibleForTesting) {
      static_assert(
          IsDestructibleForTesting,
          "ThreadLocalStorage cannot be destructed outside of test code.");
    }
  }

  // Explicitly prevent all forms of copy/move construction/assignment. For
  // an exact copy of ThreadLocalStorage we would need to copy the mapping of
  // thread to item, which we can't do at the moment. On the other side, our
  // atomic members do not support moving out of the box.
  ThreadLocalStorage(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage(ThreadLocalStorage&& other) = delete;
  ThreadLocalStorage& operator=(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage& operator=(ThreadLocalStorage&&) = delete;

  // Get the data item for the current thread. If no data is registered so
  // far, find a free item in the chunks and register it for the current
  // thread.
  PayloadType* GetThreadLocalData() {
    auto& tls_system = dereference(tls_system_);

    auto* slot = static_cast<SingleSlot*>(tls_system.GetThreadSpecificData());

    if (UNLIKELY(slot == nullptr)) {
      slot = FindAndAllocateFreeSlot(root_.load(std::memory_order_relaxed));

      // We might be called in the course of handling a memory allocation.
      // We do not use CHECK since it might allocate and cause a recursion.
      RAW_CHECK(tls_system.SetThreadSpecificData(slot));

      // Reset the content to wipe out any previous data.
      Reset(slot->item);
    }

    return &(slot->item);
  }

 private:
  // Encapsulate the payload item and some administrative data.
  struct SingleSlot {
    PayloadType item;
#if !defined(__cpp_lib_atomic_value_initialization) || \
    __cpp_lib_atomic_value_initialization < 201911L
    std::atomic_flag is_used = ATOMIC_FLAG_INIT;
#else
    std::atomic_flag is_used;
#endif
  };

  template <size_t NumberOfItems>
  struct ChunkT {
    SingleSlot slots[NumberOfItems];
    // Pointer to the next chunk.
    std::atomic<ChunkT*> next_chunk = nullptr;
    // Helper flag to ensure we create the next chunk only once in a
    // multithreaded environment.
    std::once_flag create_next_chunk_flag;
  };

  template <size_t LowerNumberOfItems,
            size_t UpperNumberOfItems,
            size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItemsBinSearch() {
    if constexpr (LowerNumberOfItems == UpperNumberOfItems) {
      return LowerNumberOfItems;
    }

    constexpr size_t CurrentNumberOfItems =
        (UpperNumberOfItems - LowerNumberOfItems) / 2 + LowerNumberOfItems;

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems>) > NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, CurrentNumberOfItems, NumberOfBytes>();
    }

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems + 1>) < NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          CurrentNumberOfItems + 1, UpperNumberOfItems, NumberOfBytes>();
    }

    return CurrentNumberOfItems;
  }

  // Calculate the maximum number of items that fit in one chunk without the
  // size of the chunk exceeding NumberOfBytes. To avoid things like alignment
  // and packing tampering with the calculation, instead of calculating the
  // correct number of items directly we use the sizeof operator on ChunkT to
  // search for the correct size. Unfortunately, the depth of recursion is
  // limited by the compiler. Therefore, we use a binary search instead of a
  // simple linear search.
  template <size_t MinimumNumberOfItems, size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItems() {
    if constexpr (sizeof(ChunkT<MinimumNumberOfItems>) < NumberOfBytes) {
      constexpr size_t LowerNumberOfItems = MinimumNumberOfItems;
      constexpr size_t UpperNumberOfItems =
          NumberOfBytes / sizeof(PayloadType) + 1;
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, UpperNumberOfItems, NumberOfBytes>();
    }

    return MinimumNumberOfItems;
  }
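
  // A purely hypothetical illustration of the search: assume
  // NumberOfBytes == 4096, sizeof(SingleSlot) == 8, and a fixed per-chunk
  // overhead of 16 bytes for next_chunk and create_next_chunk_flag. The
  // binary search then settles on the largest N for which
  // sizeof(ChunkT<N>) == 8 * N + 16 does not exceed 4096, i.e. N == 510.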

 public:
  // The minimum number of items per chunk. It should be high enough to
  // accommodate most items in the root chunk whilst not wasting too much
  // space on unnecessary items.
  static constexpr size_t MinimumNumberOfItemsPerChunk = 75;
  // The effective number of items per chunk. We use the AllocationChunkSize
  // as a hint to calculate the effective number of items so we occupy one of
  // these memory chunks to the maximum extent possible.
  static constexpr size_t ItemsPerChunk =
      CalculateEffectiveNumberOfItems<MinimumNumberOfItemsPerChunk,
                                      AllocationChunkSize>();

 private:
  using Chunk = ChunkT<ItemsPerChunk>;

  static_assert(ItemsPerChunk >= MinimumNumberOfItemsPerChunk);

  // Mark an item's slot as ready for reuse. This function is used as the
  // thread termination function in the TLS system. We do not destroy
  // anything at this point but simply mark the slot as unused.
  static void MarkSlotAsFree(void* data) {
    // We always store SingleSlots in the TLS system. Therefore, we cast to
    // SingleSlot and reset the is_used flag.
    auto* const slot = static_cast<SingleSlot*>(data);

    // We might be called in the course of handling a memory allocation.
    // We do not use CHECK since it might allocate and cause a recursion.
    RAW_CHECK(slot && slot->is_used.test_and_set());

    slot->is_used.clear(std::memory_order_relaxed);
  }

  // Perform common initialization during construction of an instance.
  void Initialize() {
    // The constructor must be called outside of the allocation path.
    // Therefore, it is safe to verify with CHECK.

    // By passing MarkSlotAsFree as thread_termination_function we ensure
    // that the slot/item assigned to a finished thread is returned to the
    // pool of unused items.
    CHECK(dereference(tls_system_).Setup(&MarkSlotAsFree));
  }

  Chunk* AllocateAndInitializeChunk() {
    void* const uninitialized_memory =
        dereference(allocator_).AllocateMemory(sizeof(Chunk));

    // We might be called in the course of handling a memory allocation.
    // We do not use CHECK since it might allocate and cause a recursion.
    RAW_CHECK(uninitialized_memory != nullptr);

    return new (uninitialized_memory) Chunk{};
  }

  void FreeAndDeallocateChunkForTesting(Chunk* chunk_to_erase) {
    chunk_to_erase->~Chunk();

    // FreeAndDeallocateChunkForTesting must be called outside of the
    // allocation path. Therefore, it is safe to verify with CHECK.
    CHECK(dereference(allocator_)
              .FreeMemoryForTesting(chunk_to_erase, sizeof(Chunk)));
  }

  // Find a free slot in the passed chunk, reserve it and return it to the
  // caller. If no free slot can be found, head on to the next chunk. If the
  // next chunk doesn't exist, create it.
  SingleSlot* FindAndAllocateFreeSlot(Chunk* const chunk) {
    SingleSlot* const slot = std::find_if_not(
        std::begin(chunk->slots), std::end(chunk->slots),
        [](SingleSlot& candidate_slot) {
          return candidate_slot.is_used.test_and_set(
              std::memory_order_relaxed);
        });

    // So we found a slot. Happily return it to the caller.
    if (slot != std::end(chunk->slots)) {
      return slot;
    }

    // Ok, there are no more free slots in this chunk. First, ensure the next
    // chunk is valid and create one if necessary.
    std::call_once(chunk->create_next_chunk_flag, [&] {
      // From https://eel.is/c++draft/thread.once.callonce#3
      //
      // Synchronization: For any given once_flag: all active executions
      // occur in a total order; completion of an active execution
      // synchronizes with the start of the next one in this total order;
      // and the returning execution synchronizes with the return from all
      // passive executions.
      //
      // Therefore, a relaxed store suffices here; call_once itself
      // synchronizes with other threads.
      chunk->next_chunk.store(AllocateAndInitializeChunk(),
                              std::memory_order_relaxed);
    });

    return FindAndAllocateFreeSlot(chunk->next_chunk);
  }

  template <bool IsDestructibleForTestingP = IsDestructibleForTesting>
  typename std::enable_if<IsDestructibleForTestingP>::type
  TearDownForTesting() {
    // TearDownForTesting must be called outside of the allocation path.
    // Therefore, it is safe to verify with CHECK.

    // All accessing threads must be terminated by now. For additional safety
    // we tear down the TLS system first. This way we ensure that
    // MarkSlotAsFree is not called anymore and we have no accesses from the
    // TLS system's side.
    CHECK(dereference(tls_system_).TearDownForTesting());

    // Delete all data chunks.
    for (auto* chunk = root_.load(); chunk != nullptr;) {
      auto* next_chunk = chunk->next_chunk.load();
      FreeAndDeallocateChunkForTesting(chunk);
      chunk = next_chunk;
    }
  }

  // Reset a single item to its default value.
  // Since items are re-used, they may be accessed from different threads,
  // causing TSan to trigger. Therefore, the reset is exempt from TSan
  // instrumentation.
  DISABLE_TSAN_INSTRUMENTATION void Reset(PayloadType& item) { item = {}; }

  AllocatorType allocator_;
  TLSSystemType tls_system_;
  std::atomic<Chunk*> const root_;
};

}  // namespace internal

// The ThreadLocalStorage visible to the user. This uses the internal default
// allocator and TLS system.
template <typename StorageType,
          typename AllocatorType = internal::DefaultAllocator,
          typename TLSSystemType = internal::DefaultTLSSystem,
          size_t AllocationChunkSize = AllocatorType::AllocationChunkSize,
          bool IsDestructibleForTesting = false>
using ThreadLocalStorage =
    internal::ThreadLocalStorage<StorageType,
                                 AllocatorType,
                                 TLSSystemType,
                                 AllocationChunkSize,
                                 IsDestructibleForTesting>;
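
// Example usage (a minimal sketch; |ThreadAllocationData| is a hypothetical
// payload type):
//
//   struct ThreadAllocationData {
//     size_t allocation_count = 0;
//   };
//
//   // The storage is deliberately leaked: outside of test code
//   // (IsDestructibleForTesting == false) instantiating the destructor
//   // triggers a static_assert.
//   auto* storage = new ThreadLocalStorage<ThreadAllocationData>();
//
//   // Each thread receives its own item, reset to default values on first
//   // access; repeated calls from the same thread return the same pointer.
//   ThreadAllocationData* data = storage->GetThreadLocalData();
//   ++data->allocation_count;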

}  // namespace base::allocator::dispatcher

#endif  // USE_LOCAL_TLS_EMULATION()
#endif  // BASE_ALLOCATOR_DISPATCHER_TLS_H_