// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_ALLOCATOR_DISPATCHER_TLS_H_
#define BASE_ALLOCATOR_DISPATCHER_TLS_H_

#include "build/build_config.h"

#if BUILDFLAG(IS_POSIX)  // the current allocation mechanism (mmap) and TLS
                         // support (pthread) are both defined by POSIX
#define USE_LOCAL_TLS_EMULATION() true
#else
#define USE_LOCAL_TLS_EMULATION() false
#endif

#if USE_LOCAL_TLS_EMULATION()
#include <algorithm>
#include <atomic>
#include <memory>
#include <mutex>

#include "base/allocator/partition_allocator/partition_alloc_constants.h"
#include "base/base_export.h"
#include "base/check.h"
#include "base/compiler_specific.h"

#include <pthread.h>

#if HAS_FEATURE(thread_sanitizer)
#define DISABLE_TSAN_INSTRUMENTATION __attribute__((no_sanitize("thread")))
#else
#define DISABLE_TSAN_INSTRUMENTATION
#endif

namespace base::allocator::dispatcher {
namespace internal {

// Allocate memory using POSIX's mmap and munmap functionality. The allocator
// implements the allocator interface required by ThreadLocalStorage.
struct BASE_EXPORT MMapAllocator {
// The minimum size of a memory chunk when allocating. Even for requests of
// fewer bytes, at least AllocationChunkSize bytes are allocated. For mmap,
// this is usually the page size of the system.
// For various OS-CPU combinations, partition_alloc::PartitionPageSize() is not
// constexpr. Hence, we cannot use this value but define it locally.
#if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR) && \
    PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR
  constexpr static size_t AllocationChunkSize =
      partition_alloc::PartitionPageSize();
#elif BUILDFLAG(IS_APPLE)
  constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
  constexpr static size_t AllocationChunkSize = 16384;
#else
  constexpr static size_t AllocationChunkSize = 4096;
#endif

  // Allocate size_in_bytes bytes of raw memory. Return nullptr if allocation
  // fails.
  void* AllocateMemory(size_t size_in_bytes);
  // Free the raw memory pointed to by pointer_to_allocated. Returns a boolean
  // value indicating whether the free was successful.
  bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
};
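
// An illustrative sketch of how the two members above can be backed by
// mmap/munmap. The actual implementation lives in the corresponding .cc file
// and may differ (e.g. Apple platforms spell the anonymous-mapping flag
// MAP_ANON):
//
//   void* MMapAllocator::AllocateMemory(size_t size_in_bytes) {
//     void* const mapped =
//         mmap(nullptr, size_in_bytes, PROT_READ | PROT_WRITE,
//              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
//     return (mapped != MAP_FAILED) ? mapped : nullptr;
//   }
//
//   bool MMapAllocator::FreeMemoryForTesting(void* pointer_to_allocated,
//                                            size_t size_in_bytes) {
//     return munmap(pointer_to_allocated, size_in_bytes) == 0;
//   }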

// The allocator used by default for the thread local storage.
using DefaultAllocator = MMapAllocator;

using OnThreadTerminationFunction = void (*)(void*);

// The TLS system used by default for the thread local storage. It stores and
// retrieves thread specific data pointers.
struct BASE_EXPORT PThreadTLSSystem {
  // Initialize the TLS system to store a data set for different threads.
  // @param thread_termination_function An optional function which will be
  // invoked upon termination of a thread.
  bool Setup(OnThreadTerminationFunction thread_termination_function);
  // Tear down the TLS system. After completing tear down, the thread
  // termination function passed to Setup will not be invoked anymore.
  bool TearDownForTesting();

  // Get the pointer to the data associated with the current thread. Returns
  // nullptr if the TLS system is not initialized or no data was set before.
  void* GetThreadSpecificData();
  // Set the pointer to the data associated with the current thread. Return
  // true if stored successfully, false otherwise.
  bool SetThreadSpecificData(void* data);

 private:
  pthread_key_t data_access_key_ = 0;
#if DCHECK_IS_ON()
  // From the POSIX standard at https://www.open-std.org/jtc1/sc22/open/n4217.pdf:
  // The effect of calling pthread_getspecific() or pthread_setspecific() with
  // a key value not obtained from pthread_key_create() or after the key has
  // been deleted with pthread_key_delete() is undefined.
  //
  // Unfortunately, POSIX doesn't define a special value of pthread_key_t
  // indicating an invalid key which would allow us to detect accesses outside
  // of the initialized state. Hence, to prevent us from drifting into the evil
  // realm of undefined behaviour, we store whether we're somewhere between
  // Setup and Teardown.
  std::atomic_bool initialized_{false};
#endif
};
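
// An illustrative sketch of how PThreadTLSSystem maps onto the pthread API.
// The real implementation lives in the .cc file and performs additional
// bookkeeping (e.g. the DCHECK_IS_ON() initialization tracking above):
//
//   bool PThreadTLSSystem::Setup(
//       OnThreadTerminationFunction thread_termination_function) {
//     return pthread_key_create(&data_access_key_,
//                               thread_termination_function) == 0;
//   }
//
//   void* PThreadTLSSystem::GetThreadSpecificData() {
//     return pthread_getspecific(data_access_key_);
//   }
//
//   bool PThreadTLSSystem::SetThreadSpecificData(void* data) {
//     return pthread_setspecific(data_access_key_, data) == 0;
//   }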

using DefaultTLSSystem = PThreadTLSSystem;

// In some scenarios, most notably when testing, the allocator and TLS system
// passed to |ThreadLocalStorage| are not copyable and have to be wrapped, e.g.
// using std::reference_wrapper. |dereference| is a small helper to retrieve
// the underlying value.
template <typename T>
T& dereference(T& ref) {
  return ref;
}

template <typename T>
T& dereference(std::reference_wrapper<T>& ref) {
  // std::reference_wrapper requires a valid reference for construction,
  // therefore, there is no need to check here.
  return ref.get();
}
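
// For example (illustrative only):
//
//   int value = 0;
//   std::reference_wrapper<int> wrapped(value);
//   dereference(value) = 1;    // Binds to the generic overload.
//   dereference(wrapped) = 2;  // Unwraps and assigns to `value`.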

// Store thread local data. The data is organized in chunks, where each chunk
// holds |ItemsPerChunk| items. Each item may be free or used.
//
// When a thread requests data, the chunks are searched for a free data item,
// which is registered for this thread and marked as |used|. Further requests
// by this thread will then always return the same item. When a thread
// terminates, the item will be reset and returned to the pool of free items.
//
// Upon construction, the first chunk is created. If a thread requests data and
// there is no free item available, another chunk is created. Upon destruction,
// all memory is freed. Pointers to data items become invalid!
//
// Constructor and destructor are not thread safe.
//
// @tparam PayloadType The item type to be stored.
// @tparam AllocatorType The allocator being used. An allocator must provide
// the following interface:
//  // Allocate size_in_bytes bytes of raw memory.
//  void* AllocateMemory(size_t size_in_bytes);
//  // Free the raw memory pointed to by pointer_to_allocated.
//  bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
// Any failure in allocation or free must terminate the process.
// @tparam TLSSystemType The TLS system being used. A TLS system must provide
// the following interface:
//  bool Setup(OnThreadTerminationFunction thread_termination_function);
//  bool TearDownForTesting();
//  void* GetThreadSpecificData();
//  bool SetThreadSpecificData(void* data);
// @tparam AllocationChunkSize The minimum size of a memory chunk that the
// allocator can handle. We try to size the chunks so that each chunk uses this
// size as fully as possible.
// @tparam IsDestructibleForTesting For testing purposes, we allow the
// destructor to perform clean-up. Otherwise, using the destructor will result
// in a compilation failure.
template <typename PayloadType,
          typename AllocatorType,
          typename TLSSystemType,
          size_t AllocationChunkSize,
          bool IsDestructibleForTesting>
struct ThreadLocalStorage {
  ThreadLocalStorage() : root_(AllocateAndInitializeChunk()) { Initialize(); }

  // Create a new instance of |ThreadLocalStorage| using the passed allocator
  // and TLS system. This initializes the underlying TLS system and creates the
  // first chunk of data.
  ThreadLocalStorage(AllocatorType allocator, TLSSystemType tlsSystem)
      : allocator_(std::move(allocator)),
        tls_system_(std::move(tlsSystem)),
        root_(AllocateAndInitializeChunk()) {
    Initialize();
  }

  // Deletes an instance of |ThreadLocalStorage| and deletes all the data
  // chunks created.
  ~ThreadLocalStorage() {
    if constexpr (IsDestructibleForTesting) {
      TearDownForTesting();
    } else {
      static_assert(
          IsDestructibleForTesting,
          "ThreadLocalStorage cannot be destructed outside of test code.");
    }
  }

  // Explicitly prevent all forms of copy/move construction/assignment. For an
  // exact copy of ThreadLocalStorage we would need to copy the mapping of
  // thread to item, which we can't do at the moment. On the other hand, our
  // atomic members do not support moving out of the box.
  ThreadLocalStorage(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage(ThreadLocalStorage&& other) = delete;
  ThreadLocalStorage& operator=(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage& operator=(ThreadLocalStorage&&) = delete;

  // Get the data item for the current thread. If no data is registered so far,
  // find a free item in the chunks and register it for the current thread.
  PayloadType* GetThreadLocalData() {
    auto& tls_system = dereference(tls_system_);

    auto* slot = static_cast<SingleSlot*>(tls_system.GetThreadSpecificData());

    if (UNLIKELY(slot == nullptr)) {
      slot = FindAndAllocateFreeSlot(root_.load(std::memory_order_relaxed));

      // We might be called in the course of handling a memory allocation. We
      // do not use CHECK since it might allocate and cause a recursion.
      RAW_CHECK(tls_system.SetThreadSpecificData(slot));

      // Reset the content to wipe out any previous data.
      Reset(slot->item);
    }

    return &(slot->item);
  }

 private:
  // Encapsulate the payload item and some administrative data.
  struct SingleSlot {
    PayloadType item;
#if !defined(__cpp_lib_atomic_value_initialization) || \
    __cpp_lib_atomic_value_initialization < 201911L
    std::atomic_flag is_used = ATOMIC_FLAG_INIT;
#else
    std::atomic_flag is_used;
#endif
  };

  template <size_t NumberOfItems>
  struct ChunkT {
    SingleSlot slots[NumberOfItems];
    // Pointer to the next chunk.
    std::atomic<ChunkT*> next_chunk = nullptr;
    // Helper flag to ensure we create the next chunk only once in a
    // multithreaded environment.
    std::once_flag create_next_chunk_flag;
  };

  template <size_t LowerNumberOfItems,
            size_t UpperNumberOfItems,
            size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItemsBinSearch() {
    if constexpr (LowerNumberOfItems == UpperNumberOfItems) {
      return LowerNumberOfItems;
    }

    constexpr size_t CurrentNumberOfItems =
        (UpperNumberOfItems - LowerNumberOfItems) / 2 + LowerNumberOfItems;

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems>) > NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, CurrentNumberOfItems, NumberOfBytes>();
    }

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems + 1>) < NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          CurrentNumberOfItems + 1, UpperNumberOfItems, NumberOfBytes>();
    }

    return CurrentNumberOfItems;
  }

  // Calculate the maximum number of items we can store in one chunk without
  // the size of the chunk exceeding NumberOfBytes. To prevent things like
  // alignment and packing from tampering with the calculation, instead of
  // computing the number of items directly we use the sizeof operator on
  // ChunkT to search for the correct size. Unfortunately, the recursion depth
  // is limited by the compiler. Therefore, we use a binary search instead of
  // a simple linear search.
  template <size_t MinimumNumberOfItems, size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItems() {
    if constexpr (sizeof(ChunkT<MinimumNumberOfItems>) < NumberOfBytes) {
      constexpr size_t LowerNumberOfItems = MinimumNumberOfItems;
      constexpr size_t UpperNumberOfItems =
          NumberOfBytes / sizeof(PayloadType) + 1;
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, UpperNumberOfItems, NumberOfBytes>();
    }

    return MinimumNumberOfItems;
  }
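
  // For illustration (the numbers are hypothetical and platform-dependent):
  // with AllocationChunkSize = 4096, an 8-byte PayloadType, and alignment
  // padding bringing sizeof(SingleSlot) to 16 bytes, the search brackets
  // [MinimumNumberOfItems, 4096 / 8 + 1] and converges on the largest N for
  // which sizeof(ChunkT<N>) still fits into 4096 bytes, i.e. roughly
  // (4096 - sizeof(next_chunk) - sizeof(create_next_chunk_flag)) / 16 slots.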

 public:
  // The minimum number of items per chunk. It should be high enough to
  // accommodate most items in the root chunk whilst not wasting too much
  // space on unnecessary items.
  static constexpr size_t MinimumNumberOfItemsPerChunk = 75;
  // The effective number of items per chunk. We use the AllocationChunkSize as
  // a hint to calculate the effective number of items so we occupy one of
  // these memory chunks to the maximum extent possible.
  static constexpr size_t ItemsPerChunk =
      CalculateEffectiveNumberOfItems<MinimumNumberOfItemsPerChunk,
                                      AllocationChunkSize>();

 private:
  using Chunk = ChunkT<ItemsPerChunk>;

  static_assert(ItemsPerChunk >= MinimumNumberOfItemsPerChunk);

  // Mark an item's slot as ready for reuse. This function is used as the
  // thread termination function in the TLS system. We do not destroy anything
  // at this point but simply mark the slot as unused.
  static void MarkSlotAsFree(void* data) {
    // We always store SingleSlots in the TLS system. Therefore, we cast to
    // SingleSlot and reset the is_used flag.
    auto* const slot = static_cast<SingleSlot*>(data);

    // We might be called in the course of handling a memory allocation. We do
    // not use CHECK since it might allocate and cause a recursion.
    RAW_CHECK(slot && slot->is_used.test_and_set());

    slot->is_used.clear(std::memory_order_relaxed);
  }

  // Perform common initialization during construction of an instance.
  void Initialize() {
    // The constructor must be called outside of the allocation path.
    // Therefore, it is safe to verify with CHECK.

    // By passing MarkSlotAsFree as the thread_termination_function we ensure
    // that the slot/item assigned to a finished thread is returned to the
    // pool of unused items.
    CHECK(dereference(tls_system_).Setup(&MarkSlotAsFree));
  }

  Chunk* AllocateAndInitializeChunk() {
    void* const uninitialized_memory =
        dereference(allocator_).AllocateMemory(sizeof(Chunk));

    // We might be called in the course of handling a memory allocation. We do
    // not use CHECK since it might allocate and cause a recursion.
    RAW_CHECK(uninitialized_memory != nullptr);

    return new (uninitialized_memory) Chunk{};
  }

  void FreeAndDeallocateChunkForTesting(Chunk* chunk_to_erase) {
    chunk_to_erase->~Chunk();

    // FreeAndDeallocateChunkForTesting must be called outside of the
    // allocation path. Therefore, it is safe to verify with CHECK.
    CHECK(dereference(allocator_)
              .FreeMemoryForTesting(chunk_to_erase, sizeof(Chunk)));
  }

  // Find a free slot in the passed chunk, reserve it and return it to the
  // caller. If no free slot can be found, head on to the next chunk. If the
  // next chunk doesn't exist, create it.
  SingleSlot* FindAndAllocateFreeSlot(Chunk* const chunk) {
    SingleSlot* const slot = std::find_if_not(
        std::begin(chunk->slots), std::end(chunk->slots),
        [](SingleSlot& candidate_slot) {
          return candidate_slot.is_used.test_and_set(std::memory_order_relaxed);
        });

    // So we found a slot. Happily return it to the caller.
    if (slot != std::end(chunk->slots)) {
      return slot;
    }

    // Ok, there are no more free slots in this chunk. First, ensure the next
    // chunk is valid and create one if necessary.
    std::call_once(chunk->create_next_chunk_flag, [&] {
      // From https://eel.is/c++draft/thread.once.callonce#3
      //
      // Synchronization: For any given once_flag: all active executions occur
      // in a total order; completion of an active execution synchronizes with
      // the start of the next one in this total order; and the returning
      // execution synchronizes with the return from all passive executions.
      //
      // Therefore, a relaxed store is sufficient here; call_once synchronizes
      // with other threads.
      chunk->next_chunk.store(AllocateAndInitializeChunk(),
                              std::memory_order_relaxed);
    });

    return FindAndAllocateFreeSlot(chunk->next_chunk);
  }
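
  // The chunks thus form a singly linked list that only ever grows:
  //
  //   root_ -> [slots..., next_chunk] -> [slots..., next_chunk] -> nullptr
  //
  // Threads race on the atomic is_used flags within each chunk and serialize
  // on create_next_chunk_flag when the list has to be extended.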

  template <bool IsDestructibleForTestingP = IsDestructibleForTesting>
  typename std::enable_if<IsDestructibleForTestingP>::type
  TearDownForTesting() {
    // The destructor must be called outside of the allocation path. Therefore,
    // it is safe to verify with CHECK.

    // All accessing threads must be terminated by now. For additional safety
    // we tear down the TLS system first. This way we ensure that
    // MarkSlotAsFree is not called anymore and we have no accesses from the
    // TLS system's side.
    CHECK(dereference(tls_system_).TearDownForTesting());

    // Delete all data chunks.
    for (auto* chunk = root_.load(); chunk != nullptr;) {
      auto* next_chunk = chunk->next_chunk.load();
      FreeAndDeallocateChunkForTesting(chunk);
      chunk = next_chunk;
    }
  }

  // Reset a single item to its default value.
  // Since items are re-used, they may be accessed from different threads,
  // causing TSan to trigger. Therefore, the reset is exempt from TSan
  // instrumentation.
  DISABLE_TSAN_INSTRUMENTATION void Reset(PayloadType& item) { item = {}; }

  AllocatorType allocator_;
  TLSSystemType tls_system_;
  std::atomic<Chunk*> const root_;
};

}  // namespace internal

// The ThreadLocalStorage visible to the user. This uses the internal default
// allocator and TLS system.
template <typename StorageType,
          typename AllocatorType = internal::DefaultAllocator,
          typename TLSSystemType = internal::DefaultTLSSystem,
          size_t AllocationChunkSize = AllocatorType::AllocationChunkSize,
          bool IsDestructibleForTesting = false>
using ThreadLocalStorage =
    internal::ThreadLocalStorage<StorageType,
                                 AllocatorType,
                                 TLSSystemType,
                                 AllocationChunkSize,
                                 IsDestructibleForTesting>;
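
// Example usage (illustrative only; AllocationData is a hypothetical payload
// type). Since the destructor is deliberately unusable outside of test code,
// the instance must never be destroyed, e.g. by keeping it in a function-local
// static base::NoDestructor:
//
//   struct AllocationData {
//     size_t allocation_count = 0;
//   };
//
//   ThreadLocalStorage<AllocationData>& Storage() {
//     static base::NoDestructor<ThreadLocalStorage<AllocationData>> storage;
//     return *storage;
//   }
//
//   void OnAllocation() {
//     // Returns the same item for all calls made on the current thread.
//     ++Storage().GetThreadLocalData()->allocation_count;
//   }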

}  // namespace base::allocator::dispatcher

#endif  // USE_LOCAL_TLS_EMULATION()
#endif  // BASE_ALLOCATOR_DISPATCHER_TLS_H_