• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2022 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef BASE_ALLOCATOR_DISPATCHER_TLS_H_
6 #define BASE_ALLOCATOR_DISPATCHER_TLS_H_
7 
8 #include <string_view>
9 
10 #include "build/build_config.h"
11 
12 #if BUILDFLAG(IS_POSIX)  // the current allocation mechanism (mmap) and TLS
13                          // support (pthread) are both defined by POSIX
14 #define USE_LOCAL_TLS_EMULATION() true
15 #else
16 #define USE_LOCAL_TLS_EMULATION() false
17 #endif
18 
19 #if USE_LOCAL_TLS_EMULATION()
#include <algorithm>
#include <atomic>
#include <cstddef>
#include <functional>
#include <memory>
#include <mutex>
#include <new>
#include <utility>
24 
25 #include "base/base_export.h"
26 #include "base/check.h"
27 #include "base/compiler_specific.h"
28 
29 #if PA_BUILDFLAG(USE_PARTITION_ALLOC)
30 #include "partition_alloc/partition_alloc_constants.h"  // nogncheck
31 #endif
32 
33 #include <pthread.h>
34 
35 #if HAS_FEATURE(thread_sanitizer)
36 #define DISABLE_TSAN_INSTRUMENTATION __attribute__((no_sanitize("thread")))
37 #else
38 #define DISABLE_TSAN_INSTRUMENTATION
39 #endif
40 
41 #define STR_HELPER(x) #x
42 #define STR(x) STR_HELPER(x)
43 
44 // Verify that a condition holds and cancel the process in case it doesn't. The
45 // functionality is similar to RAW_CHECK but includes more information in the
46 // logged messages. It is non allocating to prevent recursions.
47 #define TLS_RAW_CHECK(error_message, condition) \
48   TLS_RAW_CHECK_IMPL(error_message, condition, __FILE__, __LINE__)
49 
50 #define TLS_RAW_CHECK_IMPL(error_message, condition, file, line)        \
51   do {                                                                  \
52     if (!(condition)) {                                                 \
53       constexpr const char* message =                                   \
54           "TLS System: " error_message " Failed condition '" #condition \
55           "' in (" file "@" STR(line) ").\n";                           \
56       ::logging::RawCheckFailure(message);                              \
57     }                                                                   \
58   } while (0)
59 
60 namespace base::debug {
61 struct CrashKeyString;
62 }
63 
64 namespace base::allocator::dispatcher {
65 namespace internal {
66 
67 // Allocate memory using POSIX' mmap and unmap functionality. The allocator
68 // implements the allocator interface required by ThreadLocalStorage.
69 struct BASE_EXPORT MMapAllocator {
70 // The minimum size of a memory chunk when allocating. Even for chunks with
71 // fewer bytes, at least AllocationChunkSize bytes are allocated. For mmap, this
72 // is usually the page size of the system.
73 // For various OS-CPU combinations, partition_alloc::PartitionPageSize() is not
74 // constexpr. Hence, we can not use this value but define it locally.
75 #if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR) && \
76     PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR
77   constexpr static size_t AllocationChunkSize =
78       partition_alloc::PartitionPageSize();
79 #elif BUILDFLAG(IS_APPLE)
80   constexpr static size_t AllocationChunkSize = 16384;
81 #elif BUILDFLAG(IS_ANDROID) && defined(ARCH_CPU_64_BITS)
82   constexpr static size_t AllocationChunkSize = 16384;
83 #elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
84   constexpr static size_t AllocationChunkSize = 16384;
85 #else
86   constexpr static size_t AllocationChunkSize = 4096;
87 #endif
88 
89   // Allocate size_in_bytes bytes of raw memory. Return nullptr if allocation
90   // fails.
91   void* AllocateMemory(size_t size_in_bytes);
92   // Free the raw memory pointed to by pointer_to_allocated. Returns a boolean
93   // value indicating if the free was successful.
94   bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
95 };
96 
97 // The allocator used by default for the thread local storage.
98 using DefaultAllocator = MMapAllocator;
99 
100 using OnThreadTerminationFunction = void (*)(void*);
101 
102 // The TLS system used by default for the thread local storage. It stores and
103 // retrieves thread specific data pointers.
104 class BASE_EXPORT PThreadTLSSystem {
105  public:
106   PThreadTLSSystem();
107 
108   PThreadTLSSystem(const PThreadTLSSystem&) = delete;
109   PThreadTLSSystem(PThreadTLSSystem&&);
110   PThreadTLSSystem& operator=(const PThreadTLSSystem&) = delete;
111   PThreadTLSSystem& operator=(PThreadTLSSystem&&);
112 
113   // Initialize the TLS system to store a data set for different threads.
114   // @param thread_termination_function An optional function which will be
115   // invoked upon termination of a thread.
116   bool Setup(OnThreadTerminationFunction thread_termination_function,
117              std::string_view instance_id);
118   // Tear down the TLS system. After completing tear down, the thread
119   // termination function passed to Setup will not be invoked anymore.
120   bool TearDownForTesting();
121 
122   // Get the pointer to the data associated to the current thread. Returns
123   // nullptr if the TLS system is not initialized or no data was set before.
124   void* GetThreadSpecificData();
125   // Set the pointer to the data associated to the current thread. Return true
126   // if stored successfully, false otherwise.
127   bool SetThreadSpecificData(void* data);
128 
129  private:
130   base::debug::CrashKeyString* crash_key_ = nullptr;
131   pthread_key_t data_access_key_ = 0;
132 #if DCHECK_IS_ON()
133   // From POSIX standard at https://www.open-std.org/jtc1/sc22/open/n4217.pdf:
134   // The effect of calling pthread_getspecific() or pthread_setspecific() with a
135   // key value not obtained from pthread_key_create() or after key has been
136   // deleted with pthread_key_delete() is undefined.
137   //
138   // Unfortunately, POSIX doesn't define a special value of pthread_key_t
139   // indicating an invalid key which would allow us to detect accesses outside
140   // of initialized state. Hence, to prevent us from drifting into the evil
141   // realm of undefined behaviour we store whether we're somewhere between Setup
142   // and Teardown.
143   std::atomic_bool initialized_{false};
144 #endif
145 };
146 
147 using DefaultTLSSystem = PThreadTLSSystem;
148 
149 // In some scenarios, most notably when testing, the allocator and TLS system
150 // passed to |ThreadLocalStorage| are not copyable and have to be wrapped, i.e.
151 // using std::reference_wrapper. |dereference| is a small helper to retrieve the
152 // underlying value.
153 template <typename T>
dereference(T & ref)154 T& dereference(T& ref) {
155   return ref;
156 }
157 
158 template <typename T>
dereference(std::reference_wrapper<T> & ref)159 T& dereference(std::reference_wrapper<T>& ref) {
160   // std::reference_wrapper requires a valid reference for construction,
161   // therefore, no need in checking here.
162   return ref.get();
163 }
164 
165 // Store thread local data. The data is organized in chunks, where each chunk
166 // holds |ItemsPerChunk|. Each item may be free or used.
167 //
168 // When a thread requests data, the chunks are searched for a free data item,
169 // which is registered for this thread and marked as |used|. Further requests by
170 // this thread will then always return the same item. When a thread terminates,
171 // the item will be reset and return to the pool of free items.
172 //
173 // Upon construction, the first chunk is created. If a thread requests data and
174 // there is no free item available, another chunk is created. Upon destruction,
175 // all memory is freed. Pointers to data items become invalid!
176 //
177 // Constructor and destructor are not thread safe.
178 //
179 // @tparam PayloadType The item type to be stored.
180 // @tparam AllocatorType The allocator being used. An allocator must provide
181 // the following interface:
182 //  void* AllocateMemory(size_t size_in_bytes); // Allocate size_in_bytes bytes
183 //  of raw memory.
184 //  void FreeMemory(void* pointer_to_allocated, size_t size_in_bytes); // Free
185 //  the raw memory pointed to by pointer_to_allocated.
186 // Any failure in allocation or free must terminate the process.
187 // @tparam TLSSystemType The TLS system being used. A TLS system must provide
188 // the following interface:
189 //  bool Setup(OnThreadTerminationFunction thread_termination_function);
190 //  bool Destroy();
191 //  void* GetThreadSpecificData();
192 //  bool SetThreadSpecificData(void* data);
193 // @tparam AllocationChunkSize The minimum size of a memory chunk that the
194 // allocator can handle. We try to size the chunks so that each chunk uses this
195 // size to the maximum.
196 // @tparam IsDestructibleForTesting For testing purposes we allow the destructor
197 // to perform clean up upon destruction. Otherwise, using the destructor will
198 // result in a compilation failure.
199 template <typename PayloadType,
200           typename AllocatorType,
201           typename TLSSystemType,
202           size_t AllocationChunkSize,
203           bool IsDestructibleForTesting>
204 struct ThreadLocalStorage {
ThreadLocalStorageThreadLocalStorage205   explicit ThreadLocalStorage(std::string_view instance_id)
206       : root_(AllocateAndInitializeChunk()) {
207     Initialize(instance_id);
208   }
209 
210   // Create a new instance of |ThreadLocalStorage| using the passed allocator
211   // and TLS system. This initializes the underlying TLS system and creates the
212   // first chunk of data.
ThreadLocalStorageThreadLocalStorage213   ThreadLocalStorage(std::string_view instance_id,
214                      AllocatorType allocator,
215                      TLSSystemType tls_system)
216       : allocator_(std::move(allocator)),
217         tls_system_(std::move(tls_system)),
218         root_(AllocateAndInitializeChunk()) {
219     Initialize(instance_id);
220   }
221 
222   // Deletes an instance of |ThreadLocalStorage| and delete all the data chunks
223   // created.
~ThreadLocalStorageThreadLocalStorage224   ~ThreadLocalStorage() {
225     if constexpr (IsDestructibleForTesting) {
226       TearDownForTesting();
227     } else if constexpr (!IsDestructibleForTesting) {
228       static_assert(
229           IsDestructibleForTesting,
230           "ThreadLocalStorage cannot be destructed outside of test code.");
231     }
232   }
233 
234   // Explicitly prevent all forms of Copy/Move construction/assignment. For an
235   // exact copy of ThreadLocalStorage we would need to copy the mapping of
236   // thread to item, which we can't do at the moment. On the other side, our
237   // atomic members do not support moving out of the box.
238   ThreadLocalStorage(const ThreadLocalStorage&) = delete;
239   ThreadLocalStorage(ThreadLocalStorage&& other) = delete;
240   ThreadLocalStorage& operator=(const ThreadLocalStorage&) = delete;
241   ThreadLocalStorage& operator=(ThreadLocalStorage&&) = delete;
242 
243   // Get the data item for the current thread. If no data is registered so far,
244   // find a free item in the chunks and register it for the current thread.
GetThreadLocalDataThreadLocalStorage245   PayloadType* GetThreadLocalData() {
246     auto& tls_system = dereference(tls_system_);
247 
248     auto* slot = static_cast<SingleSlot*>(tls_system.GetThreadSpecificData());
249 
250     if (slot == nullptr) [[unlikely]] {
251       slot = FindAndAllocateFreeSlot(root_.load(std::memory_order_relaxed));
252 
253       // We might be called in the course of handling a memory allocation. We do
254       // not use CHECK since they might allocate and cause a recursion.
255       TLS_RAW_CHECK("Failed to set thread specific data.",
256                     tls_system.SetThreadSpecificData(slot));
257 
258       // Reset the content to wipe out any previous data.
259       Reset(slot->item);
260     }
261 
262     return &(slot->item);
263   }
264 
265  private:
266   // Encapsulate the payload item and some administrative data.
267   struct SingleSlot {
268     PayloadType item;
269 #if !defined(__cpp_lib_atomic_value_initialization) || \
270     __cpp_lib_atomic_value_initialization < 201911L
271     std::atomic_flag is_used = ATOMIC_FLAG_INIT;
272 #else
273     std::atomic_flag is_used;
274 #endif
275   };
276 
277   template <size_t NumberOfItems>
278   struct ChunkT {
279     SingleSlot slots[NumberOfItems];
280     // Pointer to the next chunk.
281     std::atomic<ChunkT*> next_chunk = nullptr;
282     // Helper flag to ensure we create the next chunk only once in a multi
283     // threaded environment.
284     std::once_flag create_next_chunk_flag;
285   };
286 
287   template <size_t LowerNumberOfItems,
288             size_t UpperNumberOfItems,
289             size_t NumberOfBytes>
CalculateEffectiveNumberOfItemsBinSearchThreadLocalStorage290   static constexpr size_t CalculateEffectiveNumberOfItemsBinSearch() {
291     if constexpr (LowerNumberOfItems == UpperNumberOfItems) {
292       return LowerNumberOfItems;
293     }
294 
295     constexpr size_t CurrentNumberOfItems =
296         (UpperNumberOfItems - LowerNumberOfItems) / 2 + LowerNumberOfItems;
297 
298     if constexpr (sizeof(ChunkT<CurrentNumberOfItems>) > NumberOfBytes) {
299       return CalculateEffectiveNumberOfItemsBinSearch<
300           LowerNumberOfItems, CurrentNumberOfItems, NumberOfBytes>();
301     }
302 
303     if constexpr (sizeof(ChunkT<CurrentNumberOfItems + 1>) < NumberOfBytes) {
304       return CalculateEffectiveNumberOfItemsBinSearch<
305           CurrentNumberOfItems + 1, UpperNumberOfItems, NumberOfBytes>();
306     }
307 
308     return CurrentNumberOfItems;
309   }
310 
311   // Calculate the maximum number of items we can store in one chunk without the
312   // size of the chunk exceeding NumberOfBytes. To avoid things like alignment
313   // and packing tampering with the calculation, instead of calculating the
314   // correct number of items we use sizeof-operator against ChunkT to search for
315   // the correct size. Unfortunately, the number of recursions is limited by the
316   // compiler. Therefore, we use a binary search instead of a simple linear
317   // search.
318   template <size_t MinimumNumberOfItems, size_t NumberOfBytes>
CalculateEffectiveNumberOfItemsThreadLocalStorage319   static constexpr size_t CalculateEffectiveNumberOfItems() {
320     if constexpr (sizeof(ChunkT<MinimumNumberOfItems>) < NumberOfBytes) {
321       constexpr size_t LowerNumberOfItems = MinimumNumberOfItems;
322       constexpr size_t UpperNumberOfItems =
323           NumberOfBytes / sizeof(PayloadType) + 1;
324       return CalculateEffectiveNumberOfItemsBinSearch<
325           LowerNumberOfItems, UpperNumberOfItems, NumberOfBytes>();
326     }
327 
328     return MinimumNumberOfItems;
329   }
330 
331  public:
332   // The minimum number of items per chunk. It should be high enough to
333   // accommodate most items in the root chunk whilst not wasting to much space
334   // on unnecessary items.
335   static constexpr size_t MinimumNumberOfItemsPerChunk = 75;
336   // The effective number of items per chunk. We use the AllocationChunkSize as
337   // a hint to calculate to effective number of items so we occupy one of these
338   // memory chunks to the maximum extent possible.
339   static constexpr size_t ItemsPerChunk =
340       CalculateEffectiveNumberOfItems<MinimumNumberOfItemsPerChunk,
341                                       AllocationChunkSize>();
342 
343  private:
344   using Chunk = ChunkT<ItemsPerChunk>;
345 
346   static_assert(ItemsPerChunk >= MinimumNumberOfItemsPerChunk);
347 
348   // Mark an item's slot ready for reuse. This function is used as thread
349   // termination function in the TLS system. We do not destroy anything at this
350   // point but simply mark the slot as unused.
MarkSlotAsFreeThreadLocalStorage351   static void MarkSlotAsFree(void* data) {
352     // We always store SingleSlots in the TLS system. Therefore, we cast to
353     // SingleSlot and reset the is_used flag.
354     auto* const slot = static_cast<SingleSlot*>(data);
355 
356     // We might be called in the course of handling a memory allocation.
357     // Therefore, do not use CHECK since it might allocate and cause a
358     // recursion.
359     TLS_RAW_CHECK("Received an invalid slot.",
360                   slot && slot->is_used.test_and_set());
361 
362     slot->is_used.clear(std::memory_order_relaxed);
363   }
364 
365   // Perform common initialization during construction of an instance.
InitializeThreadLocalStorage366   void Initialize(std::string_view instance_id) {
367     // The constructor must be called outside of the allocation path. Therefore,
368     // it is secure to verify with CHECK.
369 
370     // Passing MarkSlotAsFree as thread_termination_function we ensure the
371     // slot/item assigned to the finished thread will be returned to the pool of
372     // unused items.
373     CHECK(dereference(tls_system_).Setup(&MarkSlotAsFree, instance_id));
374   }
375 
AllocateAndInitializeChunkThreadLocalStorage376   Chunk* AllocateAndInitializeChunk() {
377     void* const uninitialized_memory =
378         dereference(allocator_).AllocateMemory(sizeof(Chunk));
379 
380     // We might be called in the course of handling a memory allocation. We do
381     // not use CHECK since they might allocate and cause a recursion.
382     TLS_RAW_CHECK("Failed to allocate memory for new chunk.",
383                   uninitialized_memory != nullptr);
384 
385     return new (uninitialized_memory) Chunk{};
386   }
387 
FreeAndDeallocateChunkForTestingThreadLocalStorage388   void FreeAndDeallocateChunkForTesting(Chunk* chunk_to_erase) {
389     chunk_to_erase->~Chunk();
390 
391     // FreeAndDeallocateChunkForTesting must be called outside of the allocation
392     // path. Therefore, it is secure to verify with CHECK.
393     CHECK(dereference(allocator_)
394               .FreeMemoryForTesting(chunk_to_erase, sizeof(Chunk)));
395   }
396 
397   // Find a free slot in the passed chunk, reserve it and return it to the
398   // caller. If no free slot can be found, head on to the next chunk. If the
399   // next chunk doesn't exist, create it.
FindAndAllocateFreeSlotThreadLocalStorage400   SingleSlot* FindAndAllocateFreeSlot(Chunk* const chunk) {
401     SingleSlot* const slot = std::find_if_not(
402         std::begin(chunk->slots), std::end(chunk->slots),
403         [](SingleSlot& candidate_slot) {
404           return candidate_slot.is_used.test_and_set(std::memory_order_relaxed);
405         });
406 
407     // So we found a slot. Happily return it to the caller.
408     if (slot != std::end(chunk->slots)) {
409       return slot;
410     }
411 
412     // Ok, there are no more free slots in this chunk. First, ensure the next
413     // chunk is valid and create one if necessary.
414     std::call_once(chunk->create_next_chunk_flag, [&] {
415       // From https://eel.is/c++draft/thread.once.callonce#3
416       //
417       // Synchronization: For any given once_­flag: all active executions occur
418       // in a total order; completion of an active execution synchronizes with
419       // the start of the next one in this total order; and the returning
420       // execution synchronizes with the return from all passive executions.
421       //
422       // Therefore, we do only a relaxed store here, call_once synchronizes with
423       // other threads.
424       chunk->next_chunk.store(AllocateAndInitializeChunk(),
425                               std::memory_order_relaxed);
426     });
427 
428     return FindAndAllocateFreeSlot(chunk->next_chunk);
429   }
430 
431   template <bool IsDestructibleForTestingP = IsDestructibleForTesting>
432   typename std::enable_if<IsDestructibleForTestingP>::type
TearDownForTestingThreadLocalStorage433   TearDownForTesting() {
434     // The destructor must be called outside of the allocation path. Therefore,
435     // it is secure to verify with CHECK.
436 
437     // All accessing threads must be terminated by now. For additional security
438     // we tear down the TLS system first. This way we ensure that
439     // MarkSlotAsFree is not called anymore and we have no accesses from the
440     // TLS system's side.
441     CHECK(dereference(tls_system_).TearDownForTesting());
442 
443     // Delete all data chunks.
444     for (auto* chunk = root_.load(); chunk != nullptr;) {
445       auto* next_chunk = chunk->next_chunk.load();
446       FreeAndDeallocateChunkForTesting(chunk);
447       chunk = next_chunk;
448     }
449   }
450 
451   // Reset a single item to its default value.
452   // Since items are re-used, they may be accessed from different threads,
453   // causing TSan to trigger. Therefore, the reset is exempt from TSan
454   // instrumentation.
ResetThreadLocalStorage455   DISABLE_TSAN_INSTRUMENTATION void Reset(PayloadType& item) { item = {}; }
456 
457   AllocatorType allocator_;
458   TLSSystemType tls_system_;
459   std::atomic<Chunk*> const root_;
460 };
461 
462 }  // namespace internal
463 
464 // The ThreadLocalStorage visible to the user. This uses the internal default
465 // allocator and TLS system.
466 template <typename StorageType,
467           typename AllocatorType = internal::DefaultAllocator,
468           typename TLSSystemType = internal::DefaultTLSSystem,
469           size_t AllocationChunkSize = AllocatorType::AllocationChunkSize,
470           bool IsDestructibleForTesting = false>
471 using ThreadLocalStorage =
472     internal::ThreadLocalStorage<StorageType,
473                                  AllocatorType,
474                                  TLSSystemType,
475                                  AllocationChunkSize,
476                                  IsDestructibleForTesting>;
477 
478 }  // namespace base::allocator::dispatcher
479 
480 #undef TLS_RAW_CHECK_IMPL
481 #undef TLS_RAW_CHECK
482 #undef STR
483 #undef STR_HELPER
484 
485 #endif  // USE_LOCAL_TLS_EMULATION()
486 #endif  // BASE_ALLOCATOR_DISPATCHER_TLS_H_
487