// Copyright 2022 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_ALLOCATOR_DISPATCHER_TLS_H_
#define BASE_ALLOCATOR_DISPATCHER_TLS_H_

#include "build/build_config.h"

#if BUILDFLAG(IS_POSIX)  // the current allocation mechanism (mmap) and TLS
                         // support (pthread) are both defined by POSIX
#define USE_LOCAL_TLS_EMULATION() true
#else
#define USE_LOCAL_TLS_EMULATION() false
#endif

#if USE_LOCAL_TLS_EMULATION()
#include <algorithm>
#include <atomic>
#include <memory>
#include <mutex>

#include "base/allocator/partition_allocator/src/partition_alloc/partition_alloc_constants.h"
#include "base/base_export.h"
#include "base/check.h"
#include "base/compiler_specific.h"
#include "base/strings/string_piece.h"

#include <pthread.h>

#if HAS_FEATURE(thread_sanitizer)
#define DISABLE_TSAN_INSTRUMENTATION __attribute__((no_sanitize("thread")))
#else
#define DISABLE_TSAN_INSTRUMENTATION
#endif

#define STR_HELPER(x) #x
#define STR(x) STR_HELPER(x)

// Verify that a condition holds and abort the process if it doesn't. The
// functionality is similar to RAW_CHECK but includes more information in the
// logged messages. It is non-allocating to prevent recursion.
#define TLS_RAW_CHECK(error_message, condition) \
  TLS_RAW_CHECK_IMPL(error_message, condition, __FILE__, __LINE__)

#define TLS_RAW_CHECK_IMPL(error_message, condition, file, line)        \
  do {                                                                  \
    if (!(condition)) {                                                 \
      constexpr const char* message =                                   \
          "TLS System: " error_message " Failed condition '" #condition \
          "' in (" file "@" STR(line) ").\n";                           \
      ::logging::RawCheckFailure(message);                              \
    }                                                                   \
  } while (0)
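// Illustrative expansion, assuming a hypothetical call site at line 42 of
// this file:
//   TLS_RAW_CHECK("Setup failed.", ptr != nullptr);
// assembles the complete message at compile time via string-literal
// concatenation, e.g.
//   "TLS System: Setup failed. Failed condition 'ptr != nullptr' in
//    (base/allocator/dispatcher/tls.h@42).\n"
// and hands it to ::logging::RawCheckFailure without allocating on the
// failure path.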

namespace base::debug {
struct CrashKeyString;
}

namespace base::allocator::dispatcher {
namespace internal {

// Allocate memory using POSIX's mmap and munmap functionality. The allocator
// implements the allocator interface required by ThreadLocalStorage.
struct BASE_EXPORT MMapAllocator {
// The minimum size of a memory chunk when allocating. Even for chunks with
// fewer bytes, at least AllocationChunkSize bytes are allocated. For mmap,
// this is usually the page size of the system.
// For various OS-CPU combinations, partition_alloc::PartitionPageSize() is
// not constexpr. Hence, we cannot use this value but define it locally.
#if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR) && \
    PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR
  constexpr static size_t AllocationChunkSize =
      partition_alloc::PartitionPageSize();
#elif BUILDFLAG(IS_APPLE)
  constexpr static size_t AllocationChunkSize = 16384;
#elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
  constexpr static size_t AllocationChunkSize = 16384;
#else
  constexpr static size_t AllocationChunkSize = 4096;
#endif

  // Allocate size_in_bytes bytes of raw memory. Return nullptr if allocation
  // fails.
  void* AllocateMemory(size_t size_in_bytes);
  // Free the raw memory pointed to by pointer_to_allocated. Returns a boolean
  // value indicating if the free was successful.
  bool FreeMemoryForTesting(void* pointer_to_allocated, size_t size_in_bytes);
};
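// A minimal sketch of the allocation path, assuming a plain anonymous
// mapping; the actual implementation lives in the accompanying .cc file and
// may differ:
//   void* MMapAllocator::AllocateMemory(size_t size_in_bytes) {
//     void* memory = mmap(nullptr, size_in_bytes, PROT_READ | PROT_WRITE,
//                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
//     return (memory != MAP_FAILED) ? memory : nullptr;
//   }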

// The allocator used by default for the thread local storage.
using DefaultAllocator = MMapAllocator;

using OnThreadTerminationFunction = void (*)(void*);

// The TLS system used by default for the thread local storage. It stores and
// retrieves thread specific data pointers.
class BASE_EXPORT PThreadTLSSystem {
 public:
  PThreadTLSSystem();

  PThreadTLSSystem(const PThreadTLSSystem&) = delete;
  PThreadTLSSystem(PThreadTLSSystem&&);
  PThreadTLSSystem& operator=(const PThreadTLSSystem&) = delete;
  PThreadTLSSystem& operator=(PThreadTLSSystem&&);

  // Initialize the TLS system to store a data set for different threads.
  // @param thread_termination_function An optional function which will be
  // invoked upon termination of a thread.
  // @param instance_id An identifier used to tell instances apart, e.g. in
  // debugging and crash reporting.
  bool Setup(OnThreadTerminationFunction thread_termination_function,
             const base::StringPiece instance_id);
  // Tear down the TLS system. After completing tear down, the thread
  // termination function passed to Setup will not be invoked anymore.
  bool TearDownForTesting();

  // Get the pointer to the data associated with the current thread. Returns
  // nullptr if the TLS system is not initialized or no data was set before.
  void* GetThreadSpecificData();
  // Set the pointer to the data associated with the current thread. Return
  // true if stored successfully, false otherwise.
  bool SetThreadSpecificData(void* data);

 private:
  base::debug::CrashKeyString* crash_key_ = nullptr;
  pthread_key_t data_access_key_ = 0;
#if DCHECK_IS_ON()
  // From the POSIX standard at
  // https://www.open-std.org/jtc1/sc22/open/n4217.pdf: The effect of calling
  // pthread_getspecific() or pthread_setspecific() with a key value not
  // obtained from pthread_key_create() or after key has been deleted with
  // pthread_key_delete() is undefined.
  //
  // Unfortunately, POSIX doesn't define a special value of pthread_key_t
  // indicating an invalid key which would allow us to detect accesses outside
  // of the initialized state. Hence, to prevent us from drifting into the
  // evil realm of undefined behaviour, we store whether we're somewhere
  // between Setup and Teardown.
  std::atomic_bool initialized_{false};
#endif
};
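// A minimal sketch of how this class maps onto the pthread API, assuming a
// straightforward implementation; the real one adds crash-key and DCHECK
// bookkeeping around these calls:
//   bool PThreadTLSSystem::Setup(OnThreadTerminationFunction fn,
//                                const base::StringPiece /*instance_id*/) {
//     return pthread_key_create(&data_access_key_, fn) == 0;
//   }
//   void* PThreadTLSSystem::GetThreadSpecificData() {
//     return pthread_getspecific(data_access_key_);
//   }
//   bool PThreadTLSSystem::SetThreadSpecificData(void* data) {
//     return pthread_setspecific(data_access_key_, data) == 0;
//   }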

using DefaultTLSSystem = PThreadTLSSystem;

// In some scenarios, most notably when testing, the allocator and TLS system
// passed to |ThreadLocalStorage| are not copyable and have to be wrapped,
// i.e. using std::reference_wrapper. |dereference| is a small helper to
// retrieve the underlying value.
template <typename T>
T& dereference(T& ref) {
  return ref;
}

template <typename T>
T& dereference(std::reference_wrapper<T>& ref) {
  // std::reference_wrapper requires a valid reference for construction;
  // therefore, there is no need to check here.
  return ref.get();
}
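// Illustrative use with the MMapAllocator defined above: both plain values
// and wrapped references resolve to the underlying object, so the storage
// code below can treat them uniformly:
//   MMapAllocator allocator;
//   std::reference_wrapper<MMapAllocator> wrapped(allocator);
//   MMapAllocator& a = dereference(allocator);  // identity overload
//   MMapAllocator& b = dereference(wrapped);    // unwraps via .get()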

// Store thread local data. The data is organized in chunks, where each chunk
// holds |ItemsPerChunk| items. Each item may be free or used.
//
// When a thread requests data, the chunks are searched for a free data item,
// which is registered for this thread and marked as |used|. Further requests
// by this thread will then always return the same item. When a thread
// terminates, the item will be reset and returned to the pool of free items.
//
// Upon construction, the first chunk is created. If a thread requests data
// and there is no free item available, another chunk is created. Upon
// destruction, all memory is freed. Pointers to data items become invalid!
//
// Constructor and destructor are not thread safe.
//
// @tparam PayloadType The item type to be stored.
// @tparam AllocatorType The allocator being used. An allocator must provide
// the following interface:
//  void* AllocateMemory(size_t size_in_bytes); // Allocate size_in_bytes
//  bytes of raw memory.
//  void FreeMemory(void* pointer_to_allocated, size_t size_in_bytes); // Free
//  the raw memory pointed to by pointer_to_allocated.
// Any failure in allocation or free must terminate the process.
// @tparam TLSSystemType The TLS system being used. A TLS system must provide
// the following interface:
//  bool Setup(OnThreadTerminationFunction thread_termination_function,
//             const base::StringPiece instance_id);
//  bool TearDownForTesting();
//  void* GetThreadSpecificData();
//  bool SetThreadSpecificData(void* data);
// @tparam AllocationChunkSize The minimum size of a memory chunk that the
// allocator can handle. We size the chunks so that each one uses this size as
// fully as possible.
// @tparam IsDestructibleForTesting For testing purposes we allow the
// destructor to perform clean up upon destruction. Otherwise, using the
// destructor will result in a compilation failure.
template <typename PayloadType,
          typename AllocatorType,
          typename TLSSystemType,
          size_t AllocationChunkSize,
          bool IsDestructibleForTesting>
struct ThreadLocalStorage {
  explicit ThreadLocalStorage(const base::StringPiece instance_id)
      : root_(AllocateAndInitializeChunk()) {
    Initialize(instance_id);
  }

  // Create a new instance of |ThreadLocalStorage| using the passed allocator
  // and TLS system. This initializes the underlying TLS system and creates
  // the first chunk of data.
  ThreadLocalStorage(const base::StringPiece instance_id,
                     AllocatorType allocator,
                     TLSSystemType tls_system)
      : allocator_(std::move(allocator)),
        tls_system_(std::move(tls_system)),
        root_(AllocateAndInitializeChunk()) {
    Initialize(instance_id);
  }

  // Destroy an instance of |ThreadLocalStorage| and delete all the data
  // chunks created.
  ~ThreadLocalStorage() {
    if constexpr (IsDestructibleForTesting) {
      TearDownForTesting();
    } else if constexpr (!IsDestructibleForTesting) {
      static_assert(
          IsDestructibleForTesting,
          "ThreadLocalStorage cannot be destructed outside of test code.");
    }
  }

  // Explicitly prevent all forms of Copy/Move construction/assignment. For an
  // exact copy of ThreadLocalStorage we would need to copy the mapping of
  // thread to item, which we can't do at the moment. On the other side, our
  // atomic members do not support moving out of the box.
  ThreadLocalStorage(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage(ThreadLocalStorage&& other) = delete;
  ThreadLocalStorage& operator=(const ThreadLocalStorage&) = delete;
  ThreadLocalStorage& operator=(ThreadLocalStorage&&) = delete;

  // Get the data item for the current thread. If no data is registered so
  // far, find a free item in the chunks and register it for the current
  // thread.
  PayloadType* GetThreadLocalData() {
    auto& tls_system = dereference(tls_system_);

    auto* slot = static_cast<SingleSlot*>(tls_system.GetThreadSpecificData());

    if (UNLIKELY(slot == nullptr)) {
      slot = FindAndAllocateFreeSlot(root_.load(std::memory_order_relaxed));

      // We might be called in the course of handling a memory allocation. We
      // do not use CHECK since it might allocate and cause a recursion.
      TLS_RAW_CHECK("Failed to set thread specific data.",
                    tls_system.SetThreadSpecificData(slot));

      // Reset the content to wipe out any previous data.
      Reset(slot->item);
    }

    return &(slot->item);
  }

 private:
  // Encapsulate the payload item and some administrative data.
  struct SingleSlot {
    PayloadType item;
#if !defined(__cpp_lib_atomic_value_initialization) || \
    __cpp_lib_atomic_value_initialization < 201911L
    std::atomic_flag is_used = ATOMIC_FLAG_INIT;
#else
    std::atomic_flag is_used;
#endif
  };

  template <size_t NumberOfItems>
  struct ChunkT {
    SingleSlot slots[NumberOfItems];
    // Pointer to the next chunk.
    std::atomic<ChunkT*> next_chunk = nullptr;
    // Helper flag to ensure we create the next chunk only once in a
    // multi-threaded environment.
    std::once_flag create_next_chunk_flag;
  };

  template <size_t LowerNumberOfItems,
            size_t UpperNumberOfItems,
            size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItemsBinSearch() {
    if constexpr (LowerNumberOfItems == UpperNumberOfItems) {
      return LowerNumberOfItems;
    }

    constexpr size_t CurrentNumberOfItems =
        (UpperNumberOfItems - LowerNumberOfItems) / 2 + LowerNumberOfItems;

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems>) > NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, CurrentNumberOfItems, NumberOfBytes>();
    }

    if constexpr (sizeof(ChunkT<CurrentNumberOfItems + 1>) < NumberOfBytes) {
      return CalculateEffectiveNumberOfItemsBinSearch<
          CurrentNumberOfItems + 1, UpperNumberOfItems, NumberOfBytes>();
    }

    return CurrentNumberOfItems;
  }

  // Calculate the maximum number of items we can store in one chunk without
  // the size of the chunk exceeding NumberOfBytes. To avoid things like
  // alignment and packing tampering with the calculation, instead of
  // computing the correct number of items directly we use the sizeof operator
  // on ChunkT to search for the correct size. Unfortunately, the recursion
  // depth is limited by the compiler. Therefore, we use a binary search
  // instead of a simple linear search.
  template <size_t MinimumNumberOfItems, size_t NumberOfBytes>
  static constexpr size_t CalculateEffectiveNumberOfItems() {
    if constexpr (sizeof(ChunkT<MinimumNumberOfItems>) < NumberOfBytes) {
      constexpr size_t LowerNumberOfItems = MinimumNumberOfItems;
      constexpr size_t UpperNumberOfItems =
          NumberOfBytes / sizeof(PayloadType) + 1;
      return CalculateEffectiveNumberOfItemsBinSearch<
          LowerNumberOfItems, UpperNumberOfItems, NumberOfBytes>();
    }

    return MinimumNumberOfItems;
  }
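// Illustrative walk-through, assuming hypothetical numbers: with
// sizeof(PayloadType) == 40 and NumberOfBytes == 4096, the search brackets
// the answer between MinimumNumberOfItems and 4096 / 40 + 1 == 103 items,
// then halves the interval until it finds the largest count whose
// ChunkT<count> (the slots array plus next_chunk and create_next_chunk_flag)
// still fits into 4096 bytes. Since sizeof does the exact accounting,
// alignment and padding are handled for free.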

 public:
  // The minimum number of items per chunk. It should be high enough to
  // accommodate most items in the root chunk whilst not wasting too much
  // space on unnecessary items.
  static constexpr size_t MinimumNumberOfItemsPerChunk = 75;
  // The effective number of items per chunk. We use the AllocationChunkSize
  // as a hint to calculate the effective number of items so we occupy one of
  // these memory chunks to the maximum extent possible.
  static constexpr size_t ItemsPerChunk =
      CalculateEffectiveNumberOfItems<MinimumNumberOfItemsPerChunk,
                                      AllocationChunkSize>();

 private:
  using Chunk = ChunkT<ItemsPerChunk>;

  static_assert(ItemsPerChunk >= MinimumNumberOfItemsPerChunk);

  // Mark an item's slot ready for reuse. This function is used as the thread
  // termination function in the TLS system. We do not destroy anything at
  // this point but simply mark the slot as unused.
  static void MarkSlotAsFree(void* data) {
    // We always store SingleSlots in the TLS system. Therefore, we cast to
    // SingleSlot and reset the is_used flag.
    auto* const slot = static_cast<SingleSlot*>(data);

    // We might be called in the course of handling a memory allocation.
    // Therefore, do not use CHECK since it might allocate and cause a
    // recursion.
    TLS_RAW_CHECK("Received an invalid slot.",
                  slot && slot->is_used.test_and_set());

    slot->is_used.clear(std::memory_order_relaxed);
  }

  // Perform common initialization during construction of an instance.
  void Initialize(const base::StringPiece instance_id) {
    // The constructor must be called outside of the allocation path.
    // Therefore, it is safe to verify with CHECK.

    // By passing MarkSlotAsFree as thread_termination_function, we ensure the
    // slot/item assigned to the finished thread will be returned to the pool
    // of unused items.
    CHECK(dereference(tls_system_).Setup(&MarkSlotAsFree, instance_id));
  }

  Chunk* AllocateAndInitializeChunk() {
    void* const uninitialized_memory =
        dereference(allocator_).AllocateMemory(sizeof(Chunk));

    // We might be called in the course of handling a memory allocation. We
    // do not use CHECK since it might allocate and cause a recursion.
    TLS_RAW_CHECK("Failed to allocate memory for new chunk.",
                  uninitialized_memory != nullptr);

    return new (uninitialized_memory) Chunk{};
  }

  void FreeAndDeallocateChunkForTesting(Chunk* chunk_to_erase) {
    chunk_to_erase->~Chunk();

    // FreeAndDeallocateChunkForTesting must be called outside of the
    // allocation path. Therefore, it is safe to verify with CHECK.
    CHECK(dereference(allocator_)
              .FreeMemoryForTesting(chunk_to_erase, sizeof(Chunk)));
  }

  // Find a free slot in the passed chunk, reserve it and return it to the
  // caller. If no free slot can be found, head on to the next chunk. If the
  // next chunk doesn't exist, create it.
  SingleSlot* FindAndAllocateFreeSlot(Chunk* const chunk) {
    SingleSlot* const slot = std::find_if_not(
        std::begin(chunk->slots), std::end(chunk->slots),
        [](SingleSlot& candidate_slot) {
          return candidate_slot.is_used.test_and_set(std::memory_order_relaxed);
        });

    // So we found a slot. Happily return it to the caller.
    if (slot != std::end(chunk->slots)) {
      return slot;
    }

    // Ok, there are no more free slots in this chunk. First, ensure the next
    // chunk is valid and create one if necessary.
    std::call_once(chunk->create_next_chunk_flag, [&] {
      // From https://eel.is/c++draft/thread.once.callonce#3
      //
      // Synchronization: For any given once_flag: all active executions occur
      // in a total order; completion of an active execution synchronizes with
      // the start of the next one in this total order; and the returning
      // execution synchronizes with the return from all passive executions.
      //
      // Therefore, we only do a relaxed store here; call_once synchronizes
      // with other threads.
      chunk->next_chunk.store(AllocateAndInitializeChunk(),
                              std::memory_order_relaxed);
    });

    return FindAndAllocateFreeSlot(chunk->next_chunk);
  }

  template <bool IsDestructibleForTestingP = IsDestructibleForTesting>
  typename std::enable_if<IsDestructibleForTestingP>::type
  TearDownForTesting() {
    // The destructor must be called outside of the allocation path.
    // Therefore, it is safe to verify with CHECK.

    // All accessing threads must be terminated by now. For additional safety
    // we tear down the TLS system first. This way we ensure that
    // MarkSlotAsFree is not called anymore and we have no accesses from the
    // TLS system's side.
    CHECK(dereference(tls_system_).TearDownForTesting());

    // Delete all data chunks.
    for (auto* chunk = root_.load(); chunk != nullptr;) {
      auto* next_chunk = chunk->next_chunk.load();
      FreeAndDeallocateChunkForTesting(chunk);
      chunk = next_chunk;
    }
  }

  // Reset a single item to its default value.
  // Since items are re-used, they may be accessed from different threads,
  // causing TSan to trigger. Therefore, the reset is exempt from TSan
  // instrumentation.
  DISABLE_TSAN_INSTRUMENTATION void Reset(PayloadType& item) { item = {}; }

  AllocatorType allocator_;
  TLSSystemType tls_system_;
  std::atomic<Chunk*> const root_;
};

}  // namespace internal

// The ThreadLocalStorage visible to the user. This uses the internal default
// allocator and TLS system.
template <typename StorageType,
          typename AllocatorType = internal::DefaultAllocator,
          typename TLSSystemType = internal::DefaultTLSSystem,
          size_t AllocationChunkSize = AllocatorType::AllocationChunkSize,
          bool IsDestructibleForTesting = false>
using ThreadLocalStorage =
    internal::ThreadLocalStorage<StorageType,
                                 AllocatorType,
                                 TLSSystemType,
                                 AllocationChunkSize,
                                 IsDestructibleForTesting>;

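// Illustrative use, assuming a hypothetical payload type and instance name;
// outside of tests an instance is typically long-lived and never destroyed:
//   struct ThreadData {
//     size_t allocation_count = 0;
//   };
//   ThreadLocalStorage<ThreadData> storage("my_profiler");
//   ThreadData* const data = storage.GetThreadLocalData();
//   ++data->allocation_count;
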
}  // namespace base::allocator::dispatcher

#undef TLS_RAW_CHECK_IMPL
#undef TLS_RAW_CHECK
#undef STR
#undef STR_HELPER

#endif  // USE_LOCAL_TLS_EMULATION()
#endif  // BASE_ALLOCATOR_DISPATCHER_TLS_H_