// Protocol Buffers - Google's data interchange format
// Copyright 2022 Google Inc.  All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
//
// This file defines the internal class ThreadSafeArena.

#ifndef GOOGLE_PROTOBUF_THREAD_SAFE_ARENA_H__
#define GOOGLE_PROTOBUF_THREAD_SAFE_ARENA_H__

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/synchronization/mutex.h"
#include "google/protobuf/arena_align.h"
#include "google/protobuf/arena_allocation_policy.h"
#include "google/protobuf/arena_cleanup.h"
#include "google/protobuf/arenaz_sampler.h"
#include "google/protobuf/port.h"
#include "google/protobuf/serial_arena.h"

// Must be included last.
#include "google/protobuf/port_def.inc"

namespace google {
namespace protobuf {
namespace internal {

// This class provides the core Arena memory allocation library. Different
// implementations only need to implement the public interface below.
// Arena is not a template type, as that would only be useful if all protos
// were in turn templates, which will not (and cannot) happen. However, by
// separating the memory allocation part from the API cruft that users expect,
// we can use #ifdef to select the best implementation based on hardware / OS.
class PROTOBUF_EXPORT ThreadSafeArena {
 public:
  ThreadSafeArena();

  ThreadSafeArena(char* mem, size_t size);

  explicit ThreadSafeArena(void* mem, size_t size,
                           const AllocationPolicy& policy);

  // All protos have pointers back to the arena, hence Arena must have
  // pointer stability.
  ThreadSafeArena(const ThreadSafeArena&) = delete;
  ThreadSafeArena& operator=(const ThreadSafeArena&) = delete;
  ThreadSafeArena(ThreadSafeArena&&) = delete;
  ThreadSafeArena& operator=(ThreadSafeArena&&) = delete;

  // The destructor deletes all owned heap-allocated objects and destructs
  // objects that have non-trivial destructors, except for proto2 message
  // objects whose destructors can be skipped. It also frees all blocks except
  // the initial block if one was passed in.
  ~ThreadSafeArena();

  uint64_t Reset();

  uint64_t SpaceAllocated() const;
  uint64_t SpaceUsed() const;

  template <AllocationClient alloc_client = AllocationClient::kDefault>
  void* AllocateAligned(size_t n) {
    SerialArena* arena;
    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
      return arena->AllocateAligned<alloc_client>(n);
    } else {
      return AllocateAlignedFallback<alloc_client>(n);
    }
  }

  void ReturnArrayMemory(void* p, size_t size) {
    SerialArena* arena;
    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
      arena->ReturnArrayMemory(p, size);
    }
  }

  // This function allocates n bytes and returns true in the common happy case;
  // otherwise it does nothing and returns false. These unusual semantics are
  // necessary to let callers write functions whose fallback calls are only in
  // tail position, which substantially improves the generated code for the
  // happy path.
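  //
  // Illustrative sketch only (the helper name below is hypothetical and not
  // part of this header): a caller keeps the fallback call in tail position.
  //
  //   void* AllocateOrFallback(ThreadSafeArena& arena, size_t n) {
  //     void* ptr;
  //     if (arena.MaybeAllocateAligned(n, &ptr)) return ptr;  // happy path
  //     return arena.AllocateAligned(n);  // fallback stays in tail position
  //   }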
  PROTOBUF_NDEBUG_INLINE bool MaybeAllocateAligned(size_t n, void** out) {
    SerialArena* arena;
    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
      return arena->MaybeAllocateAligned(n, out);
    }
    return false;
  }

  void* AllocateAlignedWithCleanup(size_t n, size_t align,
                                   void (*destructor)(void*));

  // Adds an object pointer and a cleanup function pointer to the list.
  void AddCleanup(void* elem, void (*cleanup)(void*));

  void* AllocateFromStringBlock();

  std::vector<void*> PeekCleanupListForTesting();

 private:
  friend class ArenaBenchmark;
  friend class TcParser;
  friend class SerialArena;
  friend struct SerialArenaChunkHeader;
  friend class cleanup::ChunkList;
  static uint64_t GetNextLifeCycleId();

  class SerialArenaChunk;

  // Returns a new SerialArenaChunk that has {id, serial} at slot 0. It may
  // grow based on "prev_capacity".
  static SerialArenaChunk* NewSerialArenaChunk(uint32_t prev_capacity, void* id,
                                               SerialArena* serial);
  static SerialArenaChunk* SentrySerialArenaChunk();

  // Returns the first ArenaBlock* for the first SerialArena. If the user
  // provided one, use it if it's acceptable; otherwise returns a sentry block.
  ArenaBlock* FirstBlock(void* buf, size_t size);
  // Same as the above, but returns a valid block if "policy" is not default.
  ArenaBlock* FirstBlock(void* buf, size_t size,
                         const AllocationPolicy& policy);

  // Adds a SerialArena to the chunked list. May create a new chunk.
  void AddSerialArena(void* id, SerialArena* serial);

  void UnpoisonAllArenaBlocks() const;

  // Members are declared here to track sizeof(ThreadSafeArena) and hotness
  // centrally.

  // Unique for each arena. Changes on Reset().
  uint64_t tag_and_id_ = 0;

  TaggedAllocationPolicyPtr alloc_policy_;  // Tagged pointer to AllocPolicy.
  ThreadSafeArenaStatsHandle arena_stats_;

  // Adding a new chunk to head_ must be protected by mutex_.
  absl::Mutex mutex_;
  // Pointer to a linked list of SerialArenaChunk.
  std::atomic<SerialArenaChunk*> head_{nullptr};

  void* first_owner_;
  // Must be declared after alloc_policy_; otherwise, it may lose info on a
  // user-provided initial block.
  SerialArena first_arena_;

  static_assert(std::is_trivially_destructible<SerialArena>{},
                "SerialArena needs to be trivially destructible.");

  const AllocationPolicy* AllocPolicy() const { return alloc_policy_.get(); }
  void InitializeWithPolicy(const AllocationPolicy& policy);
  void* AllocateAlignedWithCleanupFallback(size_t n, size_t align,
                                           void (*destructor)(void*));

  void Init();

  // Deletes or destructs all objects owned by the arena.
  void CleanupList();

  inline void CacheSerialArena(SerialArena* serial) {
    thread_cache().last_serial_arena = serial;
    thread_cache().last_lifecycle_id_seen = tag_and_id_;
  }

  PROTOBUF_NDEBUG_INLINE bool GetSerialArenaFast(SerialArena** arena) {
    // If this thread already owns a block in this arena then try to use that.
    // This fast path optimizes the case where multiple threads allocate from
    // the same arena.
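    // The cache is validated by comparing the lifecycle id it last saw with
    // tag_and_id_, which is unique per arena and changes on Reset(); a stale
    // entry left behind by another (or a reset) arena fails this comparison
    // and falls through to the slower fallback path.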
    ThreadCache* tc = &thread_cache();
    if (PROTOBUF_PREDICT_TRUE(tc->last_lifecycle_id_seen == tag_and_id_)) {
      *arena = tc->last_serial_arena;
      return true;
    }
    return false;
  }

  // Finds a SerialArena, or creates one if none is found. When creating a new
  // one, creates a block big enough to accommodate n bytes.
  SerialArena* GetSerialArenaFallback(size_t n);

  SerialArena* GetSerialArena();

  template <AllocationClient alloc_client = AllocationClient::kDefault>
  void* AllocateAlignedFallback(size_t n);

  // Executes the callback function over each SerialArenaChunk, passing a
  // const SerialArenaChunk*.
  template <typename Callback>
  void WalkConstSerialArenaChunk(Callback fn) const;

  // Executes the callback function over each SerialArenaChunk.
  template <typename Callback>
  void WalkSerialArenaChunk(Callback fn);

  // Visits each SerialArena and calls "fn", including "first_arena_" and the
  // ones on chunks. Do not rely on the visiting order. The callback function
  // should accept `const SerialArena*`.
  template <typename Callback>
  void VisitSerialArena(Callback fn) const;

  // Releases all memory except the first block, which it returns. The first
  // block might be owned by the user and thus needs some extra checks before
  // being deleted.
  SizedPtr Free();

  // ThreadCache is accessed very frequently, so we align it such that it's
  // located within a single cache line.
  static constexpr size_t kThreadCacheAlignment = 32;

#ifdef _MSC_VER
#pragma warning(disable : 4324)
#endif
  struct alignas(kThreadCacheAlignment) ThreadCache {
    // Number of per-thread lifecycle IDs to reserve. Must be a power of two.
    // To reduce contention on a global atomic, each thread reserves a batch of
    // IDs. The following number is calculated based on a stress test with
    // ~6500 threads all frequently allocating a new arena.
    static constexpr size_t kPerThreadIds = 256;
    // Next lifecycle ID available to this thread. We need to reserve a new
    // batch if `next_lifecycle_id & (kPerThreadIds - 1) == 0`.
    uint64_t next_lifecycle_id{0};
    // The ThreadCache is considered valid as long as this matches the
    // lifecycle_id of the arena being used.
    uint64_t last_lifecycle_id_seen{static_cast<uint64_t>(-1)};
    SerialArena* last_serial_arena{nullptr};
  };
  static_assert(sizeof(ThreadCache) <= kThreadCacheAlignment,
                "ThreadCache may span several cache lines");

  // lifecycle_id_ can be a highly contended variable when many arenas are
  // being created, so make sure that other global variables do not share its
  // cache line.
#ifdef _MSC_VER
#pragma warning(disable : 4324)
#endif
  using LifecycleId = uint64_t;
  alignas(kCacheAlignment) ABSL_CONST_INIT
      static std::atomic<LifecycleId> lifecycle_id_;
#if defined(PROTOBUF_NO_THREADLOCAL)
  // iOS does not support the __thread keyword, so we use a custom thread-local
  // storage class that we implemented.
  static ThreadCache& thread_cache();
#elif defined(PROTOBUF_USE_DLLS) && defined(_WIN32)
  // Thread local variables cannot be exposed through the MSVC DLL interface,
  // but we can wrap them in static functions.
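  // (The thread-local ThreadCache object backing this accessor is expected to
  // be defined in the implementation file; that is an assumption about the
  // build setup, not something visible in this header.)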
  static ThreadCache& thread_cache();
#else
  PROTOBUF_CONSTINIT static PROTOBUF_THREAD_LOCAL ThreadCache thread_cache_;
  static ThreadCache& thread_cache() { return thread_cache_; }
#endif

 public:
  // kBlockHeaderSize is sizeof(ArenaBlock), aligned up to the nearest multiple
  // of 8 to protect the invariant that pos is always at a multiple of 8.
  static constexpr size_t kBlockHeaderSize = SerialArena::kBlockHeaderSize;
  static constexpr size_t kSerialArenaSize =
      (sizeof(SerialArena) + 7) & static_cast<size_t>(-8);
  static constexpr size_t kAllocPolicySize =
      ArenaAlignDefault::Ceil(sizeof(AllocationPolicy));
  static constexpr size_t kMaxCleanupNodeSize = 16;
  static_assert(kBlockHeaderSize % 8 == 0,
                "kBlockHeaderSize must be a multiple of 8.");
  static_assert(kSerialArenaSize % 8 == 0,
                "kSerialArenaSize must be a multiple of 8.");
};

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include "google/protobuf/port_undef.inc"

#endif  // GOOGLE_PROTOBUF_THREAD_SAFE_ARENA_H__