// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

#include "google/protobuf/arena.h"

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/base/prefetch.h"
#include "absl/container/internal/layout.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/synchronization/mutex.h"
#include "absl/types/span.h"
#include "google/protobuf/arena_allocation_policy.h"
#include "google/protobuf/arena_cleanup.h"
#include "google/protobuf/arenaz_sampler.h"
#include "google/protobuf/port.h"
#include "google/protobuf/serial_arena.h"
#include "google/protobuf/string_block.h"
#include "google/protobuf/thread_safe_arena.h"


// Must be included last.
#include "google/protobuf/port_def.inc"

namespace google {
namespace protobuf {
namespace internal {
namespace {

#if defined(__GNUC__) && __GNUC__ >= 5
// kSentryArenaBlock is used for arenas which can be referenced pre-main. So,
// constexpr is required.
constexpr ArenaBlock kSentryArenaBlock;

ArenaBlock* SentryArenaBlock() {
  // const_cast<> is okay as kSentryArenaBlock will never be mutated.
  return const_cast<ArenaBlock*>(&kSentryArenaBlock);
}
#else
// TODO Remove this once we're not using GCC 4.9 for tests.
// There is a compiler bug in this version that causes the above constexpr to
// fail. This version is no longer in our support window, but we use it in
// some of our aarch64 docker images.
ArenaBlock* SentryArenaBlock() {
  static const ArenaBlock kSentryArenaBlock;
  // const_cast<> is okay as kSentryArenaBlock will never be mutated.
  return const_cast<ArenaBlock*>(&kSentryArenaBlock);
}
#endif

inline size_t AllocationSize(size_t last_size, size_t start_size,
                             size_t max_size) {
  if (last_size == 0) return start_size;
  // Double the current block size, up to a limit.
  return std::min(2 * last_size, max_size);
}
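
// A quick illustration of the doubling schedule above (the numbers are
// illustrative only; the real start/max block sizes come from
// AllocationPolicy): with start_size = 256 and max_size = 32768, successive
// calls seeded with the previous block's size yield 256, 512, 1024, ...,
// 32768, and then stay at 32768.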

SizedPtr AllocateMemory(const AllocationPolicy& policy, size_t size) {
  if (policy.block_alloc == nullptr) {
    return AllocateAtLeast(size);
  }
  return {policy.block_alloc(size), size};
}

SizedPtr AllocateBlock(const AllocationPolicy* policy_ptr, size_t last_size,
                       size_t min_bytes) {
  AllocationPolicy policy;  // default policy
  if (policy_ptr) policy = *policy_ptr;
  size_t size =
      AllocationSize(last_size, policy.start_block_size, policy.max_block_size);
  // Verify that min_bytes + kBlockHeaderSize won't overflow.
  ABSL_CHECK_LE(min_bytes, std::numeric_limits<size_t>::max() -
                               SerialArena::kBlockHeaderSize);
  size = std::max(size, SerialArena::kBlockHeaderSize + min_bytes);

  return AllocateMemory(policy, size);
}

SizedPtr AllocateCleanupChunk(const AllocationPolicy* policy_ptr,
                              size_t last_size) {
  constexpr size_t kStartSize = 64;
  constexpr size_t kMaxSize = 4 << 10;
  static_assert(kStartSize % sizeof(cleanup::CleanupNode) == 0, "");

  const size_t size = AllocationSize(last_size, kStartSize, kMaxSize);
  if (policy_ptr == nullptr) return AllocateAtLeast(size);
  return AllocateMemory(*policy_ptr, size);
}
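
// With the constants above, successive cleanup chunks grow as 64, 128, 256,
// ..., 4096 bytes and then stay at kMaxSize; ChunkList::Chunk below carves
// each chunk into CleanupNode slots.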

class GetDeallocator {
 public:
  explicit GetDeallocator(const AllocationPolicy* policy)
      : dealloc_(policy ? policy->block_dealloc : nullptr) {}

  void operator()(SizedPtr mem) const {
    if (dealloc_) {
      dealloc_(mem.p, mem.n);
    } else {
      internal::SizedDelete(mem.p, mem.n);
    }
  }

 private:
  void (*dealloc_)(void*, size_t);
};

}  // namespace

namespace cleanup {
struct ChunkList::Chunk {
  CleanupNode* First() { return reinterpret_cast<CleanupNode*>(this + 1); }
  CleanupNode* Last() { return First() + Capacity() - 1; }
  static size_t Capacity(size_t size) {
    return (size - sizeof(Chunk)) / sizeof(CleanupNode);
  }
  size_t Capacity() const { return Capacity(size); }

  Chunk* next;
  size_t size;
  // Cleanup nodes follow.
};
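
// Memory layout of a cleanup chunk, as implied by First()/Capacity() above:
//
//   [ Chunk header (next, size) | CleanupNode 0 | CleanupNode 1 | ... ]
//     ^ this                      ^ First()             Last() ^
//
// where the number of trailing CleanupNodes is Capacity(size).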

void ChunkList::AddFallback(void* elem, void (*destructor)(void*),
                            SerialArena& arena) {
  ABSL_DCHECK_EQ(next_, limit_);
  SizedPtr mem = AllocateCleanupChunk(arena.parent_.AllocPolicy(),
                                      head_ == nullptr ? 0 : head_->size);
  arena.AddSpaceAllocated(mem.n);
  head_ = new (mem.p) Chunk{head_, mem.n};
  next_ = head_->First();
  prefetch_ptr_ = reinterpret_cast<char*>(next_);
  limit_ = next_ + Chunk::Capacity(mem.n);
  AddFromExisting(elem, destructor);
}

void ChunkList::Cleanup(const SerialArena& arena) {
  Chunk* c = head_;
  if (c == nullptr) return;
  GetDeallocator deallocator(arena.parent_.AllocPolicy());

  // Iterate backwards in order to destroy in the right order.
  CleanupNode* it = next_ - 1;
  while (true) {
    CleanupNode* first = c->First();
    // A prefetch distance of 8 here was chosen arbitrarily.
    constexpr int kPrefetchDistance = 8;
    CleanupNode* prefetch = it;
    // Prefetch the first kPrefetchDistance nodes.
    for (int i = 0; prefetch >= first && i < kPrefetchDistance;
         --prefetch, ++i) {
      prefetch->Prefetch();
    }
    // For the middle nodes, run destructor and prefetch the node
    // kPrefetchDistance after the current one.
    for (; prefetch >= first; --it, --prefetch) {
      it->Destroy();
      prefetch->Prefetch();
    }
    // Note: we could consider prefetching `next` chunk earlier.
    absl::PrefetchToLocalCacheNta(c->next);
    // Destroy the rest without prefetching.
    for (; it >= first; --it) {
      it->Destroy();
    }
    Chunk* next = c->next;
    deallocator({c, c->size});
    if (next == nullptr) return;
    c = next;
    it = c->Last();
  };
}

std::vector<void*> ChunkList::PeekForTesting() {
  std::vector<void*> ret;
  Chunk* c = head_;
  if (c == nullptr) return ret;
  // Iterate backwards to match destruction order.
  CleanupNode* it = next_ - 1;
  while (true) {
    CleanupNode* first = c->First();
    for (; it >= first; --it) {
      ret.push_back(it->elem);
    }
    c = c->next;
    if (c == nullptr) return ret;
    it = c->Last();
  };
}
}  // namespace cleanup

// It is guaranteed that this is constructed in `b`. IOW, this is not the first
// arena and `b` cannot be sentry.
SerialArena::SerialArena(ArenaBlock* b, ThreadSafeArena& parent)
    : ptr_{b->Pointer(kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize)},
      limit_{b->Limit()},
      prefetch_ptr_(
          b->Pointer(kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize)),
      head_{b},
      space_allocated_{b->size},
      parent_{parent} {
  ABSL_DCHECK(!b->IsSentry());
}
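
// Resulting layout of a block that hosts a SerialArena (see SerialArena::New):
//
//   [ ArenaBlock header | SerialArena | allocatable space ...  | Limit() ]
//                         ^ this        ^ ptr_ (initial value)
//
// which is why ptr_ above starts at kBlockHeaderSize + kSerialArenaSize.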

// It is guaranteed that this is the first SerialArena. Use sentry block.
SerialArena::SerialArena(ThreadSafeArena& parent)
    : head_{SentryArenaBlock()}, parent_{parent} {}

// It is guaranteed that this is the first SerialArena but `b` may be user
// provided or newly allocated to store AllocationPolicy.
SerialArena::SerialArena(FirstSerialArena, ArenaBlock* b,
                         ThreadSafeArena& parent)
    : head_{b}, space_allocated_{b->size}, parent_{parent} {
  if (b->IsSentry()) return;
  set_range(b->Pointer(kBlockHeaderSize), b->Limit());
}

std::vector<void*> SerialArena::PeekCleanupListForTesting() {
  return cleanup_list_.PeekForTesting();
}

std::vector<void*> ThreadSafeArena::PeekCleanupListForTesting() {
  return GetSerialArena()->PeekCleanupListForTesting();
}

void SerialArena::Init(ArenaBlock* b, size_t offset) {
  set_range(b->Pointer(offset), b->Limit());
  head_.store(b, std::memory_order_relaxed);
  space_used_.store(0, std::memory_order_relaxed);
  space_allocated_.store(b->size, std::memory_order_relaxed);
  cached_block_length_ = 0;
  cached_blocks_ = nullptr;
  string_block_.store(nullptr, std::memory_order_relaxed);
  string_block_unused_.store(0, std::memory_order_relaxed);
}

SerialArena* SerialArena::New(SizedPtr mem, ThreadSafeArena& parent) {
  ABSL_DCHECK_LE(kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize, mem.n);
  ThreadSafeArenaStats::RecordAllocateStats(parent.arena_stats_.MutableStats(),
                                            /*used=*/0, /*allocated=*/mem.n,
                                            /*wasted=*/0);
  auto b = new (mem.p) ArenaBlock{nullptr, mem.n};
  return new (b->Pointer(kBlockHeaderSize)) SerialArena(b, parent);
}

template <typename Deallocator>
SizedPtr SerialArena::Free(Deallocator deallocator) {
  FreeStringBlocks();

  ArenaBlock* b = head();
  SizedPtr mem = {b, b->size};
  while (b->next) {
    b = b->next;  // We must first advance before deleting this block
    deallocator(mem);
    mem = {b, b->size};
  }
  return mem;
}
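
// Note: the block returned by Free() is the oldest block in the chain (the
// one this SerialArena started with). Callers decide how to release it; for
// the first arena it may be a user-provided buffer (see ThreadSafeArena::Free
// and ~ThreadSafeArena).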

PROTOBUF_NOINLINE
void* SerialArena::AllocateAlignedFallback(size_t n) {
  AllocateNewBlock(n);
  void* ret = nullptr;
  bool res = MaybeAllocateAligned(n, &ret);
  ABSL_DCHECK(res);
  return ret;
}

PROTOBUF_NOINLINE
void* SerialArena::AllocateFromStringBlockFallback() {
  ABSL_DCHECK_EQ(string_block_unused_.load(std::memory_order_relaxed), 0U);
  StringBlock* sb = string_block_.load(std::memory_order_relaxed);
  if (sb) {
    AddSpaceUsed(sb->effective_size());
  }

  void* ptr;
  StringBlock* new_sb;
  size_t size = StringBlock::NextSize(sb);
  if (MaybeAllocateAligned(size, &ptr)) {
    // Correct space_used_ to avoid double counting.
    AddSpaceUsed(-size);
    new_sb = StringBlock::Emplace(ptr, size, sb);
  } else {
    new_sb = StringBlock::New(sb);
    AddSpaceAllocated(new_sb->allocated_size());
  }
  string_block_.store(new_sb, std::memory_order_release);
  size_t unused = new_sb->effective_size() - sizeof(std::string);
  string_block_unused_.store(unused, std::memory_order_relaxed);
  return new_sb->AtOffset(unused);
}

PROTOBUF_NOINLINE
void* SerialArena::AllocateAlignedWithCleanupFallback(
    size_t n, size_t align, void (*destructor)(void*)) {
  size_t required = AlignUpTo(n, align);
  AllocateNewBlock(required);
  return AllocateAlignedWithCleanup(n, align, destructor);
}

void SerialArena::AllocateNewBlock(size_t n) {
  size_t used = 0;
  size_t wasted = 0;
  ArenaBlock* old_head = head();
  if (!old_head->IsSentry()) {
    // Record how much was used in this block.
    used = static_cast<size_t>(ptr() - old_head->Pointer(kBlockHeaderSize));
    wasted = old_head->size - used - kBlockHeaderSize;
    AddSpaceUsed(used);
  }

  // TODO: Evaluate whether pushing unused space into the cached blocks is a
  // win. Preliminary testing showed the expected increase in memory savings,
  // but with a CPU regression. The regression might have been an artifact of
  // the microbenchmark.

  auto mem = AllocateBlock(parent_.AllocPolicy(), old_head->size, n);
  AddSpaceAllocated(mem.n);
  ThreadSafeArenaStats::RecordAllocateStats(parent_.arena_stats_.MutableStats(),
                                            /*used=*/used,
                                            /*allocated=*/mem.n, wasted);
  auto* new_head = new (mem.p) ArenaBlock{old_head, mem.n};
  set_range(new_head->Pointer(kBlockHeaderSize), new_head->Limit());
  // Previous writes must take effect before writing new head.
  head_.store(new_head, std::memory_order_release);

  PROTOBUF_POISON_MEMORY_REGION(ptr(), limit_ - ptr());
}

uint64_t SerialArena::SpaceUsed() const {
  // Note: the calculation below technically causes a race with
  // AllocateNewBlock when called from another thread (which happens in
  // ThreadSafeArena::SpaceUsed). However, in the worst case space_used_ will
  // have stale data and the calculation will incorrectly assume 100% usage of
  // the *current* block.
  // TODO Consider eliminating this race in exchange for a possible
  // performance hit on ARM (see cl/455186837).

  uint64_t space_used = 0;
  StringBlock* sb = string_block_.load(std::memory_order_acquire);
  if (sb) {
    size_t unused = string_block_unused_.load(std::memory_order_relaxed);
    space_used += sb->effective_size() - unused;
  }
  const ArenaBlock* h = head_.load(std::memory_order_acquire);
  if (h->IsSentry()) return space_used;

  const uint64_t current_block_size = h->size;
  space_used += std::min(
      static_cast<uint64_t>(
          ptr() - const_cast<ArenaBlock*>(h)->Pointer(kBlockHeaderSize)),
      current_block_size);
  return space_used + space_used_.load(std::memory_order_relaxed);
}

size_t SerialArena::FreeStringBlocks(StringBlock* string_block,
                                     size_t unused_bytes) {
  ABSL_DCHECK(string_block != nullptr);
  StringBlock* next = string_block->next();
  absl::PrefetchToLocalCacheNta(next);
  std::string* end = string_block->end();
  for (std::string* s = string_block->AtOffset(unused_bytes); s != end; ++s) {
    s->~basic_string();
  }
  size_t deallocated = StringBlock::Delete(string_block);

  while ((string_block = next) != nullptr) {
    next = string_block->next();
    absl::PrefetchToLocalCacheNta(next);
    for (std::string& s : *string_block) {
      s.~basic_string();
    }
    deallocated += StringBlock::Delete(string_block);
  }
  return deallocated;
}

// Stores arrays of void* and SerialArena* instead of a linked list of
// SerialArena* to speed up traversing all SerialArenas; the cost of the walk
// is non-trivial when there are many nodes. Storing the "ids" separately
// minimizes the cache footprint and is more efficient when looking for a
// matching arena.
//
// Uses absl::container_internal::Layout to emulate the following:
//
// struct SerialArenaChunk {
//   struct SerialArenaChunkHeader {
//     SerialArenaChunk* next_chunk;
//     uint32_t capacity;
//     std::atomic<uint32_t> size;
//   } header;
//   std::atomic<void*> ids[];
//   std::atomic<SerialArena*> arenas[];
// };
//
// where the size of "ids" and "arenas" is determined at runtime; hence the use
// of Layout.
struct SerialArenaChunkHeader {
  constexpr SerialArenaChunkHeader(uint32_t capacity, uint32_t size)
      : next_chunk(nullptr), capacity(capacity), size(size) {}

  ThreadSafeArena::SerialArenaChunk* next_chunk;
  uint32_t capacity;
  std::atomic<uint32_t> size;
};

class ThreadSafeArena::SerialArenaChunk {
 public:
  SerialArenaChunk(uint32_t capacity, void* me, SerialArena* serial) {
    // We use `layout`/`ids`/`arenas` local variables to avoid recomputing
    // offsets if we were to call id(i)/arena(i) repeatedly.
    const layout_type layout = Layout(capacity);
    new (layout.Pointer<kHeader>(ptr())) SerialArenaChunkHeader{capacity, 1};

    std::atomic<void*>* ids = layout.Pointer<kIds>(ptr());
    new (&ids[0]) std::atomic<void*>{me};
    for (uint32_t i = 1; i < capacity; ++i) {
      new (&ids[i]) std::atomic<void*>{nullptr};
    }

    std::atomic<SerialArena*>* arenas = layout.Pointer<kArenas>(ptr());
    new (&arenas[0]) std::atomic<SerialArena*>{serial};
    for (uint32_t i = 1; i < capacity; ++i) {
      new (&arenas[i]) std::atomic<SerialArena*>{nullptr};
    }
  }

  bool IsSentry() const { return capacity() == 0; }

  // next_chunk
  const SerialArenaChunk* next_chunk() const { return header().next_chunk; }
  SerialArenaChunk* next_chunk() { return header().next_chunk; }
  void set_next(SerialArenaChunk* next_chunk) {
    header().next_chunk = next_chunk;
  }

  // capacity
  uint32_t capacity() const { return header().capacity; }
  void set_capacity(uint32_t capacity) { header().capacity = capacity; }

  // ids: returns up to size().
  absl::Span<const std::atomic<void*>> ids() const {
    return Layout().Slice<kIds>(ptr()).first(safe_size());
  }
  absl::Span<std::atomic<void*>> ids() {
    return Layout().Slice<kIds>(ptr()).first(safe_size());
  }
  std::atomic<void*>& id(uint32_t i) {
    ABSL_DCHECK_LT(i, capacity());
    return Layout().Pointer<kIds>(ptr())[i];
  }

  // arenas: returns up to size().
  absl::Span<const std::atomic<SerialArena*>> arenas() const {
    return Layout().Slice<kArenas>(ptr()).first(safe_size());
  }
  absl::Span<std::atomic<SerialArena*>> arenas() {
    return Layout().Slice<kArenas>(ptr()).first(safe_size());
  }
  const std::atomic<SerialArena*>& arena(uint32_t i) const {
    ABSL_DCHECK_LT(i, capacity());
    return Layout().Pointer<kArenas>(ptr())[i];
  }
  std::atomic<SerialArena*>& arena(uint32_t i) {
    ABSL_DCHECK_LT(i, capacity());
    return Layout().Pointer<kArenas>(ptr())[i];
  }

  // Tries to insert {id, serial} into the head chunk. Returns false if the
  // head is already full.
  //
  // Note that updates to "size", "id", and "arena" are individually atomic but
  // are not protected by a mutex. This is acceptable because concurrent
  // lookups from SpaceUsed or SpaceAllocated tolerate the resulting
  // inaccuracy. On other paths, the race is either not possible
  // (GetSerialArenaFallback) or must be prevented by users (CleanupList, Free).
  bool insert(void* me, SerialArena* serial) {
    uint32_t idx = size().fetch_add(1, std::memory_order_relaxed);
    // Bail out if this chunk is full.
    if (idx >= capacity()) {
      // Write old value back to avoid potential overflow.
      size().store(capacity(), std::memory_order_relaxed);
      return false;
    }

    id(idx).store(me, std::memory_order_relaxed);
    arena(idx).store(serial, std::memory_order_release);
    return true;
  }

  constexpr static size_t AllocSize(size_t n) { return Layout(n).AllocSize(); }

 private:
  constexpr static int kHeader = 0;
  constexpr static int kIds = 1;
  constexpr static int kArenas = 2;

  using layout_type = absl::container_internal::Layout<
      SerialArenaChunkHeader, std::atomic<void*>, std::atomic<SerialArena*>>;

  const char* ptr() const { return reinterpret_cast<const char*>(this); }
  char* ptr() { return reinterpret_cast<char*>(this); }

  SerialArenaChunkHeader& header() {
    return *layout_type::Partial().Pointer<kHeader>(ptr());
  }
  const SerialArenaChunkHeader& header() const {
    return *layout_type::Partial().Pointer<kHeader>(ptr());
  }

  std::atomic<uint32_t>& size() { return header().size; }
  const std::atomic<uint32_t>& size() const { return header().size; }

  // Returns the size capped by the capacity, as fetch_add may result in a
  // size greater than capacity.
  uint32_t safe_size() const {
    return std::min(capacity(), size().load(std::memory_order_relaxed));
  }

  constexpr static layout_type Layout(size_t n) {
    return layout_type(/*header*/ 1, /*ids*/ n, /*arenas*/ n);
  }
  layout_type Layout() const { return Layout(capacity()); }
};
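
// Concrete memory layout produced by Layout(n) above for a chunk of
// capacity n:
//
//   [ SerialArenaChunkHeader | ids[0..n) | arenas[0..n) ]
//
// AllocSize(n) is the total byte size of that layout, which is what
// NewSerialArenaChunk allocates below.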

constexpr SerialArenaChunkHeader kSentryArenaChunk = {0, 0};

ThreadSafeArena::SerialArenaChunk* ThreadSafeArena::SentrySerialArenaChunk() {
  // const_cast is okay because the sentry chunk is never mutated. Also,
  // reinterpret_cast is acceptable here as it should be identical to
  // SerialArenaChunk with zero payload. This is a necessary trick to
  // constexpr initialize kSentryArenaChunk.
  return reinterpret_cast<SerialArenaChunk*>(
      const_cast<SerialArenaChunkHeader*>(&kSentryArenaChunk));
}


alignas(kCacheAlignment) ABSL_CONST_INIT
    std::atomic<ThreadSafeArena::LifecycleId> ThreadSafeArena::lifecycle_id_{0};
#if defined(PROTOBUF_NO_THREADLOCAL)
ThreadSafeArena::ThreadCache& ThreadSafeArena::thread_cache() {
  static internal::ThreadLocalStorage<ThreadCache>* thread_cache_ =
      new internal::ThreadLocalStorage<ThreadCache>();
  return *thread_cache_->Get();
}
#elif defined(PROTOBUF_USE_DLLS) && defined(_WIN32)
ThreadSafeArena::ThreadCache& ThreadSafeArena::thread_cache() {
  static PROTOBUF_THREAD_LOCAL ThreadCache thread_cache;
  return thread_cache;
}
#else
PROTOBUF_CONSTINIT PROTOBUF_THREAD_LOCAL
    ThreadSafeArena::ThreadCache ThreadSafeArena::thread_cache_;
#endif

ThreadSafeArena::ThreadSafeArena() : first_arena_(*this) { Init(); }

ThreadSafeArena::ThreadSafeArena(char* mem, size_t size)
    : first_arena_(FirstSerialArena{}, FirstBlock(mem, size), *this) {
  Init();
}

ThreadSafeArena::ThreadSafeArena(void* mem, size_t size,
                                 const AllocationPolicy& policy)
    : first_arena_(FirstSerialArena{}, FirstBlock(mem, size, policy), *this) {
  InitializeWithPolicy(policy);
}

ArenaBlock* ThreadSafeArena::FirstBlock(void* buf, size_t size) {
  ABSL_DCHECK_EQ(reinterpret_cast<uintptr_t>(buf) & 7, 0u);
  if (buf == nullptr || size <= kBlockHeaderSize) {
    return SentryArenaBlock();
  }
  // Record user-owned block.
  alloc_policy_.set_is_user_owned_initial_block(true);
  return new (buf) ArenaBlock{nullptr, size};
}

ArenaBlock* ThreadSafeArena::FirstBlock(void* buf, size_t size,
                                        const AllocationPolicy& policy) {
  if (policy.IsDefault()) return FirstBlock(buf, size);

  ABSL_DCHECK_EQ(reinterpret_cast<uintptr_t>(buf) & 7, 0u);

  SizedPtr mem;
  if (buf == nullptr || size < kBlockHeaderSize + kAllocPolicySize) {
    mem = AllocateBlock(&policy, 0, kAllocPolicySize);
  } else {
    mem = {buf, size};
    // Record user-owned block.
    alloc_policy_.set_is_user_owned_initial_block(true);
  }

  return new (mem.p) ArenaBlock{nullptr, mem.n};
}

void ThreadSafeArena::InitializeWithPolicy(const AllocationPolicy& policy) {
  Init();

  if (policy.IsDefault()) return;

#ifndef NDEBUG
  const uint64_t old_alloc_policy = alloc_policy_.get_raw();
  // If there was a policy (e.g., in Reset()), make sure flags were preserved.
#define ABSL_DCHECK_POLICY_FLAGS_() \
  if (old_alloc_policy > 3)         \
    ABSL_CHECK_EQ(old_alloc_policy & 3, alloc_policy_.get_raw() & 3)
#else
#define ABSL_DCHECK_POLICY_FLAGS_()
#endif  // NDEBUG

  // We ensured enough space so this cannot fail.
  void* p;
  if (!first_arena_.MaybeAllocateAligned(kAllocPolicySize, &p)) {
    ABSL_LOG(FATAL) << "MaybeAllocateAligned cannot fail here.";
    return;
  }
  new (p) AllocationPolicy{policy};
  // Low bits store flags, so they mustn't be overwritten.
  ABSL_DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(p) & 3);
  alloc_policy_.set_policy(reinterpret_cast<AllocationPolicy*>(p));
  ABSL_DCHECK_POLICY_FLAGS_();

#undef ABSL_DCHECK_POLICY_FLAGS_
}

uint64_t ThreadSafeArena::GetNextLifeCycleId() {
  ThreadCache& tc = thread_cache();
  uint64_t id = tc.next_lifecycle_id;
  constexpr uint64_t kInc = ThreadCache::kPerThreadIds;
  if (PROTOBUF_PREDICT_FALSE((id & (kInc - 1)) == 0)) {
    // On platforms that don't support uint64_t atomics we cannot afford to
    // increment by large intervals and expect uniqueness due to wrapping,
    // hence we only add by 1.
    id = lifecycle_id_.fetch_add(1, std::memory_order_relaxed) * kInc;
  }
  tc.next_lifecycle_id = id + 1;
  return id;
}
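
// Example of the id batching above: each thread reserves kPerThreadIds
// consecutive ids at a time, so only one in every kPerThreadIds calls touches
// the shared lifecycle_id_ atomic; the rest are served from the thread-local
// next_lifecycle_id counter.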

// We assume that the number of threads per arena is bimodal; i.e. most arenas
// are single-threaded, but some large ones are highly concurrent. To balance
// memory overhead against pointer chasing, we start with few entries and grow
// exponentially (4x) up to a limit (255 entries). Note that the parameters
// are picked as hints for x64 architectures; the actual size is calculated by
// Layout.
ThreadSafeArena::SerialArenaChunk* ThreadSafeArena::NewSerialArenaChunk(
    uint32_t prev_capacity, void* id, SerialArena* serial) {
  constexpr size_t kMaxBytes = 4096;  // Can hold up to 255 entries.
  constexpr size_t kGrowthFactor = 4;
  constexpr size_t kHeaderSize = SerialArenaChunk::AllocSize(0);
  constexpr size_t kEntrySize = SerialArenaChunk::AllocSize(1) - kHeaderSize;

  // On x64 arch: {4, 16, 64, 256, 256, ...} * 16.
  size_t prev_bytes = SerialArenaChunk::AllocSize(prev_capacity);
  size_t next_bytes = std::min(kMaxBytes, prev_bytes * kGrowthFactor);
  uint32_t next_capacity =
      static_cast<uint32_t>(next_bytes - kHeaderSize) / kEntrySize;
  // Growth based on bytes needs to be adjusted by AllocSize.
  next_bytes = SerialArenaChunk::AllocSize(next_capacity);

  // If the allocator returns more memory than requested, expand the capacity
  // to add as many extra entries as the extra space permits.
  SizedPtr mem = AllocateAtLeast(next_bytes);
  next_capacity = static_cast<uint32_t>(mem.n - kHeaderSize) / kEntrySize;
  ABSL_DCHECK_LE(SerialArenaChunk::AllocSize(next_capacity), mem.n);
  return new (mem.p) SerialArenaChunk{next_capacity, id, serial};
}

// Tries to reserve an entry by atomic fetch_add. If the head chunk is already
// full (size >= capacity), acquires the mutex and adds a new head.
void ThreadSafeArena::AddSerialArena(void* id, SerialArena* serial) {
  SerialArenaChunk* head = head_.load(std::memory_order_acquire);
  // Fast path without acquiring mutex.
  if (!head->IsSentry() && head->insert(id, serial)) {
    return;
  }

  // Slow path with acquiring mutex.
  absl::MutexLock lock(&mutex_);

  // Refetch and if someone else installed a new head, try allocating on that!
  SerialArenaChunk* new_head = head_.load(std::memory_order_acquire);
  if (new_head != head) {
    if (new_head->insert(id, serial)) return;
    // Update head to link to the latest one.
    head = new_head;
  }

  new_head = NewSerialArenaChunk(head->capacity(), id, serial);
  new_head->set_next(head);

  // Use "std::memory_order_release" so that prior stores are visible to
  // readers that acquire-load the new head.
  head_.store(new_head, std::memory_order_release);
}

void ThreadSafeArena::UnpoisonAllArenaBlocks() const {
  VisitSerialArena([](const SerialArena* serial) {
    for (const auto* b = serial->head(); b != nullptr && !b->IsSentry();
         b = b->next) {
      PROTOBUF_UNPOISON_MEMORY_REGION(b, b->size);
    }
  });
}

void ThreadSafeArena::Init() {
  tag_and_id_ = GetNextLifeCycleId();
  arena_stats_ = Sample();
  head_.store(SentrySerialArenaChunk(), std::memory_order_relaxed);
  first_owner_ = &thread_cache();

  // Record allocation for the first block that was either user-provided or
  // newly allocated.
  ThreadSafeArenaStats::RecordAllocateStats(
      arena_stats_.MutableStats(),
      /*used=*/0,
      /*allocated=*/first_arena_.SpaceAllocated(),
      /*wasted=*/0);

  CacheSerialArena(&first_arena_);
}

ThreadSafeArena::~ThreadSafeArena() {
  // Have to do this in a first pass, because some of the destructors might
  // refer to memory in other blocks.
  CleanupList();

  auto mem = Free();
  if (alloc_policy_.is_user_owned_initial_block()) {
    // Unpoison the initial block, now that it's going back to the user.
    PROTOBUF_UNPOISON_MEMORY_REGION(mem.p, mem.n);
  } else if (mem.n > 0) {
    GetDeallocator(alloc_policy_.get())(mem);
  }
}

SizedPtr ThreadSafeArena::Free() {
  auto deallocator = GetDeallocator(alloc_policy_.get());

  WalkSerialArenaChunk([&](SerialArenaChunk* chunk) {
    absl::Span<std::atomic<SerialArena*>> span = chunk->arenas();
    // Walk arenas backward so that the first serial arena is handled last.
    // Freeing in the reverse order of creation may not actually be necessary
    // for Free, and we should revisit this. (b/247560530)
    for (auto it = span.rbegin(); it != span.rend(); ++it) {
      SerialArena* serial = it->load(std::memory_order_relaxed);
      ABSL_DCHECK_NE(serial, nullptr);
      // Always free the first block of "serial" as it cannot be user-provided.
      SizedPtr mem = serial->Free(deallocator);
      ABSL_DCHECK_NE(mem.p, nullptr);
      deallocator(mem);
    }

    // Delete the chunk as we're done with it.
    internal::SizedDelete(chunk,
                          SerialArenaChunk::AllocSize(chunk->capacity()));
  });

  // The first block of the first arena is special; let the caller handle it.
  return first_arena_.Free(deallocator);
}

uint64_t ThreadSafeArena::Reset() {
  const size_t space_allocated = SpaceAllocated();

  // Have to do this in a first pass, because some of the destructors might
  // refer to memory in other blocks.
  CleanupList();
  // Reset the first arena's cleanup list.
  first_arena_.cleanup_list_ = cleanup::ChunkList();

  // Discard all blocks except the first one. Whether it is user-provided or
  // allocated, always reuse the first block for the first arena.
  auto mem = Free();

  // Reset the first arena with the first block. This avoids redundant
  // free / allocation and re-allocating for AllocationPolicy. Adjust offset if
  // we need to preserve alloc_policy_.
  if (alloc_policy_.is_user_owned_initial_block() ||
      alloc_policy_.get() != nullptr) {
    size_t offset = alloc_policy_.get() == nullptr
                        ? kBlockHeaderSize
                        : kBlockHeaderSize + kAllocPolicySize;
    first_arena_.Init(new (mem.p) ArenaBlock{nullptr, mem.n}, offset);
  } else {
    first_arena_.Init(SentryArenaBlock(), 0);
  }

  // Since the first block and any alloc_policy on the first block are
  // preserved, this can be initialized by Init().
  Init();

  return space_allocated;
}

void* ThreadSafeArena::AllocateAlignedWithCleanup(size_t n, size_t align,
                                                  void (*destructor)(void*)) {
  SerialArena* arena;
  if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
    return arena->AllocateAlignedWithCleanup(n, align, destructor);
  } else {
    return AllocateAlignedWithCleanupFallback(n, align, destructor);
  }
}

void ThreadSafeArena::AddCleanup(void* elem, void (*cleanup)(void*)) {
  GetSerialArena()->AddCleanup(elem, cleanup);
}

SerialArena* ThreadSafeArena::GetSerialArena() {
  SerialArena* arena;
  if (PROTOBUF_PREDICT_FALSE(!GetSerialArenaFast(&arena))) {
    arena = GetSerialArenaFallback(kMaxCleanupNodeSize);
  }
  return arena;
}

PROTOBUF_NOINLINE
void* ThreadSafeArena::AllocateAlignedWithCleanupFallback(
    size_t n, size_t align, void (*destructor)(void*)) {
  return GetSerialArenaFallback(n + kMaxCleanupNodeSize)
      ->AllocateAlignedWithCleanup(n, align, destructor);
}

PROTOBUF_NOINLINE
void* ThreadSafeArena::AllocateFromStringBlock() {
  return GetSerialArena()->AllocateFromStringBlock();
}

template <typename Callback>
void ThreadSafeArena::WalkConstSerialArenaChunk(Callback fn) const {
  const SerialArenaChunk* chunk = head_.load(std::memory_order_acquire);

  for (; !chunk->IsSentry(); chunk = chunk->next_chunk()) {
    // Prefetch the next chunk.
    absl::PrefetchToLocalCache(chunk->next_chunk());
    fn(chunk);
  }
}

template <typename Callback>
void ThreadSafeArena::WalkSerialArenaChunk(Callback fn) {
  // By omitting an acquire barrier we help the sanitizer: any user code that
  // doesn't properly synchronize Reset() or the destructor will trigger a
  // TSAN warning.
  SerialArenaChunk* chunk = head_.load(std::memory_order_relaxed);

  while (!chunk->IsSentry()) {
    // Cache the next chunk in case this chunk is destroyed.
    SerialArenaChunk* next_chunk = chunk->next_chunk();
    // Prefetch the next chunk.
    absl::PrefetchToLocalCache(next_chunk);
    fn(chunk);
    chunk = next_chunk;
  }
}

template <typename Callback>
void ThreadSafeArena::VisitSerialArena(Callback fn) const {
  // In most cases, arenas are single-threaded and "first_arena_" should be
  // sufficient.
  fn(&first_arena_);

  WalkConstSerialArenaChunk([&fn](const SerialArenaChunk* chunk) {
    for (const auto& each : chunk->arenas()) {
      const SerialArena* serial = each.load(std::memory_order_acquire);
      // It is possible that a newly added SerialArena is not yet visible even
      // though the size was updated. This is acceptable for SpaceAllocated
      // and SpaceUsed.
      if (serial == nullptr) continue;
      fn(serial);
    }
  });
}

uint64_t ThreadSafeArena::SpaceAllocated() const {
  uint64_t space_allocated = 0;
  VisitSerialArena([&space_allocated](const SerialArena* serial) {
    space_allocated += serial->SpaceAllocated();
  });
  return space_allocated;
}

uint64_t ThreadSafeArena::SpaceUsed() const {
  // `first_arena_` doesn't have kSerialArenaSize overhead, so adjust it here.
  uint64_t space_used = kSerialArenaSize;
  VisitSerialArena([&space_used](const SerialArena* serial) {
    // SerialArenas on chunks are allocated directly from their block, so
    // kSerialArenaSize needs to be subtracted from SpaceUsed.
    space_used += serial->SpaceUsed() - kSerialArenaSize;
  });
  return space_used - (alloc_policy_.get() ? sizeof(AllocationPolicy) : 0);
}

template <AllocationClient alloc_client>
PROTOBUF_NOINLINE void* ThreadSafeArena::AllocateAlignedFallback(size_t n) {
  return GetSerialArenaFallback(n)->AllocateAligned<alloc_client>(n);
}

template void* ThreadSafeArena::AllocateAlignedFallback<
    AllocationClient::kDefault>(size_t);
template void*
ThreadSafeArena::AllocateAlignedFallback<AllocationClient::kArray>(size_t);

void ThreadSafeArena::CleanupList() {
#ifdef PROTOBUF_ASAN
  UnpoisonAllArenaBlocks();
#endif

  WalkSerialArenaChunk([](SerialArenaChunk* chunk) {
    absl::Span<std::atomic<SerialArena*>> span = chunk->arenas();
    // Walk arenas backward so that the first serial arena is handled last.
    // Destroying in the reverse order of construction is often assumed by
    // users and is required to avoid breaking inter-object dependencies.
    // (b/247560530)
    for (auto it = span.rbegin(); it != span.rend(); ++it) {
      SerialArena* serial = it->load(std::memory_order_relaxed);
      ABSL_DCHECK_NE(serial, nullptr);
      serial->CleanupList();
    }
  });
  // First arena must be cleaned up last. (b/247560530)
  first_arena_.CleanupList();
}

PROTOBUF_NOINLINE
SerialArena* ThreadSafeArena::GetSerialArenaFallback(size_t n) {
  void* const id = &thread_cache();
  if (id == first_owner_) {
    CacheSerialArena(&first_arena_);
    return &first_arena_;
  }

  // Search for a matching SerialArena.
  SerialArena* serial = nullptr;
  WalkConstSerialArenaChunk([&serial, id](const SerialArenaChunk* chunk) {
    absl::Span<const std::atomic<void*>> ids = chunk->ids();
    for (uint32_t i = 0; i < ids.size(); ++i) {
      if (ids[i].load(std::memory_order_relaxed) == id) {
        serial = chunk->arena(i).load(std::memory_order_relaxed);
        ABSL_DCHECK_NE(serial, nullptr);
        break;
      }
    }
  });

  if (!serial) {
    // This thread doesn't have any SerialArena, which also means it doesn't
    // have any blocks yet. So we'll allocate its first block now. It must be
    // big enough to host SerialArena and the pending request.
    serial = SerialArena::New(
        AllocateBlock(alloc_policy_.get(), 0, n + kSerialArenaSize), *this);

    AddSerialArena(id, serial);
  }

  CacheSerialArena(serial);
  return serial;
}

}  // namespace internal

void* Arena::Allocate(size_t n) { return impl_.AllocateAligned(n); }

void* Arena::AllocateForArray(size_t n) {
  return impl_.AllocateAligned<internal::AllocationClient::kArray>(n);
}

void* Arena::AllocateAlignedWithCleanup(size_t n, size_t align,
                                        void (*destructor)(void*)) {
  return impl_.AllocateAlignedWithCleanup(n, align, destructor);
}
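
// A minimal usage sketch of the public Arena API that ultimately funnels into
// the allocation paths above ("MyMessage" is a placeholder for any generated
// message type):
//
//   google::protobuf::Arena arena;
//   MyMessage* msg = google::protobuf::Arena::Create<MyMessage>(&arena);
//   // ... use msg; its memory is released when `arena` is destroyed.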

std::vector<void*> Arena::PeekCleanupListForTesting() {
  return impl_.PeekCleanupListForTesting();
}

}  // namespace protobuf
}  // namespace google

#include "google/protobuf/port_undef.inc"