// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

#include "google/protobuf/arena.h"

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/base/prefetch.h"
#include "absl/container/internal/layout.h"
#include "absl/log/absl_check.h"
#include "absl/log/absl_log.h"
#include "absl/synchronization/mutex.h"
#include "absl/types/span.h"
#include "google/protobuf/arena_allocation_policy.h"
#include "google/protobuf/arena_cleanup.h"
#include "google/protobuf/arenaz_sampler.h"
#include "google/protobuf/port.h"
#include "google/protobuf/serial_arena.h"
#include "google/protobuf/string_block.h"
#include "google/protobuf/thread_safe_arena.h"


// Must be included last.
#include "google/protobuf/port_def.inc"

namespace google {
namespace protobuf {
namespace internal {
namespace {

#if defined(__GNUC__) && __GNUC__ >= 5
// kSentryArenaBlock is used for arenas which can be referenced pre-main. So,
// constexpr is required.
constexpr ArenaBlock kSentryArenaBlock;

ArenaBlock* SentryArenaBlock() {
  // const_cast<> is okay as kSentryArenaBlock will never be mutated.
  return const_cast<ArenaBlock*>(&kSentryArenaBlock);
}
#else
// TODO Remove this once we're not using GCC 4.9 for tests.
// There is a compiler bug in this version that causes the above constexpr to
// fail.  This version is no longer in our support window, but we use it in
// some of our aarch64 docker images.
ArenaBlock* SentryArenaBlock() {
  static const ArenaBlock kSentryArenaBlock;
  // const_cast<> is okay as kSentryArenaBlock will never be mutated.
  return const_cast<ArenaBlock*>(&kSentryArenaBlock);
}
#endif

inline size_t AllocationSize(size_t last_size, size_t start_size,
                             size_t max_size) {
  if (last_size == 0) return start_size;
  // Double the current block size, up to a limit.
  return std::min(2 * last_size, max_size);
}
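
// Illustrative sketch (comment only, not part of the build): with hypothetical
// policy values start_block_size = 256 and max_block_size = 32768, successive
// calls yield a doubling schedule capped at the maximum:
//
//   AllocationSize(/*last_size=*/0,     256, 32768) -> 256    (first block)
//   AllocationSize(/*last_size=*/256,   256, 32768) -> 512
//   AllocationSize(/*last_size=*/512,   256, 32768) -> 1024
//   ...
//   AllocationSize(/*last_size=*/32768, 256, 32768) -> 32768  (capped)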

SizedPtr AllocateMemory(const AllocationPolicy& policy, size_t size) {
  if (policy.block_alloc == nullptr) {
    return AllocateAtLeast(size);
  }
  return {policy.block_alloc(size), size};
}

SizedPtr AllocateBlock(const AllocationPolicy* policy_ptr, size_t last_size,
                       size_t min_bytes) {
  AllocationPolicy policy;  // default policy
  if (policy_ptr) policy = *policy_ptr;
  size_t size =
      AllocationSize(last_size, policy.start_block_size, policy.max_block_size);
  // Verify that min_bytes + kBlockHeaderSize won't overflow.
  ABSL_CHECK_LE(min_bytes, std::numeric_limits<size_t>::max() -
                               SerialArena::kBlockHeaderSize);
  size = std::max(size, SerialArena::kBlockHeaderSize + min_bytes);

  return AllocateMemory(policy, size);
}

SizedPtr AllocateCleanupChunk(const AllocationPolicy* policy_ptr,
                              size_t last_size) {
  constexpr size_t kStartSize = 64;
  constexpr size_t kMaxSize = 4 << 10;
  static_assert(kStartSize % sizeof(cleanup::CleanupNode) == 0, "");

  const size_t size = AllocationSize(last_size, kStartSize, kMaxSize);
  if (policy_ptr == nullptr) return AllocateAtLeast(size);
  return AllocateMemory(*policy_ptr, size);
}
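
// Illustrative note (comment only): with the constants above, cleanup chunks
// follow the same doubling schedule as arena blocks, e.g.
//
//   AllocateCleanupChunk(nullptr, /*last_size=*/0)  allocates at least 64 bytes
//   AllocateCleanupChunk(nullptr, /*last_size=*/64) allocates at least 128 bytes
//   ...
// capped at kMaxSize (4 KiB) once last_size reaches 2 KiB or more.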

class GetDeallocator {
 public:
  explicit GetDeallocator(const AllocationPolicy* policy)
      : dealloc_(policy ? policy->block_dealloc : nullptr) {}

  void operator()(SizedPtr mem) const {
    if (dealloc_) {
      dealloc_(mem.p, mem.n);
    } else {
      internal::SizedDelete(mem.p, mem.n);
    }
  }

 private:
  void (*dealloc_)(void*, size_t);
};

}  // namespace

namespace cleanup {
struct ChunkList::Chunk {
  CleanupNode* First() { return reinterpret_cast<CleanupNode*>(this + 1); }
  CleanupNode* Last() { return First() + Capacity() - 1; }
  static size_t Capacity(size_t size) {
    return (size - sizeof(Chunk)) / sizeof(CleanupNode);
  }
  size_t Capacity() const { return Capacity(size); }

  Chunk* next;
  size_t size;
  // Cleanup nodes follow.
};
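
// Illustrative sketch (comment only; sizes are assumptions for a typical
// 64-bit target): a Chunk is a 16-byte header {next, size} followed directly
// by an array of CleanupNodes. If sizeof(CleanupNode) is 16 (an object pointer
// plus a destructor pointer), then for a 64-byte allocation:
//
//   Capacity(64) == (64 - sizeof(Chunk)) / sizeof(CleanupNode) == 3
//
// First() points just past the header, and Last() == First() + Capacity() - 1.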

void ChunkList::AddFallback(void* elem, void (*destructor)(void*),
                            SerialArena& arena) {
  ABSL_DCHECK_EQ(next_, limit_);
  SizedPtr mem = AllocateCleanupChunk(arena.parent_.AllocPolicy(),
                                      head_ == nullptr ? 0 : head_->size);
  arena.AddSpaceAllocated(mem.n);
  head_ = new (mem.p) Chunk{head_, mem.n};
  next_ = head_->First();
  prefetch_ptr_ = reinterpret_cast<char*>(next_);
  limit_ = next_ + Chunk::Capacity(mem.n);
  AddFromExisting(elem, destructor);
}

void ChunkList::Cleanup(const SerialArena& arena) {
  Chunk* c = head_;
  if (c == nullptr) return;
  GetDeallocator deallocator(arena.parent_.AllocPolicy());

  // Iterate backwards in order to destroy in the right order.
  CleanupNode* it = next_ - 1;
  while (true) {
    CleanupNode* first = c->First();
    // A prefetch distance of 8 here was chosen arbitrarily.
    constexpr int kPrefetchDistance = 8;
    CleanupNode* prefetch = it;
    // Prefetch the first kPrefetchDistance nodes.
    for (int i = 0; prefetch >= first && i < kPrefetchDistance;
         --prefetch, ++i) {
      prefetch->Prefetch();
    }
    // For the middle nodes, run destructor and prefetch the node
    // kPrefetchDistance after the current one.
    for (; prefetch >= first; --it, --prefetch) {
      it->Destroy();
      prefetch->Prefetch();
    }
    // Note: we could consider prefetching `next` chunk earlier.
    absl::PrefetchToLocalCacheNta(c->next);
    // Destroy the rest without prefetching.
    for (; it >= first; --it) {
      it->Destroy();
    }
    Chunk* next = c->next;
    deallocator({c, c->size});
    if (next == nullptr) return;
    c = next;
    it = c->Last();
  };
}

std::vector<void*> ChunkList::PeekForTesting() {
  std::vector<void*> ret;
  Chunk* c = head_;
  if (c == nullptr) return ret;
  // Iterate backwards to match destruction order.
  CleanupNode* it = next_ - 1;
  while (true) {
    CleanupNode* first = c->First();
    for (; it >= first; --it) {
      ret.push_back(it->elem);
    }
    c = c->next;
    if (c == nullptr) return ret;
    it = c->Last();
  };
}
}  // namespace cleanup

// It is guaranteed that this is constructed in `b`. IOW, this is not the first
// arena and `b` cannot be sentry.
SerialArena::SerialArena(ArenaBlock* b, ThreadSafeArena& parent)
    : ptr_{b->Pointer(kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize)},
      limit_{b->Limit()},
      prefetch_ptr_(
          b->Pointer(kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize)),
      head_{b},
      space_allocated_{b->size},
      parent_{parent} {
  ABSL_DCHECK(!b->IsSentry());
}

// It is guaranteed that this is the first SerialArena. Use sentry block.
SerialArena::SerialArena(ThreadSafeArena& parent)
    : head_{SentryArenaBlock()}, parent_{parent} {}

// It is guaranteed that this is the first SerialArena but `b` may be user
// provided or newly allocated to store AllocationPolicy.
SerialArena::SerialArena(FirstSerialArena, ArenaBlock* b,
                         ThreadSafeArena& parent)
    : head_{b}, space_allocated_{b->size}, parent_{parent} {
  if (b->IsSentry()) return;
  set_range(b->Pointer(kBlockHeaderSize), b->Limit());
}

std::vector<void*> SerialArena::PeekCleanupListForTesting() {
  return cleanup_list_.PeekForTesting();
}

std::vector<void*> ThreadSafeArena::PeekCleanupListForTesting() {
  return GetSerialArena()->PeekCleanupListForTesting();
}

void SerialArena::Init(ArenaBlock* b, size_t offset) {
  set_range(b->Pointer(offset), b->Limit());
  head_.store(b, std::memory_order_relaxed);
  space_used_.store(0, std::memory_order_relaxed);
  space_allocated_.store(b->size, std::memory_order_relaxed);
  cached_block_length_ = 0;
  cached_blocks_ = nullptr;
  string_block_.store(nullptr, std::memory_order_relaxed);
  string_block_unused_.store(0, std::memory_order_relaxed);
}

SerialArena* SerialArena::New(SizedPtr mem, ThreadSafeArena& parent) {
  ABSL_DCHECK_LE(kBlockHeaderSize + ThreadSafeArena::kSerialArenaSize, mem.n);
  ThreadSafeArenaStats::RecordAllocateStats(parent.arena_stats_.MutableStats(),
                                            /*used=*/0, /*allocated=*/mem.n,
                                            /*wasted=*/0);
  auto b = new (mem.p) ArenaBlock{nullptr, mem.n};
  return new (b->Pointer(kBlockHeaderSize)) SerialArena(b, parent);
}

template <typename Deallocator>
SizedPtr SerialArena::Free(Deallocator deallocator) {
  FreeStringBlocks();

  ArenaBlock* b = head();
  SizedPtr mem = {b, b->size};
  while (b->next) {
    b = b->next;  // We must first advance before deleting this block
    deallocator(mem);
    mem = {b, b->size};
  }
  return mem;
}

PROTOBUF_NOINLINE
void* SerialArena::AllocateAlignedFallback(size_t n) {
  AllocateNewBlock(n);
  void* ret = nullptr;
  bool res = MaybeAllocateAligned(n, &ret);
  ABSL_DCHECK(res);
  return ret;
}

PROTOBUF_NOINLINE
void* SerialArena::AllocateFromStringBlockFallback() {
  ABSL_DCHECK_EQ(string_block_unused_.load(std::memory_order_relaxed), 0U);
  StringBlock* sb = string_block_.load(std::memory_order_relaxed);
  if (sb) {
    AddSpaceUsed(sb->effective_size());
  }

  void* ptr;
  StringBlock* new_sb;
  size_t size = StringBlock::NextSize(sb);
  if (MaybeAllocateAligned(size, &ptr)) {
    // Correct space_used_ to avoid double counting
    AddSpaceUsed(-size);
    new_sb = StringBlock::Emplace(ptr, size, sb);
  } else {
    new_sb = StringBlock::New(sb);
    AddSpaceAllocated(new_sb->allocated_size());
  }
  string_block_.store(new_sb, std::memory_order_release);
  size_t unused = new_sb->effective_size() - sizeof(std::string);
  string_block_unused_.store(unused, std::memory_order_relaxed);
  return new_sb->AtOffset(unused);
}

PROTOBUF_NOINLINE
void* SerialArena::AllocateAlignedWithCleanupFallback(
    size_t n, size_t align, void (*destructor)(void*)) {
  size_t required = AlignUpTo(n, align);
  AllocateNewBlock(required);
  return AllocateAlignedWithCleanup(n, align, destructor);
}

void SerialArena::AllocateNewBlock(size_t n) {
  size_t used = 0;
  size_t wasted = 0;
  ArenaBlock* old_head = head();
  if (!old_head->IsSentry()) {
    // Record how much was used in this block.
    used = static_cast<size_t>(ptr() - old_head->Pointer(kBlockHeaderSize));
    wasted = old_head->size - used - kBlockHeaderSize;
    AddSpaceUsed(used);
  }

  // TODO: Evaluate if pushing unused space into the cached blocks is a
  // win. Preliminary testing showed increased memory savings as expected,
  // but with a CPU regression. The regression might have been an artifact of
  // the microbenchmark.

  auto mem = AllocateBlock(parent_.AllocPolicy(), old_head->size, n);
  AddSpaceAllocated(mem.n);
  ThreadSafeArenaStats::RecordAllocateStats(parent_.arena_stats_.MutableStats(),
                                            /*used=*/used,
                                            /*allocated=*/mem.n, wasted);
  auto* new_head = new (mem.p) ArenaBlock{old_head, mem.n};
  set_range(new_head->Pointer(kBlockHeaderSize), new_head->Limit());
  // Previous writes must take effect before writing new head.
  head_.store(new_head, std::memory_order_release);

  PROTOBUF_POISON_MEMORY_REGION(ptr(), limit_ - ptr());
}
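
// Illustrative accounting example (comment only; the header size is an
// assumption): suppose the retiring block is 256 bytes with a 16-byte header
// and 100 bytes were handed out from it. Then
//
//   used   = ptr() - old_head->Pointer(kBlockHeaderSize)  = 100
//   wasted = old_head->size - used - kBlockHeaderSize     = 256 - 100 - 16 = 140
//
// Only `used` is added to space_used_, and the unused region of the new block
// is poisoned under ASan.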

uint64_t SerialArena::SpaceUsed() const {
  // Note: the calculation below technically causes a race with
  // AllocateNewBlock when called from another thread (which happens in
  // ThreadSafeArena::SpaceUsed).  However, worst-case space_used_ will have
  // stale data and the calculation will incorrectly assume 100%
  // usage of the *current* block.
  // TODO Consider eliminating this race in exchange for a possible
  // performance hit on ARM (see cl/455186837).

  uint64_t space_used = 0;
  StringBlock* sb = string_block_.load(std::memory_order_acquire);
  if (sb) {
    size_t unused = string_block_unused_.load(std::memory_order_relaxed);
    space_used += sb->effective_size() - unused;
  }
  const ArenaBlock* h = head_.load(std::memory_order_acquire);
  if (h->IsSentry()) return space_used;

  const uint64_t current_block_size = h->size;
  space_used += std::min(
      static_cast<uint64_t>(
          ptr() - const_cast<ArenaBlock*>(h)->Pointer(kBlockHeaderSize)),
      current_block_size);
  return space_used + space_used_.load(std::memory_order_relaxed);
}

size_t SerialArena::FreeStringBlocks(StringBlock* string_block,
                                     size_t unused_bytes) {
  ABSL_DCHECK(string_block != nullptr);
  StringBlock* next = string_block->next();
  absl::PrefetchToLocalCacheNta(next);
  std::string* end = string_block->end();
  for (std::string* s = string_block->AtOffset(unused_bytes); s != end; ++s) {
    s->~basic_string();
  }
  size_t deallocated = StringBlock::Delete(string_block);

  while ((string_block = next) != nullptr) {
    next = string_block->next();
    absl::PrefetchToLocalCacheNta(next);
    for (std::string& s : *string_block) {
      s.~basic_string();
    }
    deallocated += StringBlock::Delete(string_block);
  }
  return deallocated;
}

// Stores arrays of void* and SerialArena* instead of a linked list of
// SerialArena* to speed up traversing all SerialArenas. The cost of the walk
// is non-trivial when there are many nodes. Separately storing "ids" minimizes
// the cache footprint and is more efficient when looking for a matching arena.
//
// Uses absl::container_internal::Layout to emulate the following:
//
// struct SerialArenaChunk {
//   struct SerialArenaChunkHeader {
//     SerialArenaChunk* next_chunk;
//     uint32_t capacity;
//     std::atomic<uint32_t> size;
//   } header;
//   std::atomic<void*> ids[];
//   std::atomic<SerialArena*> arenas[];
// };
//
// where the size of "ids" and "arenas" is determined at runtime; hence the use
// of Layout.
struct SerialArenaChunkHeader {
  constexpr SerialArenaChunkHeader(uint32_t capacity, uint32_t size)
      : next_chunk(nullptr), capacity(capacity), size(size) {}

  ThreadSafeArena::SerialArenaChunk* next_chunk;
  uint32_t capacity;
  std::atomic<uint32_t> size;
};

class ThreadSafeArena::SerialArenaChunk {
 public:
  SerialArenaChunk(uint32_t capacity, void* me, SerialArena* serial) {
    // We use `layout`/`ids`/`arenas` local variables to avoid recomputing
    // offsets if we were to call id(i)/arena(i) repeatedly.
    const layout_type layout = Layout(capacity);
    new (layout.Pointer<kHeader>(ptr())) SerialArenaChunkHeader{capacity, 1};

    std::atomic<void*>* ids = layout.Pointer<kIds>(ptr());
    new (&ids[0]) std::atomic<void*>{me};
    for (uint32_t i = 1; i < capacity; ++i) {
      new (&ids[i]) std::atomic<void*>{nullptr};
    }

    std::atomic<SerialArena*>* arenas = layout.Pointer<kArenas>(ptr());
    new (&arenas[0]) std::atomic<SerialArena*>{serial};
    for (uint32_t i = 1; i < capacity; ++i) {
      new (&arenas[i]) std::atomic<SerialArena*>{nullptr};
    }
  }

  bool IsSentry() const { return capacity() == 0; }

  // next_chunk
  const SerialArenaChunk* next_chunk() const { return header().next_chunk; }
  SerialArenaChunk* next_chunk() { return header().next_chunk; }
  void set_next(SerialArenaChunk* next_chunk) {
    header().next_chunk = next_chunk;
  }

  // capacity
  uint32_t capacity() const { return header().capacity; }
  void set_capacity(uint32_t capacity) { header().capacity = capacity; }

  // ids: returns up to size().
  absl::Span<const std::atomic<void*>> ids() const {
    return Layout().Slice<kIds>(ptr()).first(safe_size());
  }
  absl::Span<std::atomic<void*>> ids() {
    return Layout().Slice<kIds>(ptr()).first(safe_size());
  }
  std::atomic<void*>& id(uint32_t i) {
    ABSL_DCHECK_LT(i, capacity());
    return Layout().Pointer<kIds>(ptr())[i];
  }

  // arenas: returns up to size().
  absl::Span<const std::atomic<SerialArena*>> arenas() const {
    return Layout().Slice<kArenas>(ptr()).first(safe_size());
  }
  absl::Span<std::atomic<SerialArena*>> arenas() {
    return Layout().Slice<kArenas>(ptr()).first(safe_size());
  }
  const std::atomic<SerialArena*>& arena(uint32_t i) const {
    ABSL_DCHECK_LT(i, capacity());
    return Layout().Pointer<kArenas>(ptr())[i];
  }
  std::atomic<SerialArena*>& arena(uint32_t i) {
    ABSL_DCHECK_LT(i, capacity());
    return Layout().Pointer<kArenas>(ptr())[i];
  }

  // Tries to insert {id, serial} to head chunk. Returns false if the head is
  // already full.
  //
  // Note that updating "size", "id", and "arena" is individually atomic, but
  // they are not protected by a mutex. This is acceptable because concurrent
  // lookups from SpaceUsed or SpaceAllocated accept inaccuracy due to race. On
  // other paths, either race is not possible (GetSerialArenaFallback) or must
  // be prevented by users (CleanupList, Free).
  bool insert(void* me, SerialArena* serial) {
    uint32_t idx = size().fetch_add(1, std::memory_order_relaxed);
    // Bail out if this chunk is full.
    if (idx >= capacity()) {
      // Write old value back to avoid potential overflow.
      size().store(capacity(), std::memory_order_relaxed);
      return false;
    }

    id(idx).store(me, std::memory_order_relaxed);
    arena(idx).store(serial, std::memory_order_release);
    return true;
  }

  constexpr static size_t AllocSize(size_t n) { return Layout(n).AllocSize(); }

 private:
  constexpr static int kHeader = 0;
  constexpr static int kIds = 1;
  constexpr static int kArenas = 2;

  using layout_type = absl::container_internal::Layout<
      SerialArenaChunkHeader, std::atomic<void*>, std::atomic<SerialArena*>>;

  const char* ptr() const { return reinterpret_cast<const char*>(this); }
  char* ptr() { return reinterpret_cast<char*>(this); }

  SerialArenaChunkHeader& header() {
    return *layout_type::Partial().Pointer<kHeader>(ptr());
  }
  const SerialArenaChunkHeader& header() const {
    return *layout_type::Partial().Pointer<kHeader>(ptr());
  }

  std::atomic<uint32_t>& size() { return header().size; }
  const std::atomic<uint32_t>& size() const { return header().size; }

  // Returns the size capped by the capacity as fetch_add may result in a size
  // greater than capacity.
  uint32_t safe_size() const {
    return std::min(capacity(), size().load(std::memory_order_relaxed));
  }

  constexpr static layout_type Layout(size_t n) {
    return layout_type(/*header*/ 1, /*ids*/ n, /*arenas*/ n);
  }
  layout_type Layout() const { return Layout(capacity()); }
};

constexpr SerialArenaChunkHeader kSentryArenaChunk = {0, 0};

ThreadSafeArena::SerialArenaChunk* ThreadSafeArena::SentrySerialArenaChunk() {
  // const_cast is okay because the sentry chunk is never mutated. Also,
  // reinterpret_cast is acceptable here as it should be identical to
  // SerialArenaChunk with zero payload. This is a necessary trick to
  // constexpr initialize kSentryArenaChunk.
  return reinterpret_cast<SerialArenaChunk*>(
      const_cast<SerialArenaChunkHeader*>(&kSentryArenaChunk));
}


alignas(kCacheAlignment) ABSL_CONST_INIT
    std::atomic<ThreadSafeArena::LifecycleId> ThreadSafeArena::lifecycle_id_{0};
#if defined(PROTOBUF_NO_THREADLOCAL)
ThreadSafeArena::ThreadCache& ThreadSafeArena::thread_cache() {
  static internal::ThreadLocalStorage<ThreadCache>* thread_cache_ =
      new internal::ThreadLocalStorage<ThreadCache>();
  return *thread_cache_->Get();
}
#elif defined(PROTOBUF_USE_DLLS) && defined(_WIN32)
ThreadSafeArena::ThreadCache& ThreadSafeArena::thread_cache() {
  static PROTOBUF_THREAD_LOCAL ThreadCache thread_cache;
  return thread_cache;
}
#else
PROTOBUF_CONSTINIT PROTOBUF_THREAD_LOCAL
    ThreadSafeArena::ThreadCache ThreadSafeArena::thread_cache_;
#endif

ThreadSafeArena::ThreadSafeArena() : first_arena_(*this) { Init(); }

ThreadSafeArena::ThreadSafeArena(char* mem, size_t size)
    : first_arena_(FirstSerialArena{}, FirstBlock(mem, size), *this) {
  Init();
}

ThreadSafeArena::ThreadSafeArena(void* mem, size_t size,
                                 const AllocationPolicy& policy)
    : first_arena_(FirstSerialArena{}, FirstBlock(mem, size, policy), *this) {
  InitializeWithPolicy(policy);
}

ArenaBlock* ThreadSafeArena::FirstBlock(void* buf, size_t size) {
  ABSL_DCHECK_EQ(reinterpret_cast<uintptr_t>(buf) & 7, 0u);
  if (buf == nullptr || size <= kBlockHeaderSize) {
    return SentryArenaBlock();
  }
  // Record user-owned block.
  alloc_policy_.set_is_user_owned_initial_block(true);
  return new (buf) ArenaBlock{nullptr, size};
}

ArenaBlock* ThreadSafeArena::FirstBlock(void* buf, size_t size,
                                        const AllocationPolicy& policy) {
  if (policy.IsDefault()) return FirstBlock(buf, size);

  ABSL_DCHECK_EQ(reinterpret_cast<uintptr_t>(buf) & 7, 0u);

  SizedPtr mem;
  if (buf == nullptr || size < kBlockHeaderSize + kAllocPolicySize) {
    mem = AllocateBlock(&policy, 0, kAllocPolicySize);
  } else {
    mem = {buf, size};
    // Record user-owned block.
    alloc_policy_.set_is_user_owned_initial_block(true);
  }

  return new (mem.p) ArenaBlock{nullptr, mem.n};
}

void ThreadSafeArena::InitializeWithPolicy(const AllocationPolicy& policy) {
  Init();

  if (policy.IsDefault()) return;

#ifndef NDEBUG
  const uint64_t old_alloc_policy = alloc_policy_.get_raw();
  // If there was a policy (e.g., in Reset()), make sure flags were preserved.
#define ABSL_DCHECK_POLICY_FLAGS_() \
  if (old_alloc_policy > 3)         \
  ABSL_CHECK_EQ(old_alloc_policy & 3, alloc_policy_.get_raw() & 3)
#else
#define ABSL_DCHECK_POLICY_FLAGS_()
#endif  // NDEBUG

  // We ensured enough space so this cannot fail.
  void* p;
  if (!first_arena_.MaybeAllocateAligned(kAllocPolicySize, &p)) {
    ABSL_LOG(FATAL) << "MaybeAllocateAligned cannot fail here.";
    return;
  }
  new (p) AllocationPolicy{policy};
  // Low bits store flags, so they mustn't be overwritten.
  ABSL_DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(p) & 3);
  alloc_policy_.set_policy(reinterpret_cast<AllocationPolicy*>(p));
  ABSL_DCHECK_POLICY_FLAGS_();

#undef ABSL_DCHECK_POLICY_FLAGS_
}

uint64_t ThreadSafeArena::GetNextLifeCycleId() {
  ThreadCache& tc = thread_cache();
  uint64_t id = tc.next_lifecycle_id;
  constexpr uint64_t kInc = ThreadCache::kPerThreadIds;
  if (PROTOBUF_PREDICT_FALSE((id & (kInc - 1)) == 0)) {
    // On platforms that don't support uint64_t atomics we can certainly not
    // afford to increment by large intervals and expect uniqueness due to
    // wrapping, hence we only add by 1.
    id = lifecycle_id_.fetch_add(1, std::memory_order_relaxed) * kInc;
  }
  tc.next_lifecycle_id = id + 1;
  return id;
}
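
// Illustrative walk-through (comment only): each thread hands out ids from a
// private batch of ThreadCache::kPerThreadIds consecutive values and touches
// the global counter only when the batch is exhausted. For example, if
// kPerThreadIds were 256 (a hypothetical value), a thread whose fetch_add
// returned 3 would serve ids 768, 769, ..., 1023 locally before performing
// another fetch_add.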

// We assume that the number of threads per arena is bimodal; i.e., most arenas
// are small and single-threaded, but some big ones are highly concurrent. To
// balance memory overhead against pointer chasing, we start with few entries
// and grow exponentially (4x) up to a limit (255 entries). Note that the
// parameters are picked for x64 architectures as a hint; the actual size is
// calculated by Layout.
ThreadSafeArena::SerialArenaChunk* ThreadSafeArena::NewSerialArenaChunk(
    uint32_t prev_capacity, void* id, SerialArena* serial) {
  constexpr size_t kMaxBytes = 4096;  // Can hold up to 255 entries.
  constexpr size_t kGrowthFactor = 4;
  constexpr size_t kHeaderSize = SerialArenaChunk::AllocSize(0);
  constexpr size_t kEntrySize = SerialArenaChunk::AllocSize(1) - kHeaderSize;

  // On x64 arch: {4, 16, 64, 256, 256, ...} * 16.
  size_t prev_bytes = SerialArenaChunk::AllocSize(prev_capacity);
  size_t next_bytes = std::min(kMaxBytes, prev_bytes * kGrowthFactor);
  uint32_t next_capacity =
      static_cast<uint32_t>(next_bytes - kHeaderSize) / kEntrySize;
  // Growth based on bytes needs to be adjusted by AllocSize.
  next_bytes = SerialArenaChunk::AllocSize(next_capacity);

  // If we are given more memory than requested, expand the capacity to use
  // that extra space and add the extra entries it permits.
  SizedPtr mem = AllocateAtLeast(next_bytes);
  next_capacity = static_cast<uint32_t>(mem.n - kHeaderSize) / kEntrySize;
  ABSL_DCHECK_LE(SerialArenaChunk::AllocSize(next_capacity), mem.n);
  return new (mem.p) SerialArenaChunk{next_capacity, id, serial};
}
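
// Illustrative walk-through (comment only; byte counts assume the roughly
// 16-byte header and 16 bytes per entry implied by the comment above):
// starting from the sentry chunk (capacity 0), successive chunks request about
// 64 -> 256 -> 1024 -> 4096 bytes, i.e. roughly 3, 15, 63 and then 255 entries,
// after which every new chunk stays at kMaxBytes. AllocateAtLeast may return
// more than requested, in which case the extra space becomes extra entries.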

// Tries to reserve an entry by atomic fetch_add. If the head chunk is already
// full (size >= capacity), acquires the mutex and adds a new head.
void ThreadSafeArena::AddSerialArena(void* id, SerialArena* serial) {
  SerialArenaChunk* head = head_.load(std::memory_order_acquire);
  // Fast path without acquiring mutex.
  if (!head->IsSentry() && head->insert(id, serial)) {
    return;
  }

  // Slow path with acquiring mutex.
  absl::MutexLock lock(&mutex_);

  // Refetch and if someone else installed a new head, try allocating on that!
  SerialArenaChunk* new_head = head_.load(std::memory_order_acquire);
  if (new_head != head) {
    if (new_head->insert(id, serial)) return;
    // Update head to link to the latest one.
    head = new_head;
  }

  new_head = NewSerialArenaChunk(head->capacity(), id, serial);
  new_head->set_next(head);

  // Use "std::memory_order_release" to make sure prior stores are visible after
  // this one.
  head_.store(new_head, std::memory_order_release);
}

void ThreadSafeArena::UnpoisonAllArenaBlocks() const {
  VisitSerialArena([](const SerialArena* serial) {
    for (const auto* b = serial->head(); b != nullptr && !b->IsSentry();
         b = b->next) {
      PROTOBUF_UNPOISON_MEMORY_REGION(b, b->size);
    }
  });
}

void ThreadSafeArena::Init() {
  tag_and_id_ = GetNextLifeCycleId();
  arena_stats_ = Sample();
  head_.store(SentrySerialArenaChunk(), std::memory_order_relaxed);
  first_owner_ = &thread_cache();

  // Record allocation for the first block that was either user-provided or
  // newly allocated.
  ThreadSafeArenaStats::RecordAllocateStats(
      arena_stats_.MutableStats(),
      /*used=*/0,
      /*allocated=*/first_arena_.SpaceAllocated(),
      /*wasted=*/0);

  CacheSerialArena(&first_arena_);
}

ThreadSafeArena::~ThreadSafeArena() {
  // Have to do this in a first pass, because some of the destructors might
  // refer to memory in other blocks.
  CleanupList();

  auto mem = Free();
  if (alloc_policy_.is_user_owned_initial_block()) {
    // Unpoison the initial block, now that it's going back to the user.
    PROTOBUF_UNPOISON_MEMORY_REGION(mem.p, mem.n);
  } else if (mem.n > 0) {
    GetDeallocator(alloc_policy_.get())(mem);
  }
}

SizedPtr ThreadSafeArena::Free() {
  auto deallocator = GetDeallocator(alloc_policy_.get());

  WalkSerialArenaChunk([&](SerialArenaChunk* chunk) {
    absl::Span<std::atomic<SerialArena*>> span = chunk->arenas();
    // Walk the arenas backward so that the first serial arena is handled last.
    // Freeing in reverse order of creation may not actually be necessary for
    // Free, and we should revisit this. (b/247560530)
    for (auto it = span.rbegin(); it != span.rend(); ++it) {
      SerialArena* serial = it->load(std::memory_order_relaxed);
      ABSL_DCHECK_NE(serial, nullptr);
      // Always frees the first block of "serial" as it cannot be user-provided.
      SizedPtr mem = serial->Free(deallocator);
      ABSL_DCHECK_NE(mem.p, nullptr);
      deallocator(mem);
    }

    // Delete the chunk as we're done with it.
    internal::SizedDelete(chunk,
                          SerialArenaChunk::AllocSize(chunk->capacity()));
  });

  // The first block of the first arena is special; let the caller handle it.
  return first_arena_.Free(deallocator);
}

uint64_t ThreadSafeArena::Reset() {
  const size_t space_allocated = SpaceAllocated();

  // Have to do this in a first pass, because some of the destructors might
  // refer to memory in other blocks.
  CleanupList();
  // Reset the first arena's cleanup list.
  first_arena_.cleanup_list_ = cleanup::ChunkList();

  // Discard all blocks except the first one. Whether it is user-provided or
  // allocated, always reuse the first block for the first arena.
  auto mem = Free();

  // Reset the first arena with the first block. This avoids redundant
  // free / allocation and re-allocating for AllocationPolicy. Adjust offset if
  // we need to preserve alloc_policy_.
  if (alloc_policy_.is_user_owned_initial_block() ||
      alloc_policy_.get() != nullptr) {
    size_t offset = alloc_policy_.get() == nullptr
                        ? kBlockHeaderSize
                        : kBlockHeaderSize + kAllocPolicySize;
    first_arena_.Init(new (mem.p) ArenaBlock{nullptr, mem.n}, offset);
  } else {
    first_arena_.Init(SentryArenaBlock(), 0);
  }

  // Since the first block and the potential alloc_policy on the first block
  // are preserved, this can be initialized by Init().
  Init();

  return space_allocated;
}

void* ThreadSafeArena::AllocateAlignedWithCleanup(size_t n, size_t align,
                                                  void (*destructor)(void*)) {
  SerialArena* arena;
  if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
    return arena->AllocateAlignedWithCleanup(n, align, destructor);
  } else {
    return AllocateAlignedWithCleanupFallback(n, align, destructor);
  }
}

void ThreadSafeArena::AddCleanup(void* elem, void (*cleanup)(void*)) {
  GetSerialArena()->AddCleanup(elem, cleanup);
}

SerialArena* ThreadSafeArena::GetSerialArena() {
  SerialArena* arena;
  if (PROTOBUF_PREDICT_FALSE(!GetSerialArenaFast(&arena))) {
    arena = GetSerialArenaFallback(kMaxCleanupNodeSize);
  }
  return arena;
}

PROTOBUF_NOINLINE
void* ThreadSafeArena::AllocateAlignedWithCleanupFallback(
    size_t n, size_t align, void (*destructor)(void*)) {
  return GetSerialArenaFallback(n + kMaxCleanupNodeSize)
      ->AllocateAlignedWithCleanup(n, align, destructor);
}

PROTOBUF_NOINLINE
void* ThreadSafeArena::AllocateFromStringBlock() {
  return GetSerialArena()->AllocateFromStringBlock();
}

template <typename Callback>
void ThreadSafeArena::WalkConstSerialArenaChunk(Callback fn) const {
  const SerialArenaChunk* chunk = head_.load(std::memory_order_acquire);

  for (; !chunk->IsSentry(); chunk = chunk->next_chunk()) {
    // Prefetch the next chunk.
    absl::PrefetchToLocalCache(chunk->next_chunk());
    fn(chunk);
  }
}

template <typename Callback>
void ThreadSafeArena::WalkSerialArenaChunk(Callback fn) {
  // By omitting an acquire barrier we help the sanitizer: any user code that
  // doesn't properly synchronize Reset() or the destructor will trigger a
  // TSAN warning.
  SerialArenaChunk* chunk = head_.load(std::memory_order_relaxed);

  while (!chunk->IsSentry()) {
    // Cache next chunk in case this chunk is destroyed.
    SerialArenaChunk* next_chunk = chunk->next_chunk();
    // Prefetch the next chunk.
    absl::PrefetchToLocalCache(next_chunk);
    fn(chunk);
    chunk = next_chunk;
  }
}

template <typename Callback>
void ThreadSafeArena::VisitSerialArena(Callback fn) const {
  // In most cases, arenas are single-threaded and "first_arena_" should be
  // sufficient.
  fn(&first_arena_);

  WalkConstSerialArenaChunk([&fn](const SerialArenaChunk* chunk) {
    for (const auto& each : chunk->arenas()) {
      const SerialArena* serial = each.load(std::memory_order_acquire);
      // It is possible that a newly added SerialArena is not yet visible even
      // though the size was updated. This is acceptable for SpaceAllocated and
      // SpaceUsed.
      if (serial == nullptr) continue;
      fn(serial);
    }
  });
}

uint64_t ThreadSafeArena::SpaceAllocated() const {
  uint64_t space_allocated = 0;
  VisitSerialArena([&space_allocated](const SerialArena* serial) {
    space_allocated += serial->SpaceAllocated();
  });
  return space_allocated;
}

uint64_t ThreadSafeArena::SpaceUsed() const {
  // `first_arena_` doesn't have kSerialArenaSize overhead, so adjust it here.
  uint64_t space_used = kSerialArenaSize;
  VisitSerialArena([&space_used](const SerialArena* serial) {
    // SerialArenas on chunks are allocated directly from a block and need to
    // be subtracted from SpaceUsed.
    space_used += serial->SpaceUsed() - kSerialArenaSize;
  });
  return space_used - (alloc_policy_.get() ? sizeof(AllocationPolicy) : 0);
}

template <AllocationClient alloc_client>
PROTOBUF_NOINLINE void* ThreadSafeArena::AllocateAlignedFallback(size_t n) {
  return GetSerialArenaFallback(n)->AllocateAligned<alloc_client>(n);
}

template void* ThreadSafeArena::AllocateAlignedFallback<
    AllocationClient::kDefault>(size_t);
template void*
    ThreadSafeArena::AllocateAlignedFallback<AllocationClient::kArray>(size_t);

void ThreadSafeArena::CleanupList() {
#ifdef PROTOBUF_ASAN
  UnpoisonAllArenaBlocks();
#endif

  WalkSerialArenaChunk([](SerialArenaChunk* chunk) {
    absl::Span<std::atomic<SerialArena*>> span = chunk->arenas();
    // Walk the arenas backward so that the first serial arena is handled last.
    // Destroying in reverse order of construction is often assumed by users and
    // is required to avoid breaking inter-object dependencies. (b/247560530)
    for (auto it = span.rbegin(); it != span.rend(); ++it) {
      SerialArena* serial = it->load(std::memory_order_relaxed);
      ABSL_DCHECK_NE(serial, nullptr);
      serial->CleanupList();
    }
  });
  // First arena must be cleaned up last. (b/247560530)
  first_arena_.CleanupList();
}

PROTOBUF_NOINLINE
SerialArena* ThreadSafeArena::GetSerialArenaFallback(size_t n) {
  void* const id = &thread_cache();
  if (id == first_owner_) {
    CacheSerialArena(&first_arena_);
    return &first_arena_;
  }

  // Search matching SerialArena.
  SerialArena* serial = nullptr;
  WalkConstSerialArenaChunk([&serial, id](const SerialArenaChunk* chunk) {
    absl::Span<const std::atomic<void*>> ids = chunk->ids();
    for (uint32_t i = 0; i < ids.size(); ++i) {
      if (ids[i].load(std::memory_order_relaxed) == id) {
        serial = chunk->arena(i).load(std::memory_order_relaxed);
        ABSL_DCHECK_NE(serial, nullptr);
        break;
      }
    }
  });

  if (!serial) {
    // This thread doesn't have any SerialArena, which also means it doesn't
    // have any blocks yet.  So we'll allocate its first block now. It must be
    // big enough to host SerialArena and the pending request.
    serial = SerialArena::New(
        AllocateBlock(alloc_policy_.get(), 0, n + kSerialArenaSize), *this);

    AddSerialArena(id, serial);
  }

  CacheSerialArena(serial);
  return serial;
}

}  // namespace internal

void* Arena::Allocate(size_t n) { return impl_.AllocateAligned(n); }

void* Arena::AllocateForArray(size_t n) {
  return impl_.AllocateAligned<internal::AllocationClient::kArray>(n);
}

void* Arena::AllocateAlignedWithCleanup(size_t n, size_t align,
                                        void (*destructor)(void*)) {
  return impl_.AllocateAlignedWithCleanup(n, align, destructor);
}

std::vector<void*> Arena::PeekCleanupListForTesting() {
  return impl_.PeekCleanupListForTesting();
}
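
// Usage sketch (comment only, illustrative): these thin wrappers are what the
// public Arena API funnels into. Assuming the usual public surface of
// google::protobuf::Arena:
//
//   google::protobuf::Arena arena;
//   auto* s = google::protobuf::Arena::Create<std::string>(&arena, "hello");
//   // `s` is owned by the arena; its destructor runs via the cleanup list in
//   // this file when the arena is reset or destroyed.
//   uint64_t allocated = arena.SpaceAllocated();
//   arena.Reset();  // runs cleanups, frees extra blocks, keeps the first one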

}  // namespace protobuf
}  // namespace google

#include "google/protobuf/port_undef.inc"