// Protocol Buffers - Google's data interchange format
// Copyright 2022 Google Inc.  All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
//
// This file defines the internal class SerialArena

#ifndef GOOGLE_PROTOBUF_SERIAL_ARENA_H__
#define GOOGLE_PROTOBUF_SERIAL_ARENA_H__

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

#include "absl/base/attributes.h"
#include "absl/base/optimization.h"
#include "absl/base/prefetch.h"
#include "absl/log/absl_check.h"
#include "absl/numeric/bits.h"
#include "google/protobuf/arena_align.h"
#include "google/protobuf/arena_cleanup.h"
#include "google/protobuf/port.h"
#include "google/protobuf/string_block.h"

// Must be included last.
#include "google/protobuf/port_def.inc"

namespace google {
namespace protobuf {
namespace internal {

// Arena blocks are variable length malloc-ed objects.  The following structure
// describes the common header for all blocks.
struct ArenaBlock {
  // For the sentry block with zero-size where ptr_/limit_ both point to `this`.
  constexpr ArenaBlock() : next(nullptr), size(0) {}

  ArenaBlock(ArenaBlock* next, size_t size) : next(next), size(size) {
    ABSL_DCHECK_GT(size, sizeof(ArenaBlock));
  }

  char* Pointer(size_t n) {
    ABSL_DCHECK_LE(n, size);
    return reinterpret_cast<char*>(this) + n;
  }
  char* Limit() { return Pointer(size & static_cast<size_t>(-8)); }

  bool IsSentry() const { return size == 0; }

  ArenaBlock* const next;
  const size_t size;
  // data follows
};

enum class AllocationClient { kDefault, kArray };

class ThreadSafeArena;

// Tag type used to invoke the constructor of the first SerialArena.
struct FirstSerialArena {
  explicit FirstSerialArena() = default;
};

// A simple arena allocator. Calls to allocate functions must be properly
// serialized by the caller, hence this class cannot be used as a general
// purpose allocator in a multi-threaded program. It serves as a building block
// for ThreadSafeArena, which provides a thread-safe arena allocator.
//
// This class manages
// 1) Arena bump allocation + owning memory blocks.
// 2) Maintaining a cleanup list.
// It delegates the actual memory allocation back to ThreadSafeArena, which
// holds the information about the block growth policy and the backing memory
// allocator used.
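//
// Illustrative usage sketch (not part of the header; `MyType` and the cleanup
// lambda are hypothetical): a single thread that owns a SerialArena
// interleaves bump allocation with cleanup registration, e.g.
//
//   void* mem = arena.AllocateAligned(ArenaAlignDefault::Ceil(sizeof(MyType)));
//   MyType* obj = new (mem) MyType();
//   arena.AddCleanup(obj, +[](void* p) { static_cast<MyType*>(p)->~MyType(); });
//
// ThreadSafeArena builds on exactly this pattern, handling the cross-thread
// coordination so each SerialArena only ever sees serialized calls.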
class PROTOBUF_EXPORT SerialArena {
 public:
  static constexpr size_t kBlockHeaderSize =
      ArenaAlignDefault::Ceil(sizeof(ArenaBlock));

  void CleanupList() { cleanup_list_.Cleanup(*this); }
  uint64_t SpaceAllocated() const {
    return space_allocated_.load(std::memory_order_relaxed);
  }
  uint64_t SpaceUsed() const;

  // See comments on `cached_blocks_` member for details.
  PROTOBUF_ALWAYS_INLINE void* TryAllocateFromCachedBlock(size_t size) {
    if (PROTOBUF_PREDICT_FALSE(size < 16)) return nullptr;
    // We round up to the next larger block in case the memory doesn't match
    // the pattern we are looking for.
    const size_t index = absl::bit_width(size - 1) - 4;
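    // E.g. (illustrative): a 24-byte request has absl::bit_width(23) == 5, so
    // index == 1; that bucket only receives returned blocks of at least 32
    // bytes (see ReturnArrayMemory), so any block found there is big enough.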

    if (PROTOBUF_PREDICT_FALSE(index >= cached_block_length_)) return nullptr;
    auto& cached_head = cached_blocks_[index];
    if (cached_head == nullptr) return nullptr;

    void* ret = cached_head;
    PROTOBUF_UNPOISON_MEMORY_REGION(ret, size);
    cached_head = cached_head->next;
    return ret;
  }

  // In kArray mode we look through cached blocks.
  // We do not do this by default because most non-array allocations will not
  // have the right size and will fail to find an appropriate cached block.
  //
  // TODO: Evaluate if we should use cached blocks for message types of
  // the right size. We can statically know if the allocation size can benefit
  // from it.
  template <AllocationClient alloc_client = AllocationClient::kDefault>
  void* AllocateAligned(size_t n) {
    ABSL_DCHECK(internal::ArenaAlignDefault::IsAligned(n));
    ABSL_DCHECK_GE(limit_, ptr());

    if (alloc_client == AllocationClient::kArray) {
      if (void* res = TryAllocateFromCachedBlock(n)) {
        return res;
      }
    }

    void* ptr;
    if (PROTOBUF_PREDICT_TRUE(MaybeAllocateAligned(n, &ptr))) {
      return ptr;
    }
    return AllocateAlignedFallback(n);
  }

 private:
  static inline PROTOBUF_ALWAYS_INLINE constexpr size_t AlignUpTo(size_t n,
                                                                  size_t a) {
    // We waste space by over-allocating align - 8 bytes, compared to a
    // dedicated function that takes the current alignment into consideration.
    // Such a scheme would only waste (align - 8)/2 bytes on average, but
    // requires a dedicated function in the outline arena allocation functions.
    // Possibly re-evaluate tradeoffs later.
    return a <= 8 ? ArenaAlignDefault::Ceil(n) : ArenaAlignAs(a).Padded(n);
  }

  static inline PROTOBUF_ALWAYS_INLINE void* AlignTo(void* p, size_t a) {
    return (a <= ArenaAlignDefault::align)
               ? ArenaAlignDefault::CeilDefaultAligned(p)
               : ArenaAlignAs(a).CeilDefaultAligned(p);
  }

  // See comments on `cached_blocks_` member for details.
  void ReturnArrayMemory(void* p, size_t size) {
    // We only need to check for 32-bit platforms.
    // On 64-bit platforms the minimum allocation size from Repeated*Field is
    // guaranteed to be 16 bytes.
    if (sizeof(void*) < 8) {
      if (PROTOBUF_PREDICT_FALSE(size < 16)) return;
    } else {
      PROTOBUF_ASSUME(size >= 16);
    }

    // We round down to the next smaller block in case the memory doesn't match
    // the pattern we are looking for. E.g., someone might have called
    // Reserve() on the repeated field.
    const size_t index = absl::bit_width(size) - 5;
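    // E.g. (illustrative): a returned 40-byte block has absl::bit_width(40) ==
    // 6, so index == 1; TryAllocateFromCachedBlock reads bucket 1 only for
    // requests of at most 32 bytes, so rounding down keeps every hit safe.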

    if (PROTOBUF_PREDICT_FALSE(index >= cached_block_length_)) {
      // We can't put this object on the freelist, so make this object the
      // freelist. It is guaranteed to be larger than the one we have, and
      // large enough to hold another allocation of `size`.
      CachedBlock** new_list = static_cast<CachedBlock**>(p);
      size_t new_size = size / sizeof(CachedBlock*);

      std::copy(cached_blocks_, cached_blocks_ + cached_block_length_,
                new_list);

      // We need to unpoison this memory before filling it in case it has been
      // poisoned by another sanitizer client.
      PROTOBUF_UNPOISON_MEMORY_REGION(
          new_list + cached_block_length_,
          (new_size - cached_block_length_) * sizeof(CachedBlock*));

      std::fill(new_list + cached_block_length_, new_list + new_size, nullptr);

      cached_blocks_ = new_list;
      // Make the length fit in uint8_t. Indices are power-of-two size classes,
      // so 64 entries already covers any possible block size.
      cached_block_length_ =
          static_cast<uint8_t>(std::min(size_t{64}, new_size));

      return;
    }

    auto& cached_head = cached_blocks_[index];
    auto* new_node = static_cast<CachedBlock*>(p);
    new_node->next = cached_head;
    cached_head = new_node;
    PROTOBUF_POISON_MEMORY_REGION(p, size);
  }

 public:
  // Allocate space if the current region provides enough space.
  bool MaybeAllocateAligned(size_t n, void** out) {
    ABSL_DCHECK(internal::ArenaAlignDefault::IsAligned(n));
    ABSL_DCHECK_GE(limit_, ptr());
    char* ret = ptr();
    // ret + n may point out of the block bounds, or ret may be nullptr.
    // Both computations have undefined behavior when done on pointers,
    // so do them on uintptr_t instead.
    if (PROTOBUF_PREDICT_FALSE(reinterpret_cast<uintptr_t>(ret) + n >
                               reinterpret_cast<uintptr_t>(limit_))) {
      return false;
    }
    PROTOBUF_UNPOISON_MEMORY_REGION(ret, n);
    *out = ret;
    char* next = ret + n;
    set_ptr(next);
    MaybePrefetchData(next);
    return true;
  }

  // If there is enough space in the current block, allocate space for one
  // std::string object and register for destruction. The object has not been
  // constructed and the memory returned is uninitialized.
  PROTOBUF_ALWAYS_INLINE void* MaybeAllocateStringWithCleanup() {
    void* p;
    return MaybeAllocateString(p) ? p : nullptr;
  }

  PROTOBUF_ALWAYS_INLINE
  void* AllocateAlignedWithCleanup(size_t n, size_t align,
                                   void (*destructor)(void*)) {
    n = ArenaAlignDefault::Ceil(n);
    char* ret = ArenaAlignAs(align).CeilDefaultAligned(ptr());
    // See the comment in MaybeAllocateAligned re uintptr_t.
    if (PROTOBUF_PREDICT_FALSE(reinterpret_cast<uintptr_t>(ret) + n >
                               reinterpret_cast<uintptr_t>(limit_))) {
      return AllocateAlignedWithCleanupFallback(n, align, destructor);
    }
    PROTOBUF_UNPOISON_MEMORY_REGION(ret, n);
    char* next = ret + n;
    set_ptr(next);
    AddCleanup(ret, destructor);
    ABSL_DCHECK_GE(limit_, ptr());
    MaybePrefetchData(next);
    return ret;
  }

  PROTOBUF_ALWAYS_INLINE
  void AddCleanup(void* elem, void (*destructor)(void*)) {
    cleanup_list_.Add(elem, destructor, *this);
    MaybePrefetchCleanup();
  }

  ABSL_ATTRIBUTE_RETURNS_NONNULL void* AllocateFromStringBlock();

  std::vector<void*> PeekCleanupListForTesting();

 private:
  friend class ThreadSafeArena;
  friend class cleanup::ChunkList;

  // See comments for cached_blocks_.
  struct CachedBlock {
    // Simple linked list.
    CachedBlock* next;
  };

  static constexpr ptrdiff_t kPrefetchDataDegree = ABSL_CACHELINE_SIZE * 16;
  static constexpr ptrdiff_t kPrefetchCleanupDegree = ABSL_CACHELINE_SIZE * 6;

  // Constructor is private as only New() should be used.
  inline SerialArena(ArenaBlock* b, ThreadSafeArena& parent);

  // Constructors to handle the first SerialArena.
  inline explicit SerialArena(ThreadSafeArena& parent);
  inline SerialArena(FirstSerialArena, ArenaBlock* b, ThreadSafeArena& parent);

  bool MaybeAllocateString(void*& p);
  ABSL_ATTRIBUTE_RETURNS_NONNULL void* AllocateFromStringBlockFallback();

  // Prefetch the next prefetch_degree bytes after `prefetch_ptr` and
  // up to `limit`, if `next` is within prefetch_degree bytes of `prefetch_ptr`.
  PROTOBUF_ALWAYS_INLINE
  static const char* MaybePrefetchImpl(const ptrdiff_t prefetch_degree,
                                       const char* next, const char* limit,
                                       const char* prefetch_ptr) {
    if (PROTOBUF_PREDICT_TRUE(prefetch_ptr - next > prefetch_degree))
      return prefetch_ptr;
    if (PROTOBUF_PREDICT_TRUE(prefetch_ptr < limit)) {
      prefetch_ptr = std::max(next, prefetch_ptr);
      ABSL_DCHECK(prefetch_ptr != nullptr);
      const char* end = std::min(limit, prefetch_ptr + prefetch_degree);
      for (; prefetch_ptr < end; prefetch_ptr += ABSL_CACHELINE_SIZE) {
        absl::PrefetchToLocalCacheForWrite(prefetch_ptr);
      }
    }
    return prefetch_ptr;
  }
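  // Illustrative note on the scheme above: with kPrefetchDataDegree ==
  // ABSL_CACHELINE_SIZE * 16, the helpers below keep roughly that many bytes
  // past the bump pointer prefetched for writing, issuing new prefetches only
  // once the allocation frontier comes within kPrefetchDataDegree bytes of the
  // frontier recorded in `prefetch_ptr_`.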
  PROTOBUF_ALWAYS_INLINE
  void MaybePrefetchData(const char* next) {
    ABSL_DCHECK(static_cast<const void*>(prefetch_ptr_) == nullptr ||
                static_cast<const void*>(prefetch_ptr_) >= head());
    prefetch_ptr_ =
        MaybePrefetchImpl(kPrefetchDataDegree, next, limit_, prefetch_ptr_);
  }
  PROTOBUF_ALWAYS_INLINE
  void MaybePrefetchCleanup() {
    ABSL_DCHECK(static_cast<const void*>(cleanup_list_.prefetch_ptr_) ==
                    nullptr ||
                static_cast<const void*>(cleanup_list_.prefetch_ptr_) >=
                    cleanup_list_.head_);
    cleanup_list_.prefetch_ptr_ = MaybePrefetchImpl(
        kPrefetchCleanupDegree, reinterpret_cast<char*>(cleanup_list_.next_),
        reinterpret_cast<char*>(cleanup_list_.limit_),
        cleanup_list_.prefetch_ptr_);
  }

  // Creates a new SerialArena inside mem, using the remaining memory for
  // future allocations.
  // The `parent` arena must outlive the serial arena, which is guaranteed
  // because the parent manages the lifetime of the serial arenas.
  static SerialArena* New(SizedPtr mem, ThreadSafeArena& parent);
  // Free SerialArena returning the memory passed in to New.
  template <typename Deallocator>
  SizedPtr Free(Deallocator deallocator);

  size_t FreeStringBlocks() {
    // Delete all strings on the active block, skipping the unused instances.
    size_t unused_bytes = string_block_unused_.load(std::memory_order_relaxed);
    if (StringBlock* sb = string_block_.load(std::memory_order_relaxed)) {
      return FreeStringBlocks(sb, unused_bytes);
    }
    return 0;
  }
  static size_t FreeStringBlocks(StringBlock* string_block, size_t unused);

  // Adds `space_used` to space_used_ in relaxed atomic order.
  void AddSpaceUsed(size_t space_used) {
    space_used_.store(space_used_.load(std::memory_order_relaxed) + space_used,
                      std::memory_order_relaxed);
  }

  // Adds `space_allocated` to space_allocated_ in relaxed atomic order.
  void AddSpaceAllocated(size_t space_allocated) {
    space_allocated_.store(
        space_allocated_.load(std::memory_order_relaxed) + space_allocated,
        std::memory_order_relaxed);
  }

  // Helper getters/setters to handle relaxed operations on atomic variables.
  ArenaBlock* head() { return head_.load(std::memory_order_relaxed); }
  const ArenaBlock* head() const {
    return head_.load(std::memory_order_relaxed);
  }

  char* ptr() { return ptr_.load(std::memory_order_relaxed); }
  const char* ptr() const { return ptr_.load(std::memory_order_relaxed); }
  void set_ptr(char* ptr) { return ptr_.store(ptr, std::memory_order_relaxed); }
  PROTOBUF_ALWAYS_INLINE void set_range(char* ptr, char* limit) {
    set_ptr(ptr);
    prefetch_ptr_ = ptr;
    limit_ = limit;
  }

  void* AllocateAlignedFallback(size_t n);
  void* AllocateAlignedWithCleanupFallback(size_t n, size_t align,
                                           void (*destructor)(void*));
  void AddCleanupFallback(void* elem, void (*destructor)(void*));
  inline void AllocateNewBlock(size_t n);
  inline void Init(ArenaBlock* b, size_t offset);

  // Members are declared here to track sizeof(SerialArena) and hotness
  // centrally. They are (roughly) laid out in descending order of hotness.

  // Next pointer to allocate from.  Always 8-byte aligned.  Points inside
  // head_ (and head_->pos will always be non-canonical).  We keep these
  // here to reduce indirection.
  std::atomic<char*> ptr_{nullptr};
  // Limiting address up to which memory can be allocated from the head block.
  char* limit_ = nullptr;
  // Current prefetch positions. Data from `ptr_` up to but not including
  // `prefetch_ptr_` is software prefetched.
  const char* prefetch_ptr_ = nullptr;

  // Chunked linked list for managing cleanup for arena elements.
  cleanup::ChunkList cleanup_list_;

  // The active string block.
  std::atomic<StringBlock*> string_block_{nullptr};

  // The number of unused bytes in string_block_.
  // We allocate from `effective_size()` down to 0 inside `string_block_`.
  // `unused == 0` means that `string_block_` is exhausted (or null).
  std::atomic<size_t> string_block_unused_{0};

  std::atomic<ArenaBlock*> head_{nullptr};  // Head of linked list of blocks.
  std::atomic<size_t> space_used_{0};       // Necessary for metrics.
  std::atomic<size_t> space_allocated_{0};
  ThreadSafeArena& parent_;

  // Repeated*Field and Arena play together to reduce memory consumption by
  // reusing blocks. Currently, natural growth of the repeated field types makes
  // them allocate blocks of size `8 + 2^N, N>=3`.
  // When a repeated field grows, it returns its previous block and we put it in
  // this free list.
  // `cached_blocks_[i]` points to the free list for blocks of size `8+2^(i+3)`.
  // The array of freelists is grown when needed in `ReturnArrayMemory()`.
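  // E.g. (illustrative): natural growth yields buffers of 16, 24, 40, 72, 136,
  // ... bytes (8 + 2^N for N >= 3); when such a buffer is discarded,
  // ReturnArrayMemory() files it under the largest power-of-two request size
  // it can still satisfy, and TryAllocateFromCachedBlock() hands it back out.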
  uint8_t cached_block_length_ = 0;
  CachedBlock** cached_blocks_ = nullptr;
};

inline PROTOBUF_ALWAYS_INLINE bool SerialArena::MaybeAllocateString(void*& p) {
  // Check how many unused instances are in the current block.
  size_t unused_bytes = string_block_unused_.load(std::memory_order_relaxed);
  if (PROTOBUF_PREDICT_TRUE(unused_bytes != 0)) {
    unused_bytes -= sizeof(std::string);
    string_block_unused_.store(unused_bytes, std::memory_order_relaxed);
    p = string_block_.load(std::memory_order_relaxed)->AtOffset(unused_bytes);
    return true;
  }
  return false;
}
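
// Illustrative example (assuming a 32-byte std::string, as is typical with
// 64-bit libstdc++/libc++): with string_block_unused_ == 512, the fast path
// above hands out the slot at offset 480 of the active StringBlock, then 448,
// and so on; once unused reaches 0 the fast path fails and
// AllocateFromStringBlockFallback() installs a fresh block.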

ABSL_ATTRIBUTE_RETURNS_NONNULL inline PROTOBUF_ALWAYS_INLINE void*
SerialArena::AllocateFromStringBlock() {
  void* p;
  if (ABSL_PREDICT_TRUE(MaybeAllocateString(p))) return p;
  return AllocateFromStringBlockFallback();
}

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include "google/protobuf/port_undef.inc"

#endif  // GOOGLE_PROTOBUF_SERIAL_ARENA_H__