// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file defines an Arena allocator for better allocation performance.

#ifndef GOOGLE_PROTOBUF_ARENA_IMPL_H__
#define GOOGLE_PROTOBUF_ARENA_IMPL_H__

#include <atomic>
#include <limits>

#include <google/protobuf/stubs/common.h>
#include <google/protobuf/stubs/logging.h>

#ifdef ADDRESS_SANITIZER
#include <sanitizer/asan_interface.h>
#endif  // ADDRESS_SANITIZER

#include <google/protobuf/port_def.inc>


namespace google {
namespace protobuf {

struct ArenaOptions;

namespace internal {

inline size_t AlignUpTo8(size_t n) {
  // Align n to next multiple of 8 (from Hacker's Delight, Chapter 3.)
  return (n + 7) & static_cast<size_t>(-8);
}
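// For example, AlignUpTo8(1) == 8, AlignUpTo8(8) == 8, and AlignUpTo8(13) == 16;
// values that are already multiples of 8 are returned unchanged.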

using LifecycleIdAtomic = uint64_t;

void PROTOBUF_EXPORT ArenaFree(void* object, size_t size);

// ArenaMetricsCollector collects stats for a particular arena.
class PROTOBUF_EXPORT ArenaMetricsCollector {
 public:
  virtual ~ArenaMetricsCollector();

  // Invoked when the arena is about to be destroyed. This method will
  // typically finalize any metric collection and delete the collector.
  // space_allocated is the space used by the arena.
  virtual void OnDestroy(uint64 space_allocated) = 0;

  // OnReset() is called when the associated arena is reset.
  // space_allocated is the space used by the arena just before the reset.
  virtual void OnReset(uint64 space_allocated) = 0;

  // Does OnAlloc() need to be called?  If false, metric collection overhead
  // will be reduced since we will not do extra work per allocation.
  virtual bool RecordAllocs() = 0;

  // OnAlloc is called when an allocation happens.
  // allocated_type is guaranteed to be static: it comes from the typeid
  // operator, so its lifetime extends to the end of the program.
  // Note: typeid(void) is passed as allocated_type whenever we intentionally
  // want to avoid monitoring an allocation (e.g. internal allocations made
  // to manage the arena itself).
  virtual void OnAlloc(const std::type_info* allocated_type,
                       uint64 alloc_size) = 0;
};
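// As a concrete illustration only (this sketch is not part of the original
// header, and the class name below is hypothetical), a collector that just
// logs the arena's final size could look like:
//
//   class LoggingMetricsCollector : public ArenaMetricsCollector {
//    public:
//     void OnDestroy(uint64 space_allocated) override {
//       GOOGLE_LOG(INFO) << "arena destroyed; allocated " << space_allocated;
//       delete this;  // OnDestroy typically deletes the collector.
//     }
//     void OnReset(uint64 space_allocated) override {}
//     bool RecordAllocs() override { return false; }  // skip per-alloc work
//     void OnAlloc(const std::type_info*, uint64) override {}
//   };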

class ArenaImpl;

// A thread-unsafe Arena that can only be used within its owning thread.
class PROTOBUF_EXPORT SerialArena {
 public:
  // Blocks are variable length malloc-ed objects.  The following structure
  // describes the common header for all blocks.
  class PROTOBUF_EXPORT Block {
   public:
    Block(size_t size, Block* next, bool special, bool user_owned)
        : next_and_bits_(reinterpret_cast<uintptr_t>(next) | (special ? 1 : 0) |
                         (user_owned ? 2 : 0)),
          pos_(kBlockHeaderSize),
          size_(size) {
      GOOGLE_DCHECK_EQ(reinterpret_cast<uintptr_t>(next) & 3, 0u);
    }

    char* Pointer(size_t n) {
      GOOGLE_DCHECK(n <= size_);
      return reinterpret_cast<char*>(this) + n;
    }

    // One of the blocks may be special. This is either a user-supplied
    // initial block, or a block we created at startup to hold Options info.
    // A special block is not deleted by Reset.
    bool special() const { return (next_and_bits_ & 1) != 0; }

    // Whether or not this current block is owned by the user.
    // Only special blocks can be user_owned.
    bool user_owned() const { return (next_and_bits_ & 2) != 0; }

    Block* next() const {
      const uintptr_t bottom_bits = 3;
      return reinterpret_cast<Block*>(next_and_bits_ & ~bottom_bits);
    }

    void clear_next() {
      next_and_bits_ &= 3;  // Set next to nullptr, preserve bottom bits.
    }

    size_t pos() const { return pos_; }
    size_t size() const { return size_; }
    void set_pos(size_t pos) { pos_ = pos; }

   private:
    // Holds pointer to next block for this thread + special/user_owned bits.
    uintptr_t next_and_bits_;
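    // Note: the two flag bits fit in next_and_bits_ because the constructor
    // DCHECKs that the next pointer is at least 4-byte aligned, so the low
    // two bits of a real Block* are always zero.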

    size_t pos_;
    size_t size_;
    // data follows
  };

  // The allocate/free methods here are a little strange, since SerialArena is
  // allocated inside a Block which it also manages.  This is to avoid doing
  // an extra allocation for the SerialArena itself.
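  //
  // Roughly, the first block of an arena therefore looks like this
  // (an illustrative picture, not a normative layout):
  //
  //   [ Block header | SerialArena object | 8-byte aligned allocations ... ]
  //                                         ^ ptr_ grows toward limit_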

  // Creates a new SerialArena inside Block* and returns it.
  static SerialArena* New(Block* b, void* owner, ArenaImpl* arena);

  void CleanupList();
  uint64 SpaceUsed() const;

  bool HasSpace(size_t n) { return n <= static_cast<size_t>(limit_ - ptr_); }

  void* AllocateAligned(size_t n) {
    GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n);  // Must be already aligned.
    GOOGLE_DCHECK_GE(limit_, ptr_);
    if (PROTOBUF_PREDICT_FALSE(!HasSpace(n))) {
      return AllocateAlignedFallback(n);
    }
    void* ret = ptr_;
    ptr_ += n;
#ifdef ADDRESS_SANITIZER
    ASAN_UNPOISON_MEMORY_REGION(ret, n);
#endif  // ADDRESS_SANITIZER
    return ret;
  }

  // Allocate space if the current region provides enough space.
  bool MaybeAllocateAligned(size_t n, void** out) {
    GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n);  // Must be already aligned.
    GOOGLE_DCHECK_GE(limit_, ptr_);
    if (PROTOBUF_PREDICT_FALSE(!HasSpace(n))) return false;
    void* ret = ptr_;
    ptr_ += n;
#ifdef ADDRESS_SANITIZER
    ASAN_UNPOISON_MEMORY_REGION(ret, n);
#endif  // ADDRESS_SANITIZER
    *out = ret;
    return true;
  }

  void AddCleanup(void* elem, void (*cleanup)(void*)) {
    if (PROTOBUF_PREDICT_FALSE(cleanup_ptr_ == cleanup_limit_)) {
      AddCleanupFallback(elem, cleanup);
      return;
    }
    cleanup_ptr_->elem = elem;
    cleanup_ptr_->cleanup = cleanup;
    cleanup_ptr_++;
  }

  void* AllocateAlignedAndAddCleanup(size_t n, void (*cleanup)(void*)) {
    void* ret = AllocateAligned(n);
    AddCleanup(ret, cleanup);
    return ret;
  }

  Block* head() const { return head_; }
  void* owner() const { return owner_; }
  SerialArena* next() const { return next_; }
  void set_next(SerialArena* next) { next_ = next; }
  static Block* NewBlock(Block* last_block, size_t min_bytes, ArenaImpl* arena);

 private:
  // Node contains the ptr of the object to be cleaned up and the associated
  // cleanup function ptr.
  struct CleanupNode {
    void* elem;              // Pointer to the object to be cleaned up.
    void (*cleanup)(void*);  // Function pointer to the destructor or deleter.
  };

  // Cleanup uses a chunked linked list, to reduce pointer chasing.
  struct CleanupChunk {
    static size_t SizeOf(size_t i) {
      return sizeof(CleanupChunk) + (sizeof(CleanupNode) * (i - 1));
    }
    size_t size;           // Total elements in the list.
    CleanupChunk* next;    // Next node in the list.
    CleanupNode nodes[1];  // True length is |size|.
  };
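  // A chunk with capacity N therefore occupies SizeOf(N) bytes: the header
  // fields plus N CleanupNode slots, with nodes[1] above standing in for a
  // variable-length tail.  cleanup_ptr_ and cleanup_limit_ below point into
  // the nodes[] array of the head chunk.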

  ArenaImpl* arena_;       // Containing arena.
  void* owner_;            // &ThreadCache of this thread.
  Block* head_;            // Head of linked list of blocks.
  CleanupChunk* cleanup_;  // Head of cleanup list.
  SerialArena* next_;      // Next SerialArena in this linked list.

  // Next pointer to allocate from.  Always 8-byte aligned.  Points inside
  // head_ (and head_->pos will always be non-canonical).  We keep these
  // here to reduce indirection.
  char* ptr_;
  char* limit_;

  // Next CleanupList members to append to.  These point inside cleanup_.
  CleanupNode* cleanup_ptr_;
  CleanupNode* cleanup_limit_;

  void* AllocateAlignedFallback(size_t n);
  void AddCleanupFallback(void* elem, void (*cleanup)(void*));
  void CleanupListFallback();

 public:
  static constexpr size_t kBlockHeaderSize =
      (sizeof(Block) + 7) & static_cast<size_t>(-8);
};

// This class provides the core Arena memory allocation library. Different
// implementations only need to implement the public interface below.
// Arena is not a template type, as that would only be useful if all protos
// were in turn templates, which will not/cannot happen. However, by separating
// the memory allocation part from the cruft of the API users expect, we can
// use #ifdef to select the best implementation based on hardware / OS.
class PROTOBUF_EXPORT ArenaImpl {
 public:
  static const size_t kDefaultStartBlockSize = 256;
  static const size_t kDefaultMaxBlockSize = 8192;

  ArenaImpl() { Init(false); }

  ArenaImpl(char* mem, size_t size) {
    GOOGLE_DCHECK_EQ(reinterpret_cast<uintptr_t>(mem) & 7, 0u);
    Init(false);

    // Ignore initial block if it is too small.
    if (mem != nullptr && size >= kBlockHeaderSize + kSerialArenaSize) {
      SetInitialBlock(new (mem) SerialArena::Block(size, nullptr, true, true));
    }
  }

  explicit ArenaImpl(const ArenaOptions& options);

  // Destructor deletes all owned heap allocated objects, and destructs objects
  // that have non-trivial destructors, except for proto2 message objects whose
  // destructors can be skipped. Also, frees all blocks except the initial block
  // if it was passed in.
  ~ArenaImpl();

  uint64 Reset();

  uint64 SpaceAllocated() const;
  uint64 SpaceUsed() const;

  void* AllocateAligned(size_t n) {
    SerialArena* arena;
    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
      return arena->AllocateAligned(n);
    } else {
      return AllocateAlignedFallback(n);
    }
  }

  // This function allocates n bytes if the common happy case is true and
  // returns true. Otherwise it does nothing and returns false. These strange
  // semantics are necessary to allow callers to program functions whose only
  // fallback function calls are in tail position. This substantially improves
  // the generated code for the happy path.
  PROTOBUF_ALWAYS_INLINE bool MaybeAllocateAligned(size_t n, void** out) {
    SerialArena* a;
    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFromThreadCache(&a))) {
      return a->MaybeAllocateAligned(n, out);
    }
    return false;
  }
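
  // A caller written against those semantics keeps its fallback in tail
  // position, e.g. (an illustrative sketch only; CreateSlowPath is a
  // hypothetical out-of-line fallback, not part of this header):
  //
  //   void* p;
  //   if (impl->MaybeAllocateAligned(n, &p)) return p;
  //   return CreateSlowPath(impl, n);  // only call left on the slow path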

  void* AllocateAlignedAndAddCleanup(size_t n, void (*cleanup)(void*));

  // Add object pointer and cleanup function pointer to the list.
  void AddCleanup(void* elem, void (*cleanup)(void*));

  inline void RecordAlloc(const std::type_info* allocated_type,
                          size_t n) const {
    if (PROTOBUF_PREDICT_FALSE(record_allocs())) {
      options_->metrics_collector->OnAlloc(allocated_type, n);
    }
  }

  std::pair<void*, size_t> NewBuffer(size_t last_size, size_t min_bytes);

 private:
  // Pointer to a linked list of SerialArena.
  std::atomic<SerialArena*> threads_;
  std::atomic<SerialArena*> hint_;       // Fast thread-local block access
  std::atomic<size_t> space_allocated_;  // Total size of all allocated blocks.

  // Unique for each arena. Changes on Reset().
  // Least-significant-bit is 1 iff allocations should be recorded.
  uint64 lifecycle_id_;

  struct Options {
    size_t start_block_size;
    size_t max_block_size;
    void* (*block_alloc)(size_t);
    void (*block_dealloc)(void*, size_t);
    ArenaMetricsCollector* metrics_collector;
  };

  Options* options_ = nullptr;

  void* AllocateAlignedFallback(size_t n);
  void* AllocateAlignedAndAddCleanupFallback(size_t n, void (*cleanup)(void*));
  void AddCleanupFallback(void* elem, void (*cleanup)(void*));

  void Init(bool record_allocs);
  void SetInitialBlock(
      SerialArena::Block* block);  // Can be called right after Init()

  // Return true iff allocations should be recorded in a metrics collector.
  inline bool record_allocs() const { return lifecycle_id_ & 1; }

  // Invoke fn(b) for every Block* b.
  template <typename Functor>
  void PerBlock(Functor fn) {
    // By omitting an Acquire barrier we ensure that any user code that doesn't
    // properly synchronize Reset() or the destructor will trigger a TSAN
    // warning.
    SerialArena* serial = threads_.load(std::memory_order_relaxed);
    while (serial) {
      // fn() may delete blocks and arenas, so fetch next pointers before fn().
      SerialArena* cur = serial;
      serial = serial->next();
      for (auto* block = cur->head(); block != nullptr;) {
        auto* b = block;
        block = b->next();
        fn(b);
      }
    }
  }
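
  // For example, a sweep that frees every block the arena owns could be
  // phrased as (an illustrative sketch only, not the actual implementation):
  //
  //   PerBlock([](SerialArena::Block* b) {
  //     if (!b->user_owned()) ArenaFree(b, b->size());
  //   });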

  // Delete or destruct all objects owned by the arena.
  void CleanupList();

  inline void CacheSerialArena(SerialArena* serial) {
    thread_cache().last_serial_arena = serial;
    thread_cache().last_lifecycle_id_seen = lifecycle_id_;
    // TODO(haberman): evaluate whether we would gain efficiency by getting rid
    // of hint_.  It's the only write we do to ArenaImpl in the allocation path,
    // which will dirty the cache line.

    hint_.store(serial, std::memory_order_release);
  }

  PROTOBUF_ALWAYS_INLINE bool GetSerialArenaFast(SerialArena** arena) {
    if (GetSerialArenaFromThreadCache(arena)) return true;

    // Check whether we own the last accessed SerialArena on this arena.  This
    // fast path optimizes the case where a single thread uses multiple arenas.
    ThreadCache* tc = &thread_cache();
    SerialArena* serial = hint_.load(std::memory_order_acquire);
    if (PROTOBUF_PREDICT_TRUE(serial != NULL && serial->owner() == tc)) {
      *arena = serial;
      return true;
    }
    return false;
  }

  PROTOBUF_ALWAYS_INLINE bool GetSerialArenaFromThreadCache(
      SerialArena** arena) {
    // If this thread already owns a block in this arena then try to use that.
    // This fast path optimizes the case where multiple threads allocate from
    // the same arena.
    ThreadCache* tc = &thread_cache();
    if (PROTOBUF_PREDICT_TRUE(tc->last_lifecycle_id_seen == lifecycle_id_)) {
      *arena = tc->last_serial_arena;
      return true;
    }
    return false;
  }
  SerialArena* GetSerialArenaFallback(void* me);

#ifdef _MSC_VER
#pragma warning(disable : 4324)
#endif
  struct alignas(64) ThreadCache {
#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL)
    // If we are using the ThreadLocalStorage class to store the ThreadCache,
    // then the ThreadCache's default constructor has to be responsible for
    // initializing it.
    ThreadCache()
        : next_lifecycle_id(0),
          last_lifecycle_id_seen(-1),
          last_serial_arena(NULL) {}
#endif

    // Number of per-thread lifecycle IDs to reserve. Must be a power of two.
    // To reduce contention on a global atomic, each thread reserves a batch of
    // IDs.  The following number is calculated based on a stress test with
    // ~6500 threads all frequently allocating a new arena.
    static constexpr size_t kPerThreadIds = 256;
    // Next lifecycle ID available to this thread. We need to reserve a new
    // batch if `next_lifecycle_id & (kPerThreadIds - 1) == 0`.
    uint64 next_lifecycle_id;
    // The ThreadCache is considered valid as long as this matches the
    // lifecycle_id of the arena being used.
    uint64 last_lifecycle_id_seen;
    SerialArena* last_serial_arena;
  };
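
  // The batching described above works roughly like this (a descriptive note,
  // hedged since the reservation logic lives in the .cc file): when a thread
  // exhausts its current batch it performs a single fetch_add on the shared
  // lifecycle_id_generator_ below to reserve the next kPerThreadIds IDs, then
  // hands them out locally with no further atomic traffic.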

  // lifecycle_id_ can be a highly contended variable when many arenas are
  // being created. Make sure that other global variables are not sharing its
  // cache line.
#ifdef _MSC_VER
#pragma warning(disable : 4324)
#endif
  struct alignas(64) CacheAlignedLifecycleIdGenerator {
    std::atomic<LifecycleIdAtomic> id;
  };
  static CacheAlignedLifecycleIdGenerator lifecycle_id_generator_;
#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL)
  // The Android NDK does not support the __thread keyword, so we use a custom
  // thread local storage class we implemented.
  // iOS also does not support the __thread keyword.
  static ThreadCache& thread_cache();
#elif defined(PROTOBUF_USE_DLLS)
  // Thread local variables cannot be exposed through DLL interface but we can
  // wrap them in static functions.
  static ThreadCache& thread_cache();
#else
  static PROTOBUF_THREAD_LOCAL ThreadCache thread_cache_;
  static ThreadCache& thread_cache() { return thread_cache_; }
#endif

  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArenaImpl);
  // All protos have pointers back to the arena hence Arena must have
  // pointer stability.
  ArenaImpl(ArenaImpl&&) = delete;
  ArenaImpl& operator=(ArenaImpl&&) = delete;

 public:
  // kBlockHeaderSize is sizeof(Block), aligned up to the nearest multiple of 8
  // to protect the invariant that pos is always at a multiple of 8.
  static constexpr size_t kBlockHeaderSize = SerialArena::kBlockHeaderSize;
  static constexpr size_t kSerialArenaSize =
      (sizeof(SerialArena) + 7) & static_cast<size_t>(-8);
  static constexpr size_t kOptionsSize =
      (sizeof(Options) + 7) & static_cast<size_t>(-8);
  static_assert(kBlockHeaderSize % 8 == 0,
                "kBlockHeaderSize must be a multiple of 8.");
  static_assert(kSerialArenaSize % 8 == 0,
                "kSerialArenaSize must be a multiple of 8.");
};

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include <google/protobuf/port_undef.inc>

#endif  // GOOGLE_PROTOBUF_ARENA_IMPL_H__