// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// This file defines an Arena allocator for better allocation performance.

#ifndef GOOGLE_PROTOBUF_ARENA_IMPL_H__
#define GOOGLE_PROTOBUF_ARENA_IMPL_H__

#include <atomic>
#include <limits>

#include <google/protobuf/stubs/common.h>
#include <google/protobuf/stubs/logging.h>

#ifdef ADDRESS_SANITIZER
#include <sanitizer/asan_interface.h>
#endif  // ADDRESS_SANITIZER

#include <google/protobuf/port_def.inc>


namespace google {
namespace protobuf {

struct ArenaOptions;

namespace internal {

inline size_t AlignUpTo8(size_t n) {
  // Align n to next multiple of 8 (from Hacker's Delight, Chapter 3.)
  return (n + 7) & static_cast<size_t>(-8);
}
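
// For example, AlignUpTo8(0) == 0, AlignUpTo8(1) == 8, AlignUpTo8(8) == 8,
// and AlignUpTo8(13) == 16: adding 7 and masking off the low three bits
// rounds any size up to the next multiple of 8.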

using LifecycleIdAtomic = uint64_t;

void PROTOBUF_EXPORT ArenaFree(void* object, size_t size);

// MetricsCollector collects stats for a particular arena.
class PROTOBUF_EXPORT ArenaMetricsCollector {
 public:
  virtual ~ArenaMetricsCollector();

  // Invoked when the arena is about to be destroyed. This method will
  // typically finalize any metric collection and delete the collector.
  // space_allocated is the space used by the arena.
  virtual void OnDestroy(uint64 space_allocated) = 0;

  // OnReset() is called when the associated arena is reset.
  // space_allocated is the space used by the arena just before the reset.
  virtual void OnReset(uint64 space_allocated) = 0;

  // Does OnAlloc() need to be called?  If false, metric collection overhead
  // will be reduced since we will not do extra work per allocation.
  virtual bool RecordAllocs() = 0;

  // OnAlloc is called when an allocation happens.
  // type_info is promised to be static - its lifetime extends to match the
  // program's lifetime (it is given by the typeid operator).
  // Note: typeid(void) will be passed as allocated_type every time we
  // intentionally want to avoid monitoring an allocation (e.g. internal
  // allocations for managing the arena).
  virtual void OnAlloc(const std::type_info* allocated_type,
                       uint64 alloc_size) = 0;
};
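
// A minimal collector could look like the following sketch (hypothetical
// example; any subclass only has to override the pure virtual methods):
//
//   class NoOpCollector : public ArenaMetricsCollector {
//    public:
//     void OnDestroy(uint64 space_allocated) override { delete this; }
//     void OnReset(uint64 space_allocated) override {}
//     bool RecordAllocs() override { return false; }
//     void OnAlloc(const std::type_info* allocated_type,
//                  uint64 alloc_size) override {}
//   };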

class ArenaImpl;

// A thread-unsafe Arena that can only be used within its owning thread.
class PROTOBUF_EXPORT SerialArena {
 public:
  // Blocks are variable length malloc-ed objects.  The following structure
  // describes the common header for all blocks.
  class PROTOBUF_EXPORT Block {
   public:
    Block(size_t size, Block* next, bool special, bool user_owned)
        : next_and_bits_(reinterpret_cast<uintptr_t>(next) | (special ? 1 : 0) |
                         (user_owned ? 2 : 0)),
          pos_(kBlockHeaderSize),
          size_(size) {
      GOOGLE_DCHECK_EQ(reinterpret_cast<uintptr_t>(next) & 3, 0u);
    }

    char* Pointer(size_t n) {
      GOOGLE_DCHECK(n <= size_);
      return reinterpret_cast<char*>(this) + n;
    }

    // One of the blocks may be special. This is either a user-supplied
    // initial block, or a block we created at startup to hold Options info.
    // A special block is not deleted by Reset.
    bool special() const { return (next_and_bits_ & 1) != 0; }

    // Whether or not this current block is owned by the user.
    // Only special blocks can be user_owned.
    bool user_owned() const { return (next_and_bits_ & 2) != 0; }

    Block* next() const {
      const uintptr_t bottom_bits = 3;
      return reinterpret_cast<Block*>(next_and_bits_ & ~bottom_bits);
    }

    void clear_next() {
      next_and_bits_ &= 3;  // Set next to nullptr, preserve bottom bits.
    }

    size_t pos() const { return pos_; }
    size_t size() const { return size_; }
    void set_pos(size_t pos) { pos_ = pos; }

   private:
    // Holds pointer to next block for this thread + special/user_owned bits.
    uintptr_t next_and_bits_;

    size_t pos_;
    size_t size_;
    // data follows
  };

  // The allocate/free methods here are a little strange, since SerialArena is
  // allocated inside a Block which it also manages.  This is to avoid doing
  // an extra allocation for the SerialArena itself.

  // Creates a new SerialArena inside Block* and returns it.
  static SerialArena* New(Block* b, void* owner, ArenaImpl* arena);
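
  // Roughly, the memory layout of a block that hosts a SerialArena looks like
  // this (illustrative sketch):
  //
  //   [ Block header | SerialArena object | bump-allocated objects ... ]
  //   ^ b                                  ^ ptr_ advances through here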

  void CleanupList();
  uint64 SpaceUsed() const;

  bool HasSpace(size_t n) { return n <= static_cast<size_t>(limit_ - ptr_); }

  void* AllocateAligned(size_t n) {
    GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n);  // Must be already aligned.
    GOOGLE_DCHECK_GE(limit_, ptr_);
    if (PROTOBUF_PREDICT_FALSE(!HasSpace(n))) {
      return AllocateAlignedFallback(n);
    }
    void* ret = ptr_;
    ptr_ += n;
#ifdef ADDRESS_SANITIZER
    ASAN_UNPOISON_MEMORY_REGION(ret, n);
#endif  // ADDRESS_SANITIZER
    return ret;
  }
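
  // The fast path above is a plain bump-pointer allocation: for example, two
  // back-to-back AllocateAligned(16) calls that both fit in the current block
  // return ptr_ and ptr_ + 16, advancing ptr_ by 16 each time; only when ptr_
  // would cross limit_ does the fallback acquire a new block.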

  // Allocate space if the current region provides enough space.
  bool MaybeAllocateAligned(size_t n, void** out) {
    GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n);  // Must be already aligned.
    GOOGLE_DCHECK_GE(limit_, ptr_);
    if (PROTOBUF_PREDICT_FALSE(!HasSpace(n))) return false;
    void* ret = ptr_;
    ptr_ += n;
#ifdef ADDRESS_SANITIZER
    ASAN_UNPOISON_MEMORY_REGION(ret, n);
#endif  // ADDRESS_SANITIZER
    *out = ret;
    return true;
  }

  void AddCleanup(void* elem, void (*cleanup)(void*)) {
    if (PROTOBUF_PREDICT_FALSE(cleanup_ptr_ == cleanup_limit_)) {
      AddCleanupFallback(elem, cleanup);
      return;
    }
    cleanup_ptr_->elem = elem;
    cleanup_ptr_->cleanup = cleanup;
    cleanup_ptr_++;
  }

  void* AllocateAlignedAndAddCleanup(size_t n, void (*cleanup)(void*)) {
    void* ret = AllocateAligned(n);
    AddCleanup(ret, cleanup);
    return ret;
  }

  Block* head() const { return head_; }
  void* owner() const { return owner_; }
  SerialArena* next() const { return next_; }
  void set_next(SerialArena* next) { next_ = next; }
  static Block* NewBlock(Block* last_block, size_t min_bytes, ArenaImpl* arena);

 private:
  // Node contains the ptr of the object to be cleaned up and the associated
  // cleanup function ptr.
  struct CleanupNode {
    void* elem;              // Pointer to the object to be cleaned up.
    void (*cleanup)(void*);  // Function pointer to the destructor or deleter.
  };

  // Cleanup uses a chunked linked list, to reduce pointer chasing.
  struct CleanupChunk {
    static size_t SizeOf(size_t i) {
      return sizeof(CleanupChunk) + (sizeof(CleanupNode) * (i - 1));
    }
    size_t size;           // Total elements in the list.
    CleanupChunk* next;    // Next node in the list.
    CleanupNode nodes[1];  // True length is |size|.
  };
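
  // For instance, SizeOf(8) is the number of bytes needed for a chunk whose
  // nodes[] array holds 8 CleanupNode entries: sizeof(CleanupChunk) already
  // accounts for nodes[0], so space for 7 more nodes is added on top of it.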

  ArenaImpl* arena_;       // Containing arena.
  void* owner_;            // &ThreadCache of this thread;
  Block* head_;            // Head of linked list of blocks.
  CleanupChunk* cleanup_;  // Head of cleanup list.
  SerialArena* next_;      // Next SerialArena in this linked list.

  // Next pointer to allocate from.  Always 8-byte aligned.  Points inside
  // head_ (and head_->pos will always be non-canonical).  We keep these
  // here to reduce indirection.
  char* ptr_;
  char* limit_;

  // Next CleanupList members to append to.  These point inside cleanup_.
  CleanupNode* cleanup_ptr_;
  CleanupNode* cleanup_limit_;

  void* AllocateAlignedFallback(size_t n);
  void AddCleanupFallback(void* elem, void (*cleanup)(void*));
  void CleanupListFallback();

 public:
  static constexpr size_t kBlockHeaderSize =
      (sizeof(Block) + 7) & static_cast<size_t>(-8);
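  // Concretely, on a typical 64-bit build sizeof(Block) is 24 bytes (one
  // uintptr_t plus two size_t members), which is already a multiple of 8, so
  // kBlockHeaderSize == 24 and pos_ starts out at offset 24 (see
  // pos_(kBlockHeaderSize) in the Block constructor above).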
};

// This class provides the core Arena memory allocation library.  Different
// implementations only need to implement the public interface below.
// Arena is not a template type as that would only be useful if all protos
// in turn would be templates, which will not (and cannot) happen.  However,
// by separating the memory allocation part from the cruft of the API that
// users expect, we can use #ifdef to select the best implementation based
// on hardware / OS.
class PROTOBUF_EXPORT ArenaImpl {
 public:
  static const size_t kDefaultStartBlockSize = 256;
  static const size_t kDefaultMaxBlockSize = 8192;

  ArenaImpl() { Init(false); }

  ArenaImpl(char* mem, size_t size) {
    GOOGLE_DCHECK_EQ(reinterpret_cast<uintptr_t>(mem) & 7, 0u);
    Init(false);

    // Ignore initial block if it is too small.
    if (mem != nullptr && size >= kBlockHeaderSize + kSerialArenaSize) {
      SetInitialBlock(new (mem) SerialArena::Block(size, nullptr, true, true));
    }
  }

  explicit ArenaImpl(const ArenaOptions& options);

  // Destructor deletes all owned heap allocated objects, and destructs objects
  // that have non-trivial destructors, except for proto2 message objects whose
  // destructors can be skipped. Also, frees all blocks except the initial block
  // if it was passed in.
  ~ArenaImpl();

  uint64 Reset();

  uint64 SpaceAllocated() const;
  uint64 SpaceUsed() const;

  void* AllocateAligned(size_t n) {
    SerialArena* arena;
    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) {
      return arena->AllocateAligned(n);
    } else {
      return AllocateAlignedFallback(n);
    }
  }

  // This function allocates n bytes if the common happy case is true and
  // returns true.  Otherwise it does nothing and returns false.  These strange
  // semantics are necessary to allow callers to program functions that only
  // have fallback function calls in tail position.  This substantially
  // improves code for the happy path.
  PROTOBUF_ALWAYS_INLINE bool MaybeAllocateAligned(size_t n, void** out) {
    SerialArena* a;
    if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFromThreadCache(&a))) {
      return a->MaybeAllocateAligned(n, out);
    }
    return false;
  }
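
  // A typical caller pattern that benefits from this (hypothetical sketch;
  // AllocSlow stands in for whatever fallback the caller uses):
  //
  //   void* AllocFast(ArenaImpl* impl, size_t n) {
  //     void* mem;
  //     if (impl->MaybeAllocateAligned(n, &mem)) return mem;
  //     return AllocSlow(impl, n);  // fallback call stays in tail position
  //   }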

  void* AllocateAlignedAndAddCleanup(size_t n, void (*cleanup)(void*));

  // Add object pointer and cleanup function pointer to the list.
  void AddCleanup(void* elem, void (*cleanup)(void*));

  inline void RecordAlloc(const std::type_info* allocated_type,
                          size_t n) const {
    if (PROTOBUF_PREDICT_FALSE(record_allocs())) {
      options_->metrics_collector->OnAlloc(allocated_type, n);
    }
  }

  std::pair<void*, size_t> NewBuffer(size_t last_size, size_t min_bytes);

 private:
  // Pointer to a linked list of SerialArena.
  std::atomic<SerialArena*> threads_;
  std::atomic<SerialArena*> hint_;       // Fast thread-local block access
  std::atomic<size_t> space_allocated_;  // Total size of all allocated blocks.

  // Unique for each arena. Changes on Reset().
  // Least-significant-bit is 1 iff allocations should be recorded.
  uint64 lifecycle_id_;

  struct Options {
    size_t start_block_size;
    size_t max_block_size;
    void* (*block_alloc)(size_t);
    void (*block_dealloc)(void*, size_t);
    ArenaMetricsCollector* metrics_collector;
  };

  Options* options_ = nullptr;

  void* AllocateAlignedFallback(size_t n);
  void* AllocateAlignedAndAddCleanupFallback(size_t n, void (*cleanup)(void*));
  void AddCleanupFallback(void* elem, void (*cleanup)(void*));

  void Init(bool record_allocs);
  void SetInitialBlock(
      SerialArena::Block* block);  // Can be called right after Init()

  // Return true iff allocations should be recorded in a metrics collector.
  inline bool record_allocs() const { return lifecycle_id_ & 1; }

  // Invoke fn(b) for every Block* b.
  template <typename Functor>
  void PerBlock(Functor fn) {
    // By omitting an Acquire barrier we ensure that any user code that doesn't
    // properly synchronize Reset() or the destructor will throw a TSAN warning.
    SerialArena* serial = threads_.load(std::memory_order_relaxed);
    while (serial) {
      // fn() may delete blocks and arenas, so fetch next pointers before fn();
      SerialArena* cur = serial;
      serial = serial->next();
      for (auto* block = cur->head(); block != nullptr;) {
        auto* b = block;
        block = b->next();
        fn(b);
      }
    }
  }
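
  // For example, an internal caller that just sums block sizes could be
  // written as (illustrative sketch):
  //
  //   size_t total = 0;
  //   PerBlock([&total](SerialArena::Block* b) { total += b->size(); });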

  // Delete or Destruct all objects owned by the arena.
  void CleanupList();

  inline void CacheSerialArena(SerialArena* serial) {
    thread_cache().last_serial_arena = serial;
    thread_cache().last_lifecycle_id_seen = lifecycle_id_;
    // TODO(haberman): evaluate whether we would gain efficiency by getting rid
    // of hint_.  It's the only write we do to ArenaImpl in the allocation path,
    // which will dirty the cache line.

    hint_.store(serial, std::memory_order_release);
  }

  PROTOBUF_ALWAYS_INLINE bool GetSerialArenaFast(SerialArena** arena) {
    if (GetSerialArenaFromThreadCache(arena)) return true;

    // Check whether we own the last accessed SerialArena on this arena.  This
    // fast path optimizes the case where a single thread uses multiple arenas.
    ThreadCache* tc = &thread_cache();
    SerialArena* serial = hint_.load(std::memory_order_acquire);
    if (PROTOBUF_PREDICT_TRUE(serial != NULL && serial->owner() == tc)) {
      *arena = serial;
      return true;
    }
    return false;
  }

  PROTOBUF_ALWAYS_INLINE bool GetSerialArenaFromThreadCache(
      SerialArena** arena) {
    // If this thread already owns a block in this arena then try to use that.
    // This fast path optimizes the case where multiple threads allocate from
    // the same arena.
    ThreadCache* tc = &thread_cache();
    if (PROTOBUF_PREDICT_TRUE(tc->last_lifecycle_id_seen == lifecycle_id_)) {
      *arena = tc->last_serial_arena;
      return true;
    }
    return false;
  }
  SerialArena* GetSerialArenaFallback(void* me);

#ifdef _MSC_VER
#pragma warning(disable : 4324)
#endif
  struct alignas(64) ThreadCache {
#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL)
    // If we are using the ThreadLocalStorage class to store the ThreadCache,
    // then the ThreadCache's default constructor has to be responsible for
    // initializing it.
    ThreadCache()
        : next_lifecycle_id(0),
          last_lifecycle_id_seen(-1),
          last_serial_arena(NULL) {}
#endif

    // Number of per-thread lifecycle IDs to reserve.  Must be a power of two.
    // To reduce contention on a global atomic, each thread reserves a batch of
    // IDs.  The following number is calculated based on a stress test with
    // ~6500 threads all frequently allocating a new arena.
    static constexpr size_t kPerThreadIds = 256;
    // Next lifecycle ID available to this thread.  We need to reserve a new
    // batch if `next_lifecycle_id & (kPerThreadIds - 1) == 0`.
    uint64 next_lifecycle_id;
    // The ThreadCache is considered valid as long as this matches the
    // lifecycle_id of the arena being used.
    uint64 last_lifecycle_id_seen;
    SerialArena* last_serial_arena;
  };
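
  // In other words, lifecycle IDs are handed out to each thread in batches of
  // kPerThreadIds: a thread touches the shared generator only when
  // next_lifecycle_id reaches a multiple of 256 (0, 256, 512, ...) and serves
  // the IDs in between locally.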

  // lifecycle_id can be a highly contended variable when lots of arenas are
  // being created.  Make sure that other global variables are not sharing the
  // cache line.
#ifdef _MSC_VER
#pragma warning(disable : 4324)
#endif
  struct alignas(64) CacheAlignedLifecycleIdGenerator {
    std::atomic<LifecycleIdAtomic> id;
  };
  static CacheAlignedLifecycleIdGenerator lifecycle_id_generator_;
#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL)
  // Android ndk does not support __thread keyword so we use a custom thread
  // local storage class we implemented.
  // iOS also does not support the __thread keyword.
  static ThreadCache& thread_cache();
#elif defined(PROTOBUF_USE_DLLS)
  // Thread local variables cannot be exposed through DLL interface but we can
  // wrap them in static functions.
  static ThreadCache& thread_cache();
#else
  static PROTOBUF_THREAD_LOCAL ThreadCache thread_cache_;
  static ThreadCache& thread_cache() { return thread_cache_; }
#endif

  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArenaImpl);
  // All protos have pointers back to the arena hence Arena must have
  // pointer stability.
  ArenaImpl(ArenaImpl&&) = delete;
  ArenaImpl& operator=(ArenaImpl&&) = delete;

 public:
  // kBlockHeaderSize is sizeof(Block), aligned up to the nearest multiple of 8
  // to protect the invariant that pos is always at a multiple of 8.
  static constexpr size_t kBlockHeaderSize = SerialArena::kBlockHeaderSize;
  static constexpr size_t kSerialArenaSize =
      (sizeof(SerialArena) + 7) & static_cast<size_t>(-8);
  static constexpr size_t kOptionsSize =
      (sizeof(Options) + 7) & static_cast<size_t>(-8);
  static_assert(kBlockHeaderSize % 8 == 0,
                "kBlockHeaderSize must be a multiple of 8.");
  static_assert(kSerialArenaSize % 8 == 0,
                "kSerialArenaSize must be a multiple of 8.");
};

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include <google/protobuf/port_undef.inc>

#endif  // GOOGLE_PROTOBUF_ARENA_IMPL_H__