// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_
#define BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_

#include <atomic>
#include <cstdint>
#include <limits>
#include <memory>

#include "base/allocator/partition_allocator/partition_alloc-inl.h"
#include "base/allocator/partition_allocator/partition_alloc_base/compiler_specific.h"
#include "base/allocator/partition_allocator/partition_alloc_base/component_export.h"
#include "base/allocator/partition_allocator/partition_alloc_base/debug/debugging_buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc_base/gtest_prod_util.h"
#include "base/allocator/partition_allocator/partition_alloc_base/thread_annotations.h"
#include "base/allocator/partition_allocator/partition_alloc_base/time/time.h"
#include "base/allocator/partition_allocator/partition_alloc_buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc_config.h"
#include "base/allocator/partition_allocator/partition_alloc_forward.h"
#include "base/allocator/partition_allocator/partition_bucket_lookup.h"
#include "base/allocator/partition_allocator/partition_freelist_entry.h"
#include "base/allocator/partition_allocator/partition_lock.h"
#include "base/allocator/partition_allocator/partition_stats.h"
#include "base/allocator/partition_allocator/partition_tls.h"
#include "build/build_config.h"

#if defined(ARCH_CPU_X86_64) && BUILDFLAG(HAS_64_BIT_POINTERS)
#include <algorithm>
#endif

namespace partition_alloc {

class ThreadCache;

namespace tools {

// This is used from ThreadCacheInspector, which runs in a different process.
// It scans the process memory looking for the two needles, to locate the
// thread cache registry instance.
//
// These two values were chosen randomly, and in particular neither is a valid
// pointer on most 64-bit architectures.
#if BUILDFLAG(HAS_64_BIT_POINTERS)
constexpr uintptr_t kNeedle1 = 0xe69e32f3ad9ea63;
constexpr uintptr_t kNeedle2 = 0x9615ee1c5eb14caf;
#else
constexpr uintptr_t kNeedle1 = 0xe69e32f3;
constexpr uintptr_t kNeedle2 = 0x9615ee1c;
#endif  // BUILDFLAG(HAS_64_BIT_POINTERS)

// This array contains, in order:
// - kNeedle1
// - &ThreadCacheRegistry::Instance()
// - kNeedle2
//
// It is referenced in the thread cache constructor to make sure it is not
// removed by the compiler. It is also not const to make sure it ends up in
// .data.
constexpr size_t kThreadCacheNeedleArraySize = 4;
extern uintptr_t kThreadCacheNeedleArray[kThreadCacheNeedleArraySize];

class HeapDumper;
class ThreadCacheInspector;

}  // namespace tools

namespace internal {

extern PA_COMPONENT_EXPORT(PARTITION_ALLOC) PartitionTlsKey g_thread_cache_key;

#if PA_CONFIG(THREAD_CACHE_FAST_TLS)
extern PA_COMPONENT_EXPORT(
    PARTITION_ALLOC) thread_local ThreadCache* g_thread_cache;
#endif

}  // namespace internal

struct ThreadCacheLimits {
  // When trying to conserve memory, set the thread cache limit to this.
  static constexpr size_t kDefaultSizeThreshold = 512;
  // 32kiB is chosen here as from local experiments, "zone" allocation in
  // V8 is performance-sensitive, and zones can (and do) grow up to 32kiB for
  // each individual allocation.
  static constexpr size_t kLargeSizeThreshold = 1 << 15;
  static_assert(kLargeSizeThreshold <= std::numeric_limits<uint16_t>::max(),
                "");
};
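// For reference, the arithmetic behind the static_assert above:
// kLargeSizeThreshold is 1 << 15 == 32768 bytes (32 kiB), which fits in a
// uint16_t (maximum 65535). This matters because ThreadCache::Bucket below
// stores each bucket's slot_size as a uint16_t.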
// Global registry of all ThreadCache instances.
//
// This class cannot allocate in the (Un)registerThreadCache() functions, as
// they are called from the ThreadCache constructor, which is from within the
// allocator. However the other members can allocate.
class PA_COMPONENT_EXPORT(PARTITION_ALLOC) ThreadCacheRegistry {
 public:
  static ThreadCacheRegistry& Instance();
  // Do not instantiate.
  //
  // Several things are surprising here:
  // - The constructor is public even though this is intended to be a
  //   singleton: we cannot use a "static local" variable in |Instance()| as
  //   this is reached too early during CRT initialization on Windows, meaning
  //   that static local variables don't work (as they call into the
  //   uninitialized runtime). To sidestep that, we use a regular global
  //   variable in the .cc, which is fine as this object's constructor is
  //   constexpr.
  // - Marked inline so that the chromium style plugin doesn't complain that a
  //   "complex constructor" has an inline body. This warning is disabled when
  //   the constructor is explicitly marked "inline". Note that this is a false
  //   positive of the plugin, since constexpr implies inline.
  inline constexpr ThreadCacheRegistry();

  void RegisterThreadCache(ThreadCache* cache);
  void UnregisterThreadCache(ThreadCache* cache);
  // Prints statistics for all thread caches, or this thread's only.
  void DumpStats(bool my_thread_only, ThreadCacheStats* stats);
  // Purge() this thread's cache, and asks the other ones to trigger Purge() at
  // a later point (during a deallocation).
  void PurgeAll();

  // Runs `PurgeAll` and updates the next interval which
  // `GetPeriodicPurgeNextIntervalInMicroseconds` returns.
  //
  // Note that it is the caller's responsibility to invoke this member function
  // periodically with an appropriate interval. This function does not schedule
  // any task nor timer.
  void RunPeriodicPurge();
  // Returns the appropriate interval to invoke `RunPeriodicPurge` next time.
  int64_t GetPeriodicPurgeNextIntervalInMicroseconds() const;
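  // A minimal sketch of how an embedder might drive the periodic purge; the
  // task-posting facility (PostDelayedTask below) is illustrative only and is
  // not part of PartitionAlloc:
  //
  //   void SchedulePurge() {
  //     int64_t delay_us = ThreadCacheRegistry::Instance()
  //                            .GetPeriodicPurgeNextIntervalInMicroseconds();
  //     PostDelayedTask(delay_us, [] {
  //       ThreadCacheRegistry::Instance().RunPeriodicPurge();
  //       SchedulePurge();  // Re-arm with the (possibly updated) interval.
  //     });
  //   }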
  // Controls the thread cache size, by setting the multiplier to a value above
  // or below |ThreadCache::kDefaultMultiplier|.
  void SetThreadCacheMultiplier(float multiplier);
  void SetLargestActiveBucketIndex(uint8_t largest_active_bucket_index);

  static internal::Lock& GetLock() { return Instance().lock_; }

  // Purges all thread caches *now*. This is completely thread-unsafe, and
  // should only be called in a post-fork() handler.
  void ForcePurgeAllThreadAfterForkUnsafe();

  void ResetForTesting();

  static constexpr internal::base::TimeDelta kMinPurgeInterval =
      internal::base::Seconds(1);
  static constexpr internal::base::TimeDelta kMaxPurgeInterval =
      internal::base::Minutes(1);
  static constexpr internal::base::TimeDelta kDefaultPurgeInterval =
      2 * kMinPurgeInterval;
  static constexpr size_t kMinCachedMemoryForPurging = 500 * 1024;

 private:
  friend class tools::ThreadCacheInspector;
  friend class tools::HeapDumper;

  // Not using base::Lock as the object's constructor must be constexpr.
  internal::Lock lock_;
  ThreadCache* list_head_ PA_GUARDED_BY(GetLock()) = nullptr;
  bool periodic_purge_is_initialized_ = false;
  internal::base::TimeDelta periodic_purge_next_interval_ =
      kDefaultPurgeInterval;

  uint8_t largest_active_bucket_index_ = internal::BucketIndexLookup::GetIndex(
      ThreadCacheLimits::kDefaultSizeThreshold);
};

constexpr ThreadCacheRegistry::ThreadCacheRegistry() = default;

#if PA_CONFIG(THREAD_CACHE_ENABLE_STATISTICS)
#define PA_INCREMENT_COUNTER(counter) ++counter
#else
#define PA_INCREMENT_COUNTER(counter) \
  do {                                \
  } while (0)
#endif  // PA_CONFIG(THREAD_CACHE_ENABLE_STATISTICS)

#if BUILDFLAG(PA_DCHECK_IS_ON)

namespace internal {

class ReentrancyGuard {
 public:
  explicit ReentrancyGuard(bool& flag) : flag_(flag) {
    PA_CHECK(!flag_);
    flag_ = true;
  }

  ~ReentrancyGuard() { flag_ = false; }

 private:
  bool& flag_;
};

}  // namespace internal

#define PA_REENTRANCY_GUARD(x)      \
  internal::ReentrancyGuard guard { \
    x                               \
  }

#else  // BUILDFLAG(PA_DCHECK_IS_ON)

#define PA_REENTRANCY_GUARD(x) \
  do {                         \
  } while (0)

#endif  // BUILDFLAG(PA_DCHECK_IS_ON)

// Per-thread cache. *Not* threadsafe, must only be accessed from a single
// thread.
//
// In practice, this is easily enforced as long as only |instance| is
// manipulated, as it is a thread_local member. As such, any
// |ThreadCache::instance->*()| call will necessarily be done from a single
// thread.
class PA_COMPONENT_EXPORT(PARTITION_ALLOC) ThreadCache {
 public:
  // Initializes the thread cache for |root|. May allocate, so should be called
  // with the thread cache disabled on the partition side, and without the
  // partition lock held.
  //
  // May only be called by a single PartitionRoot.
  static void Init(PartitionRoot<>* root);

  static void DeleteForTesting(ThreadCache* tcache);

  // Deletes existing thread cache and creates a new one for |root|.
  static void SwapForTesting(PartitionRoot<>* root);

  // Removes the tombstone marker that would be returned by Get() otherwise.
  static void RemoveTombstoneForTesting();

  // Can be called several times, must be called before any ThreadCache
  // interactions.
  static void EnsureThreadSpecificDataInitialized();

  static ThreadCache* Get() {
#if PA_CONFIG(THREAD_CACHE_FAST_TLS)
    return internal::g_thread_cache;
#else
    // This region isn't MTE-tagged.
    return reinterpret_cast<ThreadCache*>(
        internal::PartitionTlsGet(internal::g_thread_cache_key));
#endif
  }

  static bool IsValid(ThreadCache* tcache) {
    // Do not MTE-untag, as it'd mess up the sentinel value.
    return reinterpret_cast<uintptr_t>(tcache) & kTombstoneMask;
  }

  static bool IsTombstone(ThreadCache* tcache) {
    // Do not MTE-untag, as it'd mess up the sentinel value.
    return reinterpret_cast<uintptr_t>(tcache) == kTombstone;
  }
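  // Sketch of the expected call pattern on the allocation fast path, assuming
  // EnsureThreadSpecificDataInitialized() has already run for this thread:
  //
  //   ThreadCache* tcache = ThreadCache::Get();
  //   if (PA_LIKELY(ThreadCache::IsValid(tcache))) {
  //     // Use the cache. IsValid() rejects both nullptr and the tombstone
  //     // value set during thread destruction (see kTombstone below).
  //   } else {
  //     // Fall back to the central allocator.
  //   }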
  // Create a new ThreadCache associated with |root|.
  // Must be called without the partition locked, as this may allocate.
  static ThreadCache* Create(PartitionRoot<>* root);

  ~ThreadCache();

  // Force placement new.
  void* operator new(size_t) = delete;
  void* operator new(size_t, void* buffer) { return buffer; }
  void operator delete(void* ptr) = delete;
  ThreadCache(const ThreadCache&) = delete;
  ThreadCache(const ThreadCache&&) = delete;
  ThreadCache& operator=(const ThreadCache&) = delete;

  // Tries to put a slot at |slot_start| into the cache.
  // The slot comes from the bucket at index |bucket_index| from the partition
  // this cache is for.
  //
  // Returns true if the slot was put in the cache, and false otherwise. This
  // can happen either because the cache is full or the allocation was too
  // large.
  PA_ALWAYS_INLINE bool MaybePutInCache(uintptr_t slot_start,
                                        size_t bucket_index,
                                        size_t* slot_size);

  // Tries to allocate a memory slot from the cache.
  // Returns 0 on failure.
  //
  // Has the same behavior as RawAlloc(), that is: no cookie nor ref-count
  // handling. Sets |slot_size| to the allocated size upon success.
  PA_ALWAYS_INLINE uintptr_t GetFromCache(size_t bucket_index,
                                          size_t* slot_size);

  // Asks this cache to trigger |Purge()| at a later point. Can be called from
  // any thread.
  void SetShouldPurge();

  // Empties the cache.
  // The Partition lock must *not* be held when calling this.
  // Must be called from the thread this cache is for.
  void Purge();

  // |TryPurge| is the same as |Purge|, except that |TryPurge| will
  // not crash if the thread cache is inconsistent. Normally inconsistency
  // is a sign of a bug somewhere, so |Purge| should be preferred in most
  // cases.
  void TryPurge();

  // Amount of cached memory for this thread's cache, in bytes.
  size_t CachedMemory() const;

  void AccumulateStats(ThreadCacheStats* stats) const;

  // Purge the thread cache of the current thread, if one exists.
  static void PurgeCurrentThread();

  const ThreadAllocStats& thread_alloc_stats() const {
    return thread_alloc_stats_;
  }

  size_t bucket_count_for_testing(size_t index) const {
    return buckets_[index].count;
  }

  internal::base::PlatformThreadId thread_id() const { return thread_id_; }

  // Sets the maximum size of allocations that may be cached by the thread
  // cache. This applies to all threads. However, the maximum size is bounded
  // by |kLargeSizeThreshold|.
  static void SetLargestCachedSize(size_t size);

  // Cumulative stats about *all* allocations made on the `root_` partition on
  // this thread, that is not only the allocations serviced by the thread
  // cache, but all allocations, including large and direct-mapped ones. This
  // should in theory be split into a separate PerThread data structure, but
  // the thread cache is the only per-thread data we have as of now.
  //
  // TODO(lizeb): Investigate adding a proper per-thread data structure.
  PA_ALWAYS_INLINE void RecordAllocation(size_t size);
  PA_ALWAYS_INLINE void RecordDeallocation(size_t size);
  void ResetPerThreadAllocationStatsForTesting();

  // Fill 1 / kBatchFillRatio * bucket.limit slots at a time.
  static constexpr uint16_t kBatchFillRatio = 8;

  // Limit for the smallest bucket will be kDefaultMultiplier *
  // kSmallBucketBaseCount by default.
  static constexpr float kDefaultMultiplier = 2.;
  static constexpr uint8_t kSmallBucketBaseCount = 64;

  static constexpr size_t kDefaultSizeThreshold =
      ThreadCacheLimits::kDefaultSizeThreshold;
  static constexpr size_t kLargeSizeThreshold =
      ThreadCacheLimits::kLargeSizeThreshold;
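  // Worked example of the defaults above: with kDefaultMultiplier = 2 and
  // kSmallBucketBaseCount = 64, the smallest bucket's limit is 2 * 64 = 128
  // cached slots, and a batch fill brings in 1 / kBatchFillRatio of that,
  // i.e. 128 / 8 = 16 slots per trip to the central allocator.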
  const ThreadCache* prev_for_testing() const
      PA_EXCLUSIVE_LOCKS_REQUIRED(ThreadCacheRegistry::GetLock()) {
    return prev_;
  }
  const ThreadCache* next_for_testing() const
      PA_EXCLUSIVE_LOCKS_REQUIRED(ThreadCacheRegistry::GetLock()) {
    return next_;
  }

 private:
  friend class tools::HeapDumper;
  friend class tools::ThreadCacheInspector;

  struct Bucket {
    internal::PartitionFreelistEntry* freelist_head = nullptr;
    // Want to keep sizeof(Bucket) small, using small types.
    uint8_t count = 0;
    std::atomic<uint8_t> limit{};  // Can be changed from another thread.
    uint16_t slot_size = 0;

    Bucket();
  };
  static_assert(sizeof(Bucket) <= 2 * sizeof(void*), "Keep Bucket small.");

  explicit ThreadCache(PartitionRoot<>* root);
  static void Delete(void* thread_cache_ptr);

  void PurgeInternal();
  template <bool crash_on_corruption>
  void PurgeInternalHelper();

  // Fills a bucket from the central allocator.
  void FillBucket(size_t bucket_index);

  // Empties the |bucket| until there are at most |limit| objects in it.
  template <bool crash_on_corruption>
  void ClearBucketHelper(Bucket& bucket, size_t limit);
  void ClearBucket(Bucket& bucket, size_t limit);

  PA_ALWAYS_INLINE void PutInBucket(Bucket& bucket, uintptr_t slot_start);

  void ResetForTesting();

  // Releases the entire freelist starting at |head| to the root.
  template <bool crash_on_corruption>
  void FreeAfter(internal::PartitionFreelistEntry* head, size_t slot_size);

  static void SetGlobalLimits(PartitionRoot<>* root, float multiplier);

  static constexpr uint16_t kBucketCount =
      internal::BucketIndexLookup::GetIndex(ThreadCache::kLargeSizeThreshold) +
      1;
  static_assert(
      kBucketCount < internal::kNumBuckets,
      "Cannot have more cached buckets than what the allocator supports");

  // On some architectures, ThreadCache::Get() can be called and return
  // something after the thread cache has been destroyed. In this case, we set
  // it to this value, to signal that the thread is being terminated, and the
  // thread cache should not be used.
  //
  // This happens in particular on Windows, during program termination.
  //
  // We choose 0x1 as the value as it is an invalid pointer value, since it is
  // not aligned, and too low. Also, checking !(ptr & kTombstoneMask) checks
  // for nullptr and kTombstone at the same time.
  static constexpr uintptr_t kTombstone = 0x1;
  static constexpr uintptr_t kTombstoneMask = ~kTombstone;
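  // Why a single mask check suffices: with kTombstone == 0x1, kTombstoneMask
  // is ~0x1, so both nullptr (0x0) and kTombstone (0x1) yield 0 when ANDed
  // with the mask, while any real (aligned) ThreadCache pointer keeps non-zero
  // bits. IsValid() above relies on exactly this.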
  static uint8_t global_limits_[kBucketCount];

  // Index of the largest active bucket. Not all processes/platforms will use
  // all buckets, as using larger buckets increases the memory footprint.
  //
  // TODO(lizeb): Investigate making this per-thread rather than static, to
  // improve locality, and open the door to per-thread settings.
  static uint16_t largest_active_bucket_index_;

  // These are at the beginning as they're accessed for each allocation.
  uint32_t cached_memory_ = 0;
  std::atomic<bool> should_purge_;
  ThreadCacheStats stats_;
  ThreadAllocStats thread_alloc_stats_;

  // Buckets are quite big, though each is only 2 pointers.
  Bucket buckets_[kBucketCount];

  // Cold data below.
  PartitionRoot<>* const root_;

  const internal::base::PlatformThreadId thread_id_;

#if BUILDFLAG(PA_DCHECK_IS_ON)
  bool is_in_thread_cache_ = false;
#endif

  // Intrusive list since ThreadCacheRegistry::RegisterThreadCache() cannot
  // allocate.
  ThreadCache* next_ PA_GUARDED_BY(ThreadCacheRegistry::GetLock());
  ThreadCache* prev_ PA_GUARDED_BY(ThreadCacheRegistry::GetLock());

  friend class ThreadCacheRegistry;
  friend class PartitionAllocThreadCacheTest;
  friend class tools::ThreadCacheInspector;
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest, Simple);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              MultipleObjectsCachedPerBucket);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              LargeAllocationsAreNotCached);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              MultipleThreadCaches);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest, RecordStats);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              ThreadCacheRegistry);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              MultipleThreadCachesAccounting);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              DynamicCountPerBucket);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              DynamicCountPerBucketClamping);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              DynamicCountPerBucketMultipleThreads);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              DynamicSizeThreshold);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              DynamicSizeThresholdPurge);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest, ClearFromTail);
};

PA_ALWAYS_INLINE bool ThreadCache::MaybePutInCache(uintptr_t slot_start,
                                                   size_t bucket_index,
                                                   size_t* slot_size) {
  PA_REENTRANCY_GUARD(is_in_thread_cache_);
  PA_INCREMENT_COUNTER(stats_.cache_fill_count);

  if (PA_UNLIKELY(bucket_index > largest_active_bucket_index_)) {
    PA_INCREMENT_COUNTER(stats_.cache_fill_misses);
    return false;
  }

  auto& bucket = buckets_[bucket_index];

  PA_DCHECK(bucket.count != 0 || bucket.freelist_head == nullptr);

  PutInBucket(bucket, slot_start);
  cached_memory_ += bucket.slot_size;
  PA_INCREMENT_COUNTER(stats_.cache_fill_hits);

  // Relaxed ordering: we don't care about having an up-to-date or consistent
  // value, just want it to not change while we are using it, hence using
  // relaxed ordering, and loading into a local variable. Without it, we are
  // gambling that the compiler would not issue multiple loads.
  uint8_t limit = bucket.limit.load(std::memory_order_relaxed);
  // Batched deallocation, amortizing lock acquisitions.
  if (PA_UNLIKELY(bucket.count > limit)) {
    ClearBucket(bucket, limit / 2);
  }

  if (PA_UNLIKELY(should_purge_.load(std::memory_order_relaxed))) {
    PurgeInternal();
  }

  *slot_size = bucket.slot_size;
  return true;
}
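// A rough sketch of how a deallocation path is expected to consult
// MaybePutInCache(); this paraphrases the caller side and is not the actual
// PartitionRoot implementation:
//
//   size_t slot_size;
//   ThreadCache* tcache = ThreadCache::Get();
//   if (ThreadCache::IsValid(tcache) &&
//       tcache->MaybePutInCache(slot_start, bucket_index, &slot_size)) {
//     return;  // The slot is now owned by the thread cache.
//   }
//   // Otherwise, free the slot to the central allocator.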
PA_ALWAYS_INLINE uintptr_t ThreadCache::GetFromCache(size_t bucket_index,
                                                     size_t* slot_size) {
#if PA_CONFIG(THREAD_CACHE_ALLOC_STATS)
  stats_.allocs_per_bucket_[bucket_index]++;
#endif

  PA_REENTRANCY_GUARD(is_in_thread_cache_);
  PA_INCREMENT_COUNTER(stats_.alloc_count);

  // Only handle "small" allocations.
  if (PA_UNLIKELY(bucket_index > largest_active_bucket_index_)) {
    PA_INCREMENT_COUNTER(stats_.alloc_miss_too_large);
    PA_INCREMENT_COUNTER(stats_.alloc_misses);
    return 0;
  }

  auto& bucket = buckets_[bucket_index];
  if (PA_LIKELY(bucket.freelist_head)) {
    PA_INCREMENT_COUNTER(stats_.alloc_hits);
  } else {
    PA_DCHECK(bucket.count == 0);
    PA_INCREMENT_COUNTER(stats_.alloc_miss_empty);
    PA_INCREMENT_COUNTER(stats_.alloc_misses);

    FillBucket(bucket_index);

    // Very unlikely, means that the central allocator is out of memory. Let it
    // deal with it (may return 0, may crash).
    if (PA_UNLIKELY(!bucket.freelist_head)) {
      return 0;
    }
  }

  PA_DCHECK(bucket.count != 0);
  internal::PartitionFreelistEntry* entry = bucket.freelist_head;

  // TODO(lizeb): Consider removing once crbug.com/1382658 is fixed.
#if BUILDFLAG(IS_CHROMEOS) && defined(ARCH_CPU_X86_64) && \
    BUILDFLAG(HAS_64_BIT_POINTERS)
  // The x86_64 architecture now supports 57 bits of address space, as of Ice
  // Lake for Intel. However Chrome OS systems do not ship with kernel support
  // for it, but with 48 bits, so all canonical addresses have the upper 16
  // bits zeroed (17 in practice, since the upper half of address space is
  // reserved by the kernel).
  constexpr uintptr_t kCanonicalPointerMask = (1ULL << 48) - 1;
  PA_CHECK(!(reinterpret_cast<uintptr_t>(entry) & ~kCanonicalPointerMask));
#endif  // BUILDFLAG(IS_CHROMEOS) && defined(ARCH_CPU_X86_64) &&
        // BUILDFLAG(HAS_64_BIT_POINTERS)

  // Passes the bucket size to |GetNext()|, so that in case of freelist
  // corruption, we know the bucket size that led to the crash, helping to
  // narrow down the search for the culprit. |bucket| was touched just now, so
  // this does not introduce another cache miss.
  internal::PartitionFreelistEntry* next =
      entry->GetNextForThreadCache<true>(bucket.slot_size);
  PA_DCHECK(entry != next);
  bucket.count--;
  PA_DCHECK(bucket.count != 0 || !next);
  bucket.freelist_head = next;
  *slot_size = bucket.slot_size;

  PA_DCHECK(cached_memory_ >= bucket.slot_size);
  cached_memory_ -= bucket.slot_size;

  return internal::SlotStartPtr2Addr(entry);
}

PA_ALWAYS_INLINE void ThreadCache::PutInBucket(Bucket& bucket,
                                               uintptr_t slot_start) {
#if PA_CONFIG(HAS_FREELIST_SHADOW_ENTRY) && defined(ARCH_CPU_X86_64) && \
    BUILDFLAG(HAS_64_BIT_POINTERS)
  // We see freelist corruption crashes happening in the wild. These are likely
  // due to out-of-bounds accesses in the previous slot, or to a Use-After-Free
  // somewhere in the code.
  //
  // The issue is that we detect the UaF far away from the place where it
  // happens. As a consequence, we should try to make incorrect code crash as
  // early as possible. Poisoning memory at free() time works for UaF, but it
  // was seen in the past to incur a high performance cost.
  //
  // Here, only poison the current cacheline, which we are touching anyway.
  // TODO(lizeb): Make sure this does not hurt performance.

  // Everything below requires this alignment.
  static_assert(internal::kAlignment == 16, "");

  // The pointer is always 16 bytes aligned, so its start address is always ==
  // 0 % 16. Its distance to the next cacheline is
  //   `64 - ((slot_start & 63) / 16) * 16`
  static_assert(
      internal::kPartitionCachelineSize == 64,
      "The computation below assumes that cache lines are 64 bytes long.");
  int distance_to_next_cacheline_in_16_bytes = 4 - ((slot_start >> 4) & 3);
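  // Worked example of the computation above: for a slot starting at an
  // address ending in 0x30, ((slot_start >> 4) & 3) == 3, so the distance is
  // 4 - 3 == 1 chunk of 16 bytes, i.e. the next cacheline starts 16 bytes in.
  // A cacheline-aligned slot (address ending in 0x00) gives 4 chunks, i.e.
  // the full 64 bytes.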
  int slot_size_remaining_in_16_bytes =
#if BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT)
      // When BRP is on in the "previous slot" mode, this slot may have a BRP
      // ref-count of the next, potentially allocated slot. Make sure we don't
      // overwrite it.
      (bucket.slot_size - sizeof(PartitionRefCount)) / 16;
#else
      bucket.slot_size / 16;
#endif  // BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT)

  slot_size_remaining_in_16_bytes = std::min(
      slot_size_remaining_in_16_bytes, distance_to_next_cacheline_in_16_bytes);

  static const uint32_t poison_16_bytes[4] = {0xbadbad00, 0xbadbad00,
                                              0xbadbad00, 0xbadbad00};
  // Give a hint to the compiler in the hope it'll vectorize the loop.
#if PA_HAS_BUILTIN(__builtin_assume_aligned)
  void* slot_start_tagged = __builtin_assume_aligned(
      internal::SlotStartAddr2Ptr(slot_start), internal::kAlignment);
#else
  void* slot_start_tagged = internal::SlotStartAddr2Ptr(slot_start);
#endif

  uint32_t* address_aligned = static_cast<uint32_t*>(slot_start_tagged);
  for (int i = 0; i < slot_size_remaining_in_16_bytes; i++) {
    // Clang will expand the memcpy to a 16-byte write (movups on x86).
    memcpy(address_aligned, poison_16_bytes, sizeof(poison_16_bytes));
    address_aligned += 4;
  }
#endif  // PA_CONFIG(HAS_FREELIST_SHADOW_ENTRY) && defined(ARCH_CPU_X86_64) &&
        // BUILDFLAG(HAS_64_BIT_POINTERS)

  auto* entry = internal::PartitionFreelistEntry::EmplaceAndInitForThreadCache(
      slot_start, bucket.freelist_head);
  bucket.freelist_head = entry;
  bucket.count++;
}

PA_ALWAYS_INLINE void ThreadCache::RecordAllocation(size_t size) {
  thread_alloc_stats_.alloc_count++;
  thread_alloc_stats_.alloc_total_size += size;
}

PA_ALWAYS_INLINE void ThreadCache::RecordDeallocation(size_t size) {
  thread_alloc_stats_.dealloc_count++;
  thread_alloc_stats_.dealloc_total_size += size;
}

}  // namespace partition_alloc

#endif  // BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_