// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_
#define BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_

#include <atomic>
#include <cstdint>
#include <limits>
#include <memory>

#include "base/allocator/partition_allocator/partition_alloc-inl.h"
#include "base/allocator/partition_allocator/partition_alloc_base/compiler_specific.h"
#include "base/allocator/partition_allocator/partition_alloc_base/component_export.h"
#include "base/allocator/partition_allocator/partition_alloc_base/debug/debugging_buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc_base/gtest_prod_util.h"
#include "base/allocator/partition_allocator/partition_alloc_base/thread_annotations.h"
#include "base/allocator/partition_allocator/partition_alloc_base/time/time.h"
#include "base/allocator/partition_allocator/partition_alloc_buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc_config.h"
#include "base/allocator/partition_allocator/partition_alloc_forward.h"
#include "base/allocator/partition_allocator/partition_bucket_lookup.h"
#include "base/allocator/partition_allocator/partition_freelist_entry.h"
#include "base/allocator/partition_allocator/partition_lock.h"
#include "base/allocator/partition_allocator/partition_stats.h"
#include "base/allocator/partition_allocator/partition_tls.h"
#include "build/build_config.h"

#if defined(ARCH_CPU_X86_64) && BUILDFLAG(HAS_64_BIT_POINTERS)
#include <algorithm>
#endif

namespace partition_alloc {

class ThreadCache;

namespace tools {

// This is used from ThreadCacheInspector, which runs in a different process.
// It scans the process memory looking for the two needles, to locate the
// thread cache registry instance.
//
// These two values were chosen randomly, and in particular neither is a valid
// pointer on most 64-bit architectures.
#if BUILDFLAG(HAS_64_BIT_POINTERS)
constexpr uintptr_t kNeedle1 = 0xe69e32f3ad9ea63;
constexpr uintptr_t kNeedle2 = 0x9615ee1c5eb14caf;
#else
constexpr uintptr_t kNeedle1 = 0xe69e32f3;
constexpr uintptr_t kNeedle2 = 0x9615ee1c;
#endif  // BUILDFLAG(HAS_64_BIT_POINTERS)

// This array contains, in order:
// - kNeedle1
// - &ThreadCacheRegistry::Instance()
// - kNeedle2
//
// It is referenced in the thread cache constructor to make sure it is not
// removed by the compiler. It is also not const to make sure it ends up in
// .data.
constexpr size_t kThreadCacheNeedleArraySize = 4;
extern uintptr_t kThreadCacheNeedleArray[kThreadCacheNeedleArraySize];
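// A minimal sketch of how an out-of-process scanner could locate the registry
// from a copy of this process's memory, relying only on the ordering
// documented above (kNeedle1 first, the registry pointer second, kNeedle2
// within the array). `words` and `FindRegistryAddress` are hypothetical names
// for illustration, not part of any API:
//
//   uintptr_t FindRegistryAddress(const uintptr_t* words, size_t n) {
//     for (size_t i = 0; i + kThreadCacheNeedleArraySize <= n; i++) {
//       if (words[i] != kNeedle1)
//         continue;
//       for (size_t j = i + 2; j < i + kThreadCacheNeedleArraySize; j++) {
//         if (words[j] == kNeedle2)
//           return words[i + 1];  // &ThreadCacheRegistry::Instance()
//       }
//     }
//     return 0;
//   }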

class HeapDumper;
class ThreadCacheInspector;

}  // namespace tools

namespace internal {

extern PA_COMPONENT_EXPORT(PARTITION_ALLOC) PartitionTlsKey g_thread_cache_key;

#if PA_CONFIG(THREAD_CACHE_FAST_TLS)
extern PA_COMPONENT_EXPORT(
    PARTITION_ALLOC) thread_local ThreadCache* g_thread_cache;
#endif

}  // namespace internal

struct ThreadCacheLimits {
  // When trying to conserve memory, set the thread cache limit to this.
  static constexpr size_t kDefaultSizeThreshold = 512;
  // 32kiB is chosen here because, in local experiments, "zone" allocation in
  // V8 is performance-sensitive, and zones can (and do) grow up to 32kiB for
  // each individual allocation.
  static constexpr size_t kLargeSizeThreshold = 1 << 15;
  static_assert(kLargeSizeThreshold <= std::numeric_limits<uint16_t>::max(),
                "");
};

// Global registry of all ThreadCache instances.
//
// This class cannot allocate in the (Un)registerThreadCache() functions, as
// they are called from the ThreadCache constructor, which itself runs from
// within the allocator. However, the other members can allocate.
class PA_COMPONENT_EXPORT(PARTITION_ALLOC) ThreadCacheRegistry {
 public:
  static ThreadCacheRegistry& Instance();
  // Do not instantiate.
  //
  // Several things are surprising here:
  // - The constructor is public even though this is intended to be a
  //   singleton: we cannot use a "static local" variable in |Instance()| as
  //   this is reached too early during CRT initialization on Windows, meaning
  //   that static local variables don't work (as they call into the
  //   uninitialized runtime). To sidestep that, we use a regular global
  //   variable in the .cc, which is fine as this object's constructor is
  //   constexpr.
  // - Marked inline so that the chromium style plugin doesn't complain that a
  //   "complex constructor" has an inline body. This warning is disabled when
  //   the constructor is explicitly marked "inline". Note that this is a false
  //   positive of the plugin, since constexpr implies inline.
  inline constexpr ThreadCacheRegistry();

  void RegisterThreadCache(ThreadCache* cache);
  void UnregisterThreadCache(ThreadCache* cache);
  // Prints statistics for all thread caches, or only this thread's.
  void DumpStats(bool my_thread_only, ThreadCacheStats* stats);
  // Purges this thread's cache, and asks the other ones to trigger Purge() at
  // a later point (during a deallocation).
  void PurgeAll();

  // Runs `PurgeAll` and updates the next interval which
  // `GetPeriodicPurgeNextIntervalInMicroseconds` returns.
  //
  // Note that it is the caller's responsibility to invoke this member function
  // periodically with an appropriate interval. This function does not schedule
  // any task or timer.
  void RunPeriodicPurge();
  // Returns the appropriate interval at which to invoke `RunPeriodicPurge`
  // next time.
  int64_t GetPeriodicPurgeNextIntervalInMicroseconds() const;

  // Controls the thread cache size, by setting the multiplier to a value above
  // or below |ThreadCache::kDefaultMultiplier|.
  void SetThreadCacheMultiplier(float multiplier);
  void SetLargestActiveBucketIndex(uint8_t largest_active_bucket_index);

  static internal::Lock& GetLock() { return Instance().lock_; }
  // Purges all thread caches *now*. This is completely thread-unsafe, and
  // should only be called in a post-fork() handler.
  void ForcePurgeAllThreadAfterForkUnsafe();

  void ResetForTesting();

  static constexpr internal::base::TimeDelta kMinPurgeInterval =
      internal::base::Seconds(1);
  static constexpr internal::base::TimeDelta kMaxPurgeInterval =
      internal::base::Minutes(1);
  static constexpr internal::base::TimeDelta kDefaultPurgeInterval =
      2 * kMinPurgeInterval;
  static constexpr size_t kMinCachedMemoryForPurging = 500 * 1024;

 private:
  friend class tools::ThreadCacheInspector;
  friend class tools::HeapDumper;

  // Not using base::Lock as the object's constructor must be constexpr.
  internal::Lock lock_;
  ThreadCache* list_head_ PA_GUARDED_BY(GetLock()) = nullptr;
  bool periodic_purge_is_initialized_ = false;
  internal::base::TimeDelta periodic_purge_next_interval_ =
      kDefaultPurgeInterval;

  uint8_t largest_active_bucket_index_ = internal::BucketIndexLookup::GetIndex(
      ThreadCacheLimits::kDefaultSizeThreshold);
};

constexpr ThreadCacheRegistry::ThreadCacheRegistry() = default;
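// Periodic purging is driven by the embedder: neither the registry nor the
// thread caches own a timer. A minimal sketch of such a driver, where
// `PostDelayedTaskMicroseconds` stands in for whatever task-posting facility
// the embedder has (it is not a PartitionAlloc API):
//
//   void PeriodicPurgeTask() {
//     auto& registry = ThreadCacheRegistry::Instance();
//     registry.RunPeriodicPurge();
//     PostDelayedTaskMicroseconds(
//         &PeriodicPurgeTask,
//         registry.GetPeriodicPurgeNextIntervalInMicroseconds());
//   }
//
// The returned interval adapts over time, bounded by kMinPurgeInterval and
// kMaxPurgeInterval above.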

#if PA_CONFIG(THREAD_CACHE_ENABLE_STATISTICS)
#define PA_INCREMENT_COUNTER(counter) ++counter
#else
#define PA_INCREMENT_COUNTER(counter) \
  do {                                \
  } while (0)
#endif  // PA_CONFIG(THREAD_CACHE_ENABLE_STATISTICS)

#if BUILDFLAG(PA_DCHECK_IS_ON)

namespace internal {

class ReentrancyGuard {
 public:
  explicit ReentrancyGuard(bool& flag) : flag_(flag) {
    PA_CHECK(!flag_);
    flag_ = true;
  }

  ~ReentrancyGuard() { flag_ = false; }

 private:
  bool& flag_;
};

}  // namespace internal

#define PA_REENTRANCY_GUARD(x)      \
  internal::ReentrancyGuard guard { \
    x                               \
  }

#else  // BUILDFLAG(PA_DCHECK_IS_ON)

#define PA_REENTRANCY_GUARD(x) \
  do {                         \
  } while (0)

#endif  // BUILDFLAG(PA_DCHECK_IS_ON)
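// In DCHECK builds, PA_REENTRANCY_GUARD(flag) CHECKs that |flag| is false,
// sets it for the enclosing scope, and clears it when the scope exits. The
// member functions below use it with |is_in_thread_cache_|; an illustrative
// (not real) guarded function expands to roughly:
//
//   void ThreadCache::SomeGuardedOperation() {
//     internal::ReentrancyGuard guard{is_in_thread_cache_};  // CHECKs !flag.
//     // Re-entering any guarded ThreadCache member from here, e.g. through an
//     // allocation made while servicing this one, CHECK-fails instead of
//     // silently corrupting the per-thread state.
//   }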

// Per-thread cache. *Not* threadsafe, must only be accessed from a single
// thread.
//
// In practice, this is easily enforced as long as only |instance| is
// manipulated, as it is a thread_local member. As such, any
// |ThreadCache::instance->*()| call will necessarily be done from a single
// thread.
class PA_COMPONENT_EXPORT(PARTITION_ALLOC) ThreadCache {
 public:
  // Initializes the thread cache for |root|. May allocate, so should be called
  // with the thread cache disabled on the partition side, and without the
  // partition lock held.
  //
  // May only be called by a single PartitionRoot.
  static void Init(PartitionRoot<>* root);

  static void DeleteForTesting(ThreadCache* tcache);

  // Deletes the existing thread cache and creates a new one for |root|.
  static void SwapForTesting(PartitionRoot<>* root);

  // Removes the tombstone marker that would otherwise be returned by Get().
  static void RemoveTombstoneForTesting();

  // May be called several times; must be called before any ThreadCache
  // interactions.
  static void EnsureThreadSpecificDataInitialized();

  static ThreadCache* Get() {
#if PA_CONFIG(THREAD_CACHE_FAST_TLS)
    return internal::g_thread_cache;
#else
    // This region isn't MTE-tagged.
    return reinterpret_cast<ThreadCache*>(
        internal::PartitionTlsGet(internal::g_thread_cache_key));
#endif
  }

  static bool IsValid(ThreadCache* tcache) {
    // Do not MTE-untag, as it'd mess up the sentinel value.
    return reinterpret_cast<uintptr_t>(tcache) & kTombstoneMask;
  }

  static bool IsTombstone(ThreadCache* tcache) {
    // Do not MTE-untag, as it'd mess up the sentinel value.
    return reinterpret_cast<uintptr_t>(tcache) == kTombstone;
  }

  // Creates a new ThreadCache associated with |root|.
  // Must be called without the partition lock held, as this may allocate.
  static ThreadCache* Create(PartitionRoot<>* root);

  ~ThreadCache();

  // Force placement new.
  void* operator new(size_t) = delete;
  void* operator new(size_t, void* buffer) { return buffer; }
  void operator delete(void* ptr) = delete;
  ThreadCache(const ThreadCache&) = delete;
  ThreadCache(const ThreadCache&&) = delete;
  ThreadCache& operator=(const ThreadCache&) = delete;

  // Tries to put a slot at |slot_start| into the cache.
  // The slot comes from the bucket at index |bucket_index| of the partition
  // this cache is for.
  //
  // Returns true if the slot was put in the cache, and false otherwise. This
  // can happen either because the cache is full or the allocation was too
  // large.
  PA_ALWAYS_INLINE bool MaybePutInCache(uintptr_t slot_start,
                                        size_t bucket_index,
                                        size_t* slot_size);

  // Tries to allocate a memory slot from the cache.
  // Returns 0 on failure.
  //
  // Has the same behavior as RawAlloc(), that is: no cookie or ref-count
  // handling. Sets |slot_size| to the allocated size upon success.
  PA_ALWAYS_INLINE uintptr_t GetFromCache(size_t bucket_index,
                                          size_t* slot_size);

  // Asks this cache to trigger |Purge()| at a later point. Can be called from
  // any thread.
  void SetShouldPurge();
  // Empties the cache.
  // The partition lock must *not* be held when calling this.
  // Must be called from the thread this cache is for.
  void Purge();
  // |TryPurge| is the same as |Purge|, except that |TryPurge| will
  // not crash if the thread cache is inconsistent. Normally inconsistency
  // is a sign of a bug somewhere, so |Purge| should be preferred in most cases.
  void TryPurge();
  // Amount of cached memory for this thread's cache, in bytes.
  size_t CachedMemory() const;
  void AccumulateStats(ThreadCacheStats* stats) const;

  // Purges the thread cache of the current thread, if one exists.
  static void PurgeCurrentThread();

  const ThreadAllocStats& thread_alloc_stats() const {
    return thread_alloc_stats_;
  }
  size_t bucket_count_for_testing(size_t index) const {
    return buckets_[index].count;
  }

  internal::base::PlatformThreadId thread_id() const { return thread_id_; }

  // Sets the maximum size of allocations that may be cached by the thread
  // cache. This applies to all threads. However, the maximum size is bounded
  // by |kLargeSizeThreshold|.
  static void SetLargestCachedSize(size_t size);

  // Cumulative stats about *all* allocations made on the `root_` partition on
  // this thread, that is, not only the allocations serviced by the thread
  // cache, but all allocations, including large and direct-mapped ones. This
  // should in theory be split into a separate PerThread data structure, but
  // the thread cache is the only per-thread data we have as of now.
  //
  // TODO(lizeb): Investigate adding a proper per-thread data structure.
  PA_ALWAYS_INLINE void RecordAllocation(size_t size);
  PA_ALWAYS_INLINE void RecordDeallocation(size_t size);
  void ResetPerThreadAllocationStatsForTesting();

  // Fill 1 / kBatchFillRatio * bucket.limit slots at a time.
  static constexpr uint16_t kBatchFillRatio = 8;
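  // For example, a bucket whose |limit| is 128 gets 128 / kBatchFillRatio = 16
  // slots per batch fill, amortizing trips to the central allocator instead of
  // paying one on every cache miss.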

  // Limit for the smallest bucket will be kDefaultMultiplier *
  // kSmallBucketBaseCount by default.
  static constexpr float kDefaultMultiplier = 2.;
  static constexpr uint8_t kSmallBucketBaseCount = 64;

  static constexpr size_t kDefaultSizeThreshold =
      ThreadCacheLimits::kDefaultSizeThreshold;
  static constexpr size_t kLargeSizeThreshold =
      ThreadCacheLimits::kLargeSizeThreshold;

  const ThreadCache* prev_for_testing() const
      PA_EXCLUSIVE_LOCKS_REQUIRED(ThreadCacheRegistry::GetLock()) {
    return prev_;
  }
  const ThreadCache* next_for_testing() const
      PA_EXCLUSIVE_LOCKS_REQUIRED(ThreadCacheRegistry::GetLock()) {
    return next_;
  }

 private:
  friend class tools::HeapDumper;
  friend class tools::ThreadCacheInspector;

  struct Bucket {
    internal::PartitionFreelistEntry* freelist_head = nullptr;
    // Want to keep sizeof(Bucket) small, using small types.
    uint8_t count = 0;
    std::atomic<uint8_t> limit{};  // Can be changed from another thread.
    uint16_t slot_size = 0;

    Bucket();
  };
  static_assert(sizeof(Bucket) <= 2 * sizeof(void*), "Keep Bucket small.");

  explicit ThreadCache(PartitionRoot<>* root);
  static void Delete(void* thread_cache_ptr);

  void PurgeInternal();
  template <bool crash_on_corruption>
  void PurgeInternalHelper();

  // Fills a bucket from the central allocator.
  void FillBucket(size_t bucket_index);
  // Empties the |bucket| until there are at most |limit| objects in it.
  template <bool crash_on_corruption>
  void ClearBucketHelper(Bucket& bucket, size_t limit);
  void ClearBucket(Bucket& bucket, size_t limit);
  PA_ALWAYS_INLINE void PutInBucket(Bucket& bucket, uintptr_t slot_start);
  void ResetForTesting();
  // Releases the entire freelist starting at |head| to the root.
  template <bool crash_on_corruption>
  void FreeAfter(internal::PartitionFreelistEntry* head, size_t slot_size);
  static void SetGlobalLimits(PartitionRoot<>* root, float multiplier);

  static constexpr uint16_t kBucketCount =
      internal::BucketIndexLookup::GetIndex(ThreadCache::kLargeSizeThreshold) +
      1;
  static_assert(
      kBucketCount < internal::kNumBuckets,
      "Cannot have more cached buckets than what the allocator supports");

  // On some architectures, ThreadCache::Get() can be called and return
  // something after the thread cache has been destroyed. In this case, we set
  // it to this value, to signal that the thread is being terminated and that
  // the thread cache should not be used.
  //
  // This happens in particular on Windows, during program termination.
  //
  // We choose 0x1 as the value because it is an invalid pointer: it is not
  // aligned and is too low. Also, checking !(ptr & kTombstoneMask) checks for
  // nullptr and kTombstone at the same time.
  static constexpr uintptr_t kTombstone = 0x1;
  static constexpr uintptr_t kTombstoneMask = ~kTombstone;
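  // Concretely: kTombstoneMask == ~0x1, so (0x0 & kTombstoneMask) and
  // (0x1 & kTombstoneMask) are both zero, while any real (aligned, non-null)
  // ThreadCache pointer masked this way stays non-zero, which is exactly what
  // IsValid() tests.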

  static uint8_t global_limits_[kBucketCount];
  // Index of the largest active bucket. Not all processes/platforms will use
  // all buckets, as using larger buckets increases the memory footprint.
  //
  // TODO(lizeb): Investigate making this per-thread rather than static, to
  // improve locality, and open the door to per-thread settings.
  static uint16_t largest_active_bucket_index_;

  // These are at the beginning as they're accessed for each allocation.
  uint32_t cached_memory_ = 0;
  std::atomic<bool> should_purge_;
  ThreadCacheStats stats_;
  ThreadAllocStats thread_alloc_stats_;

  // Buckets are quite big, though each is only 2 pointers.
  Bucket buckets_[kBucketCount];

  // Cold data below.
  PartitionRoot<>* const root_;

  const internal::base::PlatformThreadId thread_id_;
#if BUILDFLAG(PA_DCHECK_IS_ON)
  bool is_in_thread_cache_ = false;
#endif

  // Intrusive list since ThreadCacheRegistry::RegisterThreadCache() cannot
  // allocate.
  ThreadCache* next_ PA_GUARDED_BY(ThreadCacheRegistry::GetLock());
  ThreadCache* prev_ PA_GUARDED_BY(ThreadCacheRegistry::GetLock());

  friend class ThreadCacheRegistry;
  friend class PartitionAllocThreadCacheTest;
  friend class tools::ThreadCacheInspector;
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest, Simple);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              MultipleObjectsCachedPerBucket);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              LargeAllocationsAreNotCached);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              MultipleThreadCaches);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest, RecordStats);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              ThreadCacheRegistry);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              MultipleThreadCachesAccounting);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              DynamicCountPerBucket);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              DynamicCountPerBucketClamping);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              DynamicCountPerBucketMultipleThreads);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              DynamicSizeThreshold);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest,
                              DynamicSizeThresholdPurge);
  PA_FRIEND_TEST_ALL_PREFIXES(PartitionAllocThreadCacheTest, ClearFromTail);
};
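
// A rough sketch of how the allocation fast path is expected to use the two
// hot methods above. The real caller is PartitionRoot; `bucket_index` and the
// fallback paths below are illustrative only:
//
//   ThreadCache* tcache = ThreadCache::Get();
//   if (ThreadCache::IsValid(tcache)) {
//     size_t slot_size;
//     // Allocation: serve from the per-thread freelist when possible.
//     uintptr_t slot_start = tcache->GetFromCache(bucket_index, &slot_size);
//     if (!slot_start) {
//       // Fall back to the central allocator.
//     }
//     // Deallocation: try to keep the slot in the per-thread cache.
//     if (!tcache->MaybePutInCache(slot_start, bucket_index, &slot_size)) {
//       // Free through the central allocator instead.
//     }
//   }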

PA_ALWAYS_INLINE bool ThreadCache::MaybePutInCache(uintptr_t slot_start,
                                                   size_t bucket_index,
                                                   size_t* slot_size) {
  PA_REENTRANCY_GUARD(is_in_thread_cache_);
  PA_INCREMENT_COUNTER(stats_.cache_fill_count);

  if (PA_UNLIKELY(bucket_index > largest_active_bucket_index_)) {
    PA_INCREMENT_COUNTER(stats_.cache_fill_misses);
    return false;
  }

  auto& bucket = buckets_[bucket_index];

  PA_DCHECK(bucket.count != 0 || bucket.freelist_head == nullptr);

  PutInBucket(bucket, slot_start);
  cached_memory_ += bucket.slot_size;
  PA_INCREMENT_COUNTER(stats_.cache_fill_hits);

  // Relaxed ordering: we don't care about having an up-to-date or consistent
  // value, just want it to not change while we are using it, hence using
  // relaxed ordering, and loading into a local variable. Without it, we are
  // gambling that the compiler would not issue multiple loads.
  uint8_t limit = bucket.limit.load(std::memory_order_relaxed);
  // Batched deallocation, amortizing lock acquisitions.
  if (PA_UNLIKELY(bucket.count > limit)) {
    ClearBucket(bucket, limit / 2);
  }

  if (PA_UNLIKELY(should_purge_.load(std::memory_order_relaxed))) {
    PurgeInternal();
  }

  *slot_size = bucket.slot_size;
  return true;
}

PA_ALWAYS_INLINE uintptr_t ThreadCache::GetFromCache(size_t bucket_index,
                                                     size_t* slot_size) {
#if PA_CONFIG(THREAD_CACHE_ALLOC_STATS)
  stats_.allocs_per_bucket_[bucket_index]++;
#endif

  PA_REENTRANCY_GUARD(is_in_thread_cache_);
  PA_INCREMENT_COUNTER(stats_.alloc_count);
  // Only handle "small" allocations.
  if (PA_UNLIKELY(bucket_index > largest_active_bucket_index_)) {
    PA_INCREMENT_COUNTER(stats_.alloc_miss_too_large);
    PA_INCREMENT_COUNTER(stats_.alloc_misses);
    return 0;
  }

  auto& bucket = buckets_[bucket_index];
  if (PA_LIKELY(bucket.freelist_head)) {
    PA_INCREMENT_COUNTER(stats_.alloc_hits);
  } else {
    PA_DCHECK(bucket.count == 0);
    PA_INCREMENT_COUNTER(stats_.alloc_miss_empty);
    PA_INCREMENT_COUNTER(stats_.alloc_misses);

    FillBucket(bucket_index);

    // Very unlikely, means that the central allocator is out of memory. Let it
    // deal with it (may return 0, may crash).
    if (PA_UNLIKELY(!bucket.freelist_head)) {
      return 0;
    }
  }

  PA_DCHECK(bucket.count != 0);
  internal::PartitionFreelistEntry* entry = bucket.freelist_head;
  // TODO(lizeb): Consider removing once crbug.com/1382658 is fixed.
#if BUILDFLAG(IS_CHROMEOS) && defined(ARCH_CPU_X86_64) && \
    BUILDFLAG(HAS_64_BIT_POINTERS)
  // x86_64 architecture now supports 57 bits of address space, as of Ice Lake
  // for Intel. However Chrome OS systems do not ship with kernel support for
  // it, but with 48 bits, so all canonical addresses have the upper 16 bits
  // zeroed (17 in practice, since the upper half of address space is reserved
  // by the kernel).
  constexpr uintptr_t kCanonicalPointerMask = (1ULL << 48) - 1;
  PA_CHECK(!(reinterpret_cast<uintptr_t>(entry) & ~kCanonicalPointerMask));
#endif  // BUILDFLAG(IS_CHROMEOS) && defined(ARCH_CPU_X86_64) &&
        // BUILDFLAG(HAS_64_BIT_POINTERS)

  // Passes the bucket size to |GetNext()|, so that in case of freelist
  // corruption, we know the bucket size that led to the crash, helping to
  // narrow down the search for the culprit. |bucket| was touched just now, so
  // this does not introduce another cache miss.
  internal::PartitionFreelistEntry* next =
      entry->GetNextForThreadCache<true>(bucket.slot_size);
  PA_DCHECK(entry != next);
  bucket.count--;
  PA_DCHECK(bucket.count != 0 || !next);
  bucket.freelist_head = next;
  *slot_size = bucket.slot_size;

  PA_DCHECK(cached_memory_ >= bucket.slot_size);
  cached_memory_ -= bucket.slot_size;

  return internal::SlotStartPtr2Addr(entry);
}

PA_ALWAYS_INLINE void ThreadCache::PutInBucket(Bucket& bucket,
                                               uintptr_t slot_start) {
#if PA_CONFIG(HAS_FREELIST_SHADOW_ENTRY) && defined(ARCH_CPU_X86_64) && \
    BUILDFLAG(HAS_64_BIT_POINTERS)
  // We see freelist corruption crashes happening in the wild.  These are likely
  // due to out-of-bounds accesses in the previous slot, or to a Use-After-Free
  // somewhere in the code.
  //
  // The issue is that we detect the UaF far away from the place where it
  // happens. As a consequence, we should try to make incorrect code crash as
  // early as possible. Poisoning memory at free() time works for UaF, but it
  // was seen in the past to incur a high performance cost.
  //
  // Here, only poison the current cacheline, which we are touching anyway.
  // TODO(lizeb): Make sure this does not hurt performance.

  // Everything below requires this alignment.
  static_assert(internal::kAlignment == 16, "");

  // The pointer is always 16 bytes aligned, so its start address is always == 0
  // % 16. Its distance to the next cacheline is
  //   `64 - ((slot_start & 63) / 16) * 16`
  static_assert(
      internal::kPartitionCachelineSize == 64,
      "The computation below assumes that cache lines are 64 bytes long.");
  int distance_to_next_cacheline_in_16_bytes = 4 - ((slot_start >> 4) & 3);
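  // For example, a slot starting 32 bytes into its cacheline has
  // ((slot_start >> 4) & 3) == 2, so 4 - 2 == 2 chunks of 16 bytes (32 bytes)
  // remain in the line; at most that much is poisoned below.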
  int slot_size_remaining_in_16_bytes =
#if BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT)
      // When BRP is on in the "previous slot" mode, this slot may have a BRP
      // ref-count of the next, potentially allocated slot. Make sure we don't
      // overwrite it.
      (bucket.slot_size - sizeof(PartitionRefCount)) / 16;
#else
      bucket.slot_size / 16;
#endif  // BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT)

  slot_size_remaining_in_16_bytes = std::min(
      slot_size_remaining_in_16_bytes, distance_to_next_cacheline_in_16_bytes);

  static const uint32_t poison_16_bytes[4] = {0xbadbad00, 0xbadbad00,
                                              0xbadbad00, 0xbadbad00};
  // Give a hint to the compiler in the hope that it'll vectorize the loop.
#if PA_HAS_BUILTIN(__builtin_assume_aligned)
  void* slot_start_tagged = __builtin_assume_aligned(
      internal::SlotStartAddr2Ptr(slot_start), internal::kAlignment);
#else
  void* slot_start_tagged = internal::SlotStartAddr2Ptr(slot_start);
#endif
  uint32_t* address_aligned = static_cast<uint32_t*>(slot_start_tagged);
  for (int i = 0; i < slot_size_remaining_in_16_bytes; i++) {
    // Clang will expand the memcpy to a 16-byte write (movups on x86).
    memcpy(address_aligned, poison_16_bytes, sizeof(poison_16_bytes));
    address_aligned += 4;
  }
#endif  // PA_CONFIG(HAS_FREELIST_SHADOW_ENTRY) && defined(ARCH_CPU_X86_64) &&
        // BUILDFLAG(HAS_64_BIT_POINTERS)

  auto* entry = internal::PartitionFreelistEntry::EmplaceAndInitForThreadCache(
      slot_start, bucket.freelist_head);
  bucket.freelist_head = entry;
  bucket.count++;
}

PA_ALWAYS_INLINE void ThreadCache::RecordAllocation(size_t size) {
  thread_alloc_stats_.alloc_count++;
  thread_alloc_stats_.alloc_total_size += size;
}

PA_ALWAYS_INLINE void ThreadCache::RecordDeallocation(size_t size) {
  thread_alloc_stats_.dealloc_count++;
  thread_alloc_stats_.dealloc_total_size += size;
}

}  // namespace partition_alloc

#endif  // BASE_ALLOCATOR_PARTITION_ALLOCATOR_THREAD_CACHE_H_