• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "partition_alloc/partition_root.h"
6 
7 #include <cstdint>
8 
9 #include "build/build_config.h"
10 #include "partition_alloc/freeslot_bitmap.h"
11 #include "partition_alloc/oom.h"
12 #include "partition_alloc/page_allocator.h"
13 #include "partition_alloc/partition_address_space.h"
14 #include "partition_alloc/partition_alloc-inl.h"
15 #include "partition_alloc/partition_alloc_base/bits.h"
16 #include "partition_alloc/partition_alloc_base/compiler_specific.h"
17 #include "partition_alloc/partition_alloc_base/component_export.h"
18 #include "partition_alloc/partition_alloc_base/debug/debugging_buildflags.h"
19 #include "partition_alloc/partition_alloc_base/thread_annotations.h"
20 #include "partition_alloc/partition_alloc_buildflags.h"
21 #include "partition_alloc/partition_alloc_check.h"
22 #include "partition_alloc/partition_alloc_config.h"
23 #include "partition_alloc/partition_alloc_constants.h"
24 #include "partition_alloc/partition_bucket.h"
25 #include "partition_alloc/partition_cookie.h"
26 #include "partition_alloc/partition_oom.h"
27 #include "partition_alloc/partition_page.h"
28 #include "partition_alloc/partition_ref_count.h"
29 #include "partition_alloc/reservation_offset_table.h"
30 #include "partition_alloc/tagging.h"
31 #include "partition_alloc/thread_isolation/thread_isolation.h"
32 
33 #if BUILDFLAG(IS_MAC)
34 #include "partition_alloc/partition_alloc_base/mac/mac_util.h"
35 #endif
36 
37 #if BUILDFLAG(USE_STARSCAN)
38 #include "partition_alloc/starscan/pcscan.h"
39 #endif
40 
41 #if !BUILDFLAG(HAS_64_BIT_POINTERS)
42 #include "partition_alloc/address_pool_manager_bitmap.h"
43 #endif
44 
45 #if BUILDFLAG(IS_WIN)
46 #include <windows.h>
47 #include "wow64apiset.h"
48 #endif
49 
50 #if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
51 #include <pthread.h>
52 #endif
53 
54 namespace partition_alloc::internal {
55 
56 #if BUILDFLAG(RECORD_ALLOC_INFO)
57 // Even if this is not hidden behind a BUILDFLAG, it should not use any memory
58 // when recording is disabled, since it ends up in the .bss section.
59 AllocInfo g_allocs = {};
60 
RecordAllocOrFree(uintptr_t addr,size_t size)61 void RecordAllocOrFree(uintptr_t addr, size_t size) {
62   g_allocs.allocs[g_allocs.index.fetch_add(1, std::memory_order_relaxed) %
63                   kAllocInfoSize] = {addr, size};
64 }
65 #endif  // BUILDFLAG(RECORD_ALLOC_INFO)
66 
67 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
IsPtrWithinSameAlloc(uintptr_t orig_address,uintptr_t test_address,size_t type_size)68 PtrPosWithinAlloc IsPtrWithinSameAlloc(uintptr_t orig_address,
69                                        uintptr_t test_address,
70                                        size_t type_size) {
71   // Required for pointers right past an allocation. See
72   // |PartitionAllocGetSlotStartInBRPPool()|.
73   uintptr_t adjusted_address =
74       orig_address - kPartitionPastAllocationAdjustment;
75   PA_DCHECK(IsManagedByNormalBucketsOrDirectMap(adjusted_address));
76   DCheckIfManagedByPartitionAllocBRPPool(adjusted_address);
77 
78   uintptr_t slot_start = PartitionAllocGetSlotStartInBRPPool(adjusted_address);
79   // Don't use |adjusted_address| beyond this point at all. It was needed to
80   // pick the right slot, but now we're dealing with very concrete addresses.
81   // Zero it just in case, to catch errors.
82   adjusted_address = 0;
83 
84   auto* slot_span = SlotSpanMetadata::FromSlotStart(slot_start);
85   auto* root = PartitionRoot::FromSlotSpan(slot_span);
86   // Double check that ref-count is indeed present.
87   PA_DCHECK(root->brp_enabled());
88 
89   uintptr_t object_addr = root->SlotStartToObjectAddr(slot_start);
90   uintptr_t object_end = object_addr + root->GetSlotUsableSize(slot_span);
91   if (test_address < object_addr || object_end < test_address) {
92     return PtrPosWithinAlloc::kFarOOB;
93 #if BUILDFLAG(BACKUP_REF_PTR_POISON_OOB_PTR)
94   } else if (object_end - type_size < test_address) {
95     // Not even a single element of the type referenced by the pointer can fit
96     // between the pointer and the end of the object.
97     return PtrPosWithinAlloc::kAllocEnd;
98 #endif
99   } else {
100     return PtrPosWithinAlloc::kInBounds;
101   }
102 }
103 #endif  // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
104 
105 }  // namespace partition_alloc::internal
106 
107 namespace partition_alloc {
108 
109 #if PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
110 
111 namespace {
112 internal::Lock g_root_enumerator_lock;
113 }
114 
GetEnumeratorLock()115 internal::Lock& PartitionRoot::GetEnumeratorLock() {
116   return g_root_enumerator_lock;
117 }
118 
119 namespace internal {
120 
121 class PartitionRootEnumerator {
122  public:
123   using EnumerateCallback = void (*)(PartitionRoot* root, bool in_child);
124   enum EnumerateOrder {
125     kNormal,
126     kReverse,
127   };
128 
Instance()129   static PartitionRootEnumerator& Instance() {
130     static PartitionRootEnumerator instance;
131     return instance;
132   }
133 
Enumerate(EnumerateCallback callback,bool in_child,EnumerateOrder order)134   void Enumerate(EnumerateCallback callback,
135                  bool in_child,
136                  EnumerateOrder order) PA_NO_THREAD_SAFETY_ANALYSIS {
137     if (order == kNormal) {
138       PartitionRoot* root;
139       for (root = Head(partition_roots_); root != nullptr;
140            root = root->next_root) {
141         callback(root, in_child);
142       }
143     } else {
144       PA_DCHECK(order == kReverse);
145       PartitionRoot* root;
146       for (root = Tail(partition_roots_); root != nullptr;
147            root = root->prev_root) {
148         callback(root, in_child);
149       }
150     }
151   }
152 
Register(PartitionRoot * root)153   void Register(PartitionRoot* root) {
154     internal::ScopedGuard guard(PartitionRoot::GetEnumeratorLock());
155     root->next_root = partition_roots_;
156     root->prev_root = nullptr;
157     if (partition_roots_) {
158       partition_roots_->prev_root = root;
159     }
160     partition_roots_ = root;
161   }
162 
Unregister(PartitionRoot * root)163   void Unregister(PartitionRoot* root) {
164     internal::ScopedGuard guard(PartitionRoot::GetEnumeratorLock());
165     PartitionRoot* prev = root->prev_root;
166     PartitionRoot* next = root->next_root;
167     if (prev) {
168       PA_DCHECK(prev->next_root == root);
169       prev->next_root = next;
170     } else {
171       PA_DCHECK(partition_roots_ == root);
172       partition_roots_ = next;
173     }
174     if (next) {
175       PA_DCHECK(next->prev_root == root);
176       next->prev_root = prev;
177     }
178     root->next_root = nullptr;
179     root->prev_root = nullptr;
180   }
181 
182  private:
183   constexpr PartitionRootEnumerator() = default;
184 
Head(PartitionRoot * roots)185   PartitionRoot* Head(PartitionRoot* roots) { return roots; }
186 
Tail(PartitionRoot * roots)187   PartitionRoot* Tail(PartitionRoot* roots) PA_NO_THREAD_SAFETY_ANALYSIS {
188     if (!roots) {
189       return nullptr;
190     }
191     PartitionRoot* node = roots;
192     for (; node->next_root != nullptr; node = node->next_root)
193       ;
194     return node;
195   }
196 
197   PartitionRoot* partition_roots_
198       PA_GUARDED_BY(PartitionRoot::GetEnumeratorLock()) = nullptr;
199 };
200 
201 }  // namespace internal
202 
203 #endif  // PA_USE_PARTITION_ROOT_ENUMERATOR
204 
205 #if BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
206 
207 namespace {
208 
209 #if PA_CONFIG(HAS_ATFORK_HANDLER)
210 
LockRoot(PartitionRoot * root,bool)211 void LockRoot(PartitionRoot* root, bool) PA_NO_THREAD_SAFETY_ANALYSIS {
212   PA_DCHECK(root);
213   internal::PartitionRootLock(root).Acquire();
214 }
215 
216 // PA_NO_THREAD_SAFETY_ANALYSIS: acquires the lock and doesn't release it, by
217 // design.
BeforeForkInParent()218 void BeforeForkInParent() PA_NO_THREAD_SAFETY_ANALYSIS {
219   // PartitionRoot::GetLock() is private. So use
220   // g_root_enumerator_lock here.
221   g_root_enumerator_lock.Acquire();
222   internal::PartitionRootEnumerator::Instance().Enumerate(
223       LockRoot, false,
224       internal::PartitionRootEnumerator::EnumerateOrder::kNormal);
225 
226   ThreadCacheRegistry::GetLock().Acquire();
227 }
228 
229 template <typename T>
UnlockOrReinit(T & lock,bool in_child)230 void UnlockOrReinit(T& lock, bool in_child) PA_NO_THREAD_SAFETY_ANALYSIS {
231   // Only re-init the locks in the child process, in the parent can unlock
232   // normally.
233   if (in_child) {
234     lock.Reinit();
235   } else {
236     lock.Release();
237   }
238 }
239 
UnlockOrReinitRoot(PartitionRoot * root,bool in_child)240 void UnlockOrReinitRoot(PartitionRoot* root,
241                         bool in_child) PA_NO_THREAD_SAFETY_ANALYSIS {
242   UnlockOrReinit(internal::PartitionRootLock(root), in_child);
243 }
244 
ReleaseLocks(bool in_child)245 void ReleaseLocks(bool in_child) PA_NO_THREAD_SAFETY_ANALYSIS {
246   // In reverse order, even though there are no lock ordering dependencies.
247   UnlockOrReinit(ThreadCacheRegistry::GetLock(), in_child);
248   internal::PartitionRootEnumerator::Instance().Enumerate(
249       UnlockOrReinitRoot, in_child,
250       internal::PartitionRootEnumerator::EnumerateOrder::kReverse);
251 
252   // PartitionRoot::GetLock() is private. So use
253   // g_root_enumerator_lock here.
254   UnlockOrReinit(g_root_enumerator_lock, in_child);
255 }
256 
AfterForkInParent()257 void AfterForkInParent() {
258   ReleaseLocks(/* in_child = */ false);
259 }
260 
AfterForkInChild()261 void AfterForkInChild() {
262   ReleaseLocks(/* in_child = */ true);
263   // Unsafe, as noted in the name. This is fine here however, since at this
264   // point there is only one thread, this one (unless another post-fork()
265   // handler created a thread, but it would have needed to allocate, which would
266   // have deadlocked the process already).
267   //
268   // If we don't reclaim this memory, it is lost forever. Note that this is only
269   // really an issue if we fork() a multi-threaded process without calling
270   // exec() right away, which is discouraged.
271   ThreadCacheRegistry::Instance().ForcePurgeAllThreadAfterForkUnsafe();
272 }
273 #endif  // PA_CONFIG(HAS_ATFORK_HANDLER)
274 
275 std::atomic<bool> g_global_init_called;
PartitionAllocMallocInitOnce()276 void PartitionAllocMallocInitOnce() {
277   bool expected = false;
278   // No need to block execution for potential concurrent initialization, merely
279   // want to make sure this is only called once.
280   if (!g_global_init_called.compare_exchange_strong(expected, true)) {
281     return;
282   }
283 
284 #if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
285   // When fork() is called, only the current thread continues to execute in the
286   // child process. If the lock is held, but *not* by this thread when fork() is
287   // called, we have a deadlock.
288   //
289   // The "solution" here is to acquire the lock on the forking thread before
290   // fork(), and keep it held until fork() is done, in the parent and the
291   // child. To clean up memory, we also must empty the thread caches in the
292   // child, which is easier, since no threads except for the current one are
293   // running right after the fork().
294   //
295   // This is not perfect though, since:
296   // - Multiple pre/post-fork() handlers can be registered, they are then run in
297   //   LIFO order for the pre-fork handler, and FIFO order for the post-fork
298   //   one. So unless we are the first to register a handler, if another handler
299   //   allocates, then we deterministically deadlock.
300   // - pthread handlers are *not* called when the application calls clone()
301   //   directly, which is what Chrome does to launch processes.
302   //
303   // However, no perfect solution really exists to make threads + fork()
304   // cooperate, but deadlocks are real (and fork() is used in DEATH_TEST()s),
305   // and other malloc() implementations use the same techniques.
306   int err =
307       pthread_atfork(BeforeForkInParent, AfterForkInParent, AfterForkInChild);
308   PA_CHECK(err == 0);
309 #endif  // BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
310 }
311 
312 }  // namespace
313 
314 #if BUILDFLAG(IS_APPLE)
PartitionAllocMallocHookOnBeforeForkInParent()315 void PartitionAllocMallocHookOnBeforeForkInParent() {
316   BeforeForkInParent();
317 }
318 
PartitionAllocMallocHookOnAfterForkInParent()319 void PartitionAllocMallocHookOnAfterForkInParent() {
320   AfterForkInParent();
321 }
322 
PartitionAllocMallocHookOnAfterForkInChild()323 void PartitionAllocMallocHookOnAfterForkInChild() {
324   AfterForkInChild();
325 }
326 #endif  // BUILDFLAG(IS_APPLE)
327 
328 #endif  // BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
329 
330 namespace internal {
331 
332 namespace {
333 // 64 was chosen arbitrarily, as it seems like a reasonable trade-off between
334 // performance and purging opportunity. Higher value (i.e. smaller slots)
335 // wouldn't necessarily increase chances of purging, but would result in
336 // more work and larger |slot_usage| array. Lower value would probably decrease
337 // chances of purging. Not empirically tested.
338 constexpr size_t kMaxPurgeableSlotsPerSystemPage = 64;
339 PA_ALWAYS_INLINE PAGE_ALLOCATOR_CONSTANTS_DECLARE_CONSTEXPR size_t
MinPurgeableSlotSize()340 MinPurgeableSlotSize() {
341   return SystemPageSize() / kMaxPurgeableSlotsPerSystemPage;
342 }
343 }  // namespace
344 
345 // The function attempts to unprovision unused slots and discard unused pages.
346 // It may also "straighten" the free list.
347 //
348 // If `accounting_only` is set to true, no action is performed and the function
349 // merely returns the number of bytes in the would-be discarded pages.
PartitionPurgeSlotSpan(PartitionRoot * root,internal::SlotSpanMetadata * slot_span,bool accounting_only)350 static size_t PartitionPurgeSlotSpan(PartitionRoot* root,
351                                      internal::SlotSpanMetadata* slot_span,
352                                      bool accounting_only)
353     PA_EXCLUSIVE_LOCKS_REQUIRED(internal::PartitionRootLock(root)) {
354   const internal::PartitionBucket* bucket = slot_span->bucket;
355   size_t slot_size = bucket->slot_size;
356 
357   if (slot_size < MinPurgeableSlotSize() || !slot_span->num_allocated_slots) {
358     return 0;
359   }
360 
361   size_t bucket_num_slots = bucket->get_slots_per_span();
362   size_t discardable_bytes = 0;
363 
364   if (slot_span->CanStoreRawSize()) {
365     uint32_t utilized_slot_size = static_cast<uint32_t>(
366         RoundUpToSystemPage(slot_span->GetUtilizedSlotSize()));
367     discardable_bytes = bucket->slot_size - utilized_slot_size;
368     if (discardable_bytes && !accounting_only) {
369       uintptr_t slot_span_start =
370           internal::SlotSpanMetadata::ToSlotSpanStart(slot_span);
371       uintptr_t committed_data_end = slot_span_start + utilized_slot_size;
372       ScopedSyscallTimer timer{root};
373       DiscardSystemPages(committed_data_end, discardable_bytes);
374     }
375     return discardable_bytes;
376   }
377 
378 #if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR)
379   constexpr size_t kMaxSlotCount =
380       (PartitionPageSize() * kMaxPartitionPagesPerRegularSlotSpan) /
381       MinPurgeableSlotSize();
382 #elif BUILDFLAG(IS_APPLE) || (BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64))
383   // It's better for slot_usage to be stack-allocated and fixed-size, which
384   // demands that its size be constexpr. On IS_APPLE and Linux on arm64,
385   // PartitionPageSize() is always SystemPageSize() << 2, so regardless of
386   // what the run time page size is, kMaxSlotCount can always be simplified
387   // to this expression.
388   constexpr size_t kMaxSlotCount =
389       4 * kMaxPurgeableSlotsPerSystemPage *
390       internal::kMaxPartitionPagesPerRegularSlotSpan;
391   PA_CHECK(kMaxSlotCount == (PartitionPageSize() *
392                              internal::kMaxPartitionPagesPerRegularSlotSpan) /
393                                 MinPurgeableSlotSize());
394 #endif
395   PA_DCHECK(bucket_num_slots <= kMaxSlotCount);
396   PA_DCHECK(slot_span->num_unprovisioned_slots < bucket_num_slots);
397   size_t num_provisioned_slots =
398       bucket_num_slots - slot_span->num_unprovisioned_slots;
399   char slot_usage[kMaxSlotCount];
400 #if !BUILDFLAG(IS_WIN)
401   // The last freelist entry should not be discarded when using OS_WIN.
402   // DiscardVirtualMemory makes the contents of discarded memory undefined.
403   size_t last_slot = static_cast<size_t>(-1);
404 #endif
405   memset(slot_usage, 1, num_provisioned_slots);
406   uintptr_t slot_span_start = SlotSpanMetadata::ToSlotSpanStart(slot_span);
407   // First, walk the freelist for this slot span and make a bitmap of which
408   // slots are not in use.
409   for (EncodedNextFreelistEntry* entry = slot_span->get_freelist_head(); entry;
410        entry = entry->GetNext(slot_size)) {
411     size_t slot_number =
412         bucket->GetSlotNumber(SlotStartPtr2Addr(entry) - slot_span_start);
413     PA_DCHECK(slot_number < num_provisioned_slots);
414     slot_usage[slot_number] = 0;
415 #if !BUILDFLAG(IS_WIN)
416     // If we have a slot where the encoded next pointer is 0, we can actually
417     // discard that entry because touching a discarded page is guaranteed to
418     // return the original content or 0. (Note that this optimization won't be
419     // effective on big-endian machines because the masking function is
420     // negation.)
421     if (entry->IsEncodedNextPtrZero()) {
422       last_slot = slot_number;
423     }
424 #endif
425   }
426 
427   // If the slot(s) at the end of the slot span are not in use, we can truncate
428   // them entirely and rewrite the freelist.
429   size_t truncated_slots = 0;
430   while (!slot_usage[num_provisioned_slots - 1]) {
431     truncated_slots++;
432     num_provisioned_slots--;
433     PA_DCHECK(num_provisioned_slots);
434   }
435   // First, do the work of calculating the discardable bytes. Don't actually
436   // discard anything if `accounting_only` is set.
437   size_t unprovisioned_bytes = 0;
438   uintptr_t begin_addr = slot_span_start + (num_provisioned_slots * slot_size);
439   uintptr_t end_addr = begin_addr + (slot_size * truncated_slots);
440   if (truncated_slots) {
441     // The slots that do not contain discarded pages should not be included to
442     // |truncated_slots|. Detects those slots and fixes |truncated_slots| and
443     // |num_provisioned_slots| accordingly.
444     uintptr_t rounded_up_truncatation_begin_addr =
445         RoundUpToSystemPage(begin_addr);
446     while (begin_addr + slot_size <= rounded_up_truncatation_begin_addr) {
447       begin_addr += slot_size;
448       PA_DCHECK(truncated_slots);
449       --truncated_slots;
450       ++num_provisioned_slots;
451     }
452     begin_addr = rounded_up_truncatation_begin_addr;
453 
454     // We round the end address here up and not down because we're at the end of
455     // a slot span, so we "own" all the way up the page boundary.
456     end_addr = RoundUpToSystemPage(end_addr);
457     PA_DCHECK(end_addr <= slot_span_start + bucket->get_bytes_per_span());
458     if (begin_addr < end_addr) {
459       unprovisioned_bytes = end_addr - begin_addr;
460       discardable_bytes += unprovisioned_bytes;
461     }
462   }
463 
464   // If `accounting_only` isn't set, then take action to remove unprovisioned
465   // slots from the free list (if any) and "straighten" the list (if
466   // requested) to help reduce fragmentation in the future. Then
467   // discard/decommit the pages hosting the unprovisioned slots.
468   if (!accounting_only) {
469     auto straighten_mode =
470         PartitionRoot::GetStraightenLargerSlotSpanFreeListsMode();
471     bool straighten =
472         straighten_mode == StraightenLargerSlotSpanFreeListsMode::kAlways ||
473         (straighten_mode ==
474              StraightenLargerSlotSpanFreeListsMode::kOnlyWhenUnprovisioning &&
475          unprovisioned_bytes);
476 
477     PA_DCHECK((unprovisioned_bytes > 0) == (truncated_slots > 0));
478     size_t new_unprovisioned_slots =
479         truncated_slots + slot_span->num_unprovisioned_slots;
480     PA_DCHECK(new_unprovisioned_slots <= bucket->get_slots_per_span());
481     slot_span->num_unprovisioned_slots = new_unprovisioned_slots;
482 
483     size_t num_new_freelist_entries = 0;
484     internal::EncodedNextFreelistEntry* back = nullptr;
485     if (straighten) {
486       // Rewrite the freelist to "straighten" it. This achieves two things:
487       // getting rid of unprovisioned entries, ordering etnries based on how
488       // close they're to the slot span start. This reduces chances of
489       // allocating further slots, in hope that we'll get some unused pages at
490       // the end of the span that can be unprovisioned, thus reducing
491       // fragmentation.
492       for (size_t slot_index = 0; slot_index < num_provisioned_slots;
493            ++slot_index) {
494         if (slot_usage[slot_index]) {
495           continue;
496         }
497         // Add the slot to the end of the list. The most proper thing to do
498         // would be to null-terminate the new entry with:
499         //   auto* entry = EncodedNextFreelistEntry::EmplaceAndInitNull(
500         //       slot_span_start + (slot_size * slot_index));
501         // But no need to do this, as it's last-ness is likely temporary, and
502         // the next iteration's back->SetNext(), or the post-loop
503         // EncodedNextFreelistEntry::EmplaceAndInitNull(back) will override it
504         // anyway.
505         auto* entry = static_cast<EncodedNextFreelistEntry*>(
506             SlotStartAddr2Ptr(slot_span_start + (slot_size * slot_index)));
507         if (num_new_freelist_entries) {
508           back->SetNext(entry);
509         } else {
510           slot_span->SetFreelistHead(entry);
511         }
512         back = entry;
513         num_new_freelist_entries++;
514       }
515     } else if (unprovisioned_bytes) {
516       // If there are any unprovisioned entries, scan the list to remove them,
517       // without "straightening" it.
518       uintptr_t first_unprovisioned_slot =
519           slot_span_start + (num_provisioned_slots * slot_size);
520       bool skipped = false;
521       for (EncodedNextFreelistEntry* entry = slot_span->get_freelist_head();
522            entry; entry = entry->GetNext(slot_size)) {
523         uintptr_t entry_addr = SlotStartPtr2Addr(entry);
524         if (entry_addr >= first_unprovisioned_slot) {
525           skipped = true;
526           continue;
527         }
528         // If the last visited entry was skipped (due to being unprovisioned),
529         // update the next pointer of the last not skipped entry (or the head
530         // if no entry exists). Otherwise the link is already correct.
531         if (skipped) {
532           if (num_new_freelist_entries) {
533             back->SetNext(entry);
534           } else {
535             slot_span->SetFreelistHead(entry);
536           }
537           skipped = false;
538         }
539         back = entry;
540         num_new_freelist_entries++;
541       }
542     }
543     // If any of the above loops were executed, null-terminate the last entry,
544     // or the head if no entry exists.
545     if (straighten || unprovisioned_bytes) {
546       if (num_new_freelist_entries) {
547         PA_DCHECK(back);
548         EncodedNextFreelistEntry::EmplaceAndInitNull(back);
549 #if !BUILDFLAG(IS_WIN)
550         // Memorize index of the last slot in the list, as it may be able to
551         // participate in an optimization related to page discaring (below), due
552         // to its next pointer encoded as 0.
553         last_slot =
554             bucket->GetSlotNumber(SlotStartPtr2Addr(back) - slot_span_start);
555 #endif
556       } else {
557         PA_DCHECK(!back);
558         slot_span->SetFreelistHead(nullptr);
559       }
560       PA_DCHECK(num_new_freelist_entries ==
561                 num_provisioned_slots - slot_span->num_allocated_slots);
562     }
563 
564 #if BUILDFLAG(USE_FREESLOT_BITMAP)
565     FreeSlotBitmapReset(slot_span_start + (slot_size * num_provisioned_slots),
566                         end_addr, slot_size);
567 #endif
568 
569     if (unprovisioned_bytes) {
570       if (!kUseLazyCommit) {
571         // Discard the memory.
572         ScopedSyscallTimer timer{root};
573         DiscardSystemPages(begin_addr, unprovisioned_bytes);
574       } else {
575         // See crbug.com/1431606 to understand the detail. LazyCommit depends
576         // on the design: both used slots and unused slots (=in the freelist)
577         // are committed. However this removes the unused slots from the
578         // freelist. So if using DiscardSystemPages() here, PartitionAlloc may
579         // commit the system pages which has been already committed again.
580         // This will make commited_size and max_committed_size metrics wrong.
581         // PA should use DecommitSystemPagesForData() instead.
582         root->DecommitSystemPagesForData(
583             begin_addr, unprovisioned_bytes,
584             PageAccessibilityDisposition::kAllowKeepForPerf);
585       }
586     }
587   }
588 
589   if (slot_size < SystemPageSize()) {
590     // Returns here because implementing the following steps for smaller slot
591     // size will need a complicated logic and make the code messy.
592     return discardable_bytes;
593   }
594 
595   // Next, walk the slots and for any not in use, consider which system pages
596   // are no longer needed. We can discard any system pages back to the system as
597   // long as we don't interfere with a freelist pointer or an adjacent used
598   // slot. Note they'll be automatically paged back in when touched, and
599   // zero-initialized (except Windows).
600   for (size_t i = 0; i < num_provisioned_slots; ++i) {
601     if (slot_usage[i]) {
602       continue;
603     }
604 
605     // The first address we can safely discard is just after the freelist
606     // pointer. There's one optimization opportunity: if the freelist pointer is
607     // encoded as 0, we can discard that pointer value too (except on
608     // Windows).
609     begin_addr = slot_span_start + (i * slot_size);
610     end_addr = begin_addr + slot_size;
611     bool can_discard_free_list_pointer = false;
612 #if !BUILDFLAG(IS_WIN)
613     if (i != last_slot) {
614       begin_addr += sizeof(internal::EncodedNextFreelistEntry);
615     } else {
616       can_discard_free_list_pointer = true;
617     }
618 #else
619     begin_addr += sizeof(internal::EncodedNextFreelistEntry);
620 #endif
621 
622     uintptr_t rounded_up_begin_addr = RoundUpToSystemPage(begin_addr);
623     uintptr_t rounded_down_begin_addr = RoundDownToSystemPage(begin_addr);
624     end_addr = RoundDownToSystemPage(end_addr);
625 
626     // |rounded_up_begin_addr| could be greater than |end_addr| only if slot
627     // size was less than system page size, or if free list pointer crossed the
628     // page boundary. Neither is possible here.
629     PA_DCHECK(rounded_up_begin_addr <= end_addr);
630 
631     if (rounded_down_begin_addr < rounded_up_begin_addr && i != 0 &&
632         !slot_usage[i - 1] && can_discard_free_list_pointer) {
633       // This slot contains a partial page in the beginning. The rest of that
634       // page is contained in the slot[i-1], which is also discardable.
635       // Therefore we can discard this page.
636       begin_addr = rounded_down_begin_addr;
637     } else {
638       begin_addr = rounded_up_begin_addr;
639     }
640 
641     if (begin_addr < end_addr) {
642       size_t partial_slot_bytes = end_addr - begin_addr;
643       discardable_bytes += partial_slot_bytes;
644       if (!accounting_only) {
645         // Discard the pages. But don't be tempted to decommit it (as done
646         // above), because here we're getting rid of provisioned pages amidst
647         // used pages, so we're relying on them to materialize automatically
648         // when the virtual address is accessed, so the mapping needs to be
649         // intact.
650         ScopedSyscallTimer timer{root};
651         DiscardSystemPages(begin_addr, partial_slot_bytes);
652       }
653     }
654   }
655 
656   return discardable_bytes;
657 }
658 
PartitionPurgeBucket(PartitionRoot * root,internal::PartitionBucket * bucket)659 static void PartitionPurgeBucket(PartitionRoot* root,
660                                  internal::PartitionBucket* bucket)
661     PA_EXCLUSIVE_LOCKS_REQUIRED(internal::PartitionRootLock(root)) {
662   if (bucket->active_slot_spans_head !=
663       internal::SlotSpanMetadata::get_sentinel_slot_span()) {
664     for (internal::SlotSpanMetadata* slot_span = bucket->active_slot_spans_head;
665          slot_span; slot_span = slot_span->next_slot_span) {
666       PA_DCHECK(slot_span !=
667                 internal::SlotSpanMetadata::get_sentinel_slot_span());
668       PartitionPurgeSlotSpan(root, slot_span, false);
669     }
670   }
671 }
672 
PartitionDumpSlotSpanStats(PartitionBucketMemoryStats * stats_out,PartitionRoot * root,internal::SlotSpanMetadata * slot_span)673 static void PartitionDumpSlotSpanStats(PartitionBucketMemoryStats* stats_out,
674                                        PartitionRoot* root,
675                                        internal::SlotSpanMetadata* slot_span)
676     PA_EXCLUSIVE_LOCKS_REQUIRED(internal::PartitionRootLock(root)) {
677   uint16_t bucket_num_slots = slot_span->bucket->get_slots_per_span();
678 
679   if (slot_span->is_decommitted()) {
680     ++stats_out->num_decommitted_slot_spans;
681     return;
682   }
683 
684   stats_out->discardable_bytes += PartitionPurgeSlotSpan(root, slot_span, true);
685 
686   if (slot_span->CanStoreRawSize()) {
687     stats_out->active_bytes += static_cast<uint32_t>(slot_span->GetRawSize());
688   } else {
689     stats_out->active_bytes +=
690         (slot_span->num_allocated_slots * stats_out->bucket_slot_size);
691   }
692   stats_out->active_count += slot_span->num_allocated_slots;
693 
694   size_t slot_span_bytes_resident = RoundUpToSystemPage(
695       (bucket_num_slots - slot_span->num_unprovisioned_slots) *
696       stats_out->bucket_slot_size);
697   stats_out->resident_bytes += slot_span_bytes_resident;
698   if (slot_span->is_empty()) {
699     stats_out->decommittable_bytes += slot_span_bytes_resident;
700     ++stats_out->num_empty_slot_spans;
701   } else if (slot_span->is_full()) {
702     ++stats_out->num_full_slot_spans;
703   } else {
704     PA_DCHECK(slot_span->is_active());
705     ++stats_out->num_active_slot_spans;
706   }
707 }
708 
PartitionDumpBucketStats(PartitionBucketMemoryStats * stats_out,PartitionRoot * root,const internal::PartitionBucket * bucket)709 static void PartitionDumpBucketStats(PartitionBucketMemoryStats* stats_out,
710                                      PartitionRoot* root,
711                                      const internal::PartitionBucket* bucket)
712     PA_EXCLUSIVE_LOCKS_REQUIRED(internal::PartitionRootLock(root)) {
713   PA_DCHECK(!bucket->is_direct_mapped());
714   stats_out->is_valid = false;
715   // If the active slot span list is empty (==
716   // internal::SlotSpanMetadata::get_sentinel_slot_span()), the bucket might
717   // still need to be reported if it has a list of empty, decommitted or full
718   // slot spans.
719   if (bucket->active_slot_spans_head ==
720           internal::SlotSpanMetadata::get_sentinel_slot_span() &&
721       !bucket->empty_slot_spans_head && !bucket->decommitted_slot_spans_head &&
722       !bucket->num_full_slot_spans) {
723     return;
724   }
725 
726   memset(stats_out, '\0', sizeof(*stats_out));
727   stats_out->is_valid = true;
728   stats_out->is_direct_map = false;
729   stats_out->num_full_slot_spans =
730       static_cast<size_t>(bucket->num_full_slot_spans);
731   stats_out->bucket_slot_size = bucket->slot_size;
732   uint16_t bucket_num_slots = bucket->get_slots_per_span();
733   size_t bucket_useful_storage = stats_out->bucket_slot_size * bucket_num_slots;
734   stats_out->allocated_slot_span_size = bucket->get_bytes_per_span();
735   stats_out->active_bytes = bucket->num_full_slot_spans * bucket_useful_storage;
736   stats_out->active_count = bucket->num_full_slot_spans * bucket_num_slots;
737   stats_out->resident_bytes =
738       bucket->num_full_slot_spans * stats_out->allocated_slot_span_size;
739 
740   for (internal::SlotSpanMetadata* slot_span = bucket->empty_slot_spans_head;
741        slot_span; slot_span = slot_span->next_slot_span) {
742     PA_DCHECK(slot_span->is_empty() || slot_span->is_decommitted());
743     PartitionDumpSlotSpanStats(stats_out, root, slot_span);
744   }
745   for (internal::SlotSpanMetadata* slot_span =
746            bucket->decommitted_slot_spans_head;
747        slot_span; slot_span = slot_span->next_slot_span) {
748     PA_DCHECK(slot_span->is_decommitted());
749     PartitionDumpSlotSpanStats(stats_out, root, slot_span);
750   }
751 
752   if (bucket->active_slot_spans_head !=
753       internal::SlotSpanMetadata::get_sentinel_slot_span()) {
754     for (internal::SlotSpanMetadata* slot_span = bucket->active_slot_spans_head;
755          slot_span; slot_span = slot_span->next_slot_span) {
756       PA_DCHECK(slot_span !=
757                 internal::SlotSpanMetadata::get_sentinel_slot_span());
758       PartitionDumpSlotSpanStats(stats_out, root, slot_span);
759     }
760   }
761 }
762 
763 #if BUILDFLAG(PA_DCHECK_IS_ON)
DCheckIfManagedByPartitionAllocBRPPool(uintptr_t address)764 void DCheckIfManagedByPartitionAllocBRPPool(uintptr_t address) {
765   PA_DCHECK(IsManagedByPartitionAllocBRPPool(address));
766 }
767 #endif
768 
769 #if BUILDFLAG(ENABLE_THREAD_ISOLATION)
PartitionAllocThreadIsolationInit(ThreadIsolationOption thread_isolation)770 void PartitionAllocThreadIsolationInit(ThreadIsolationOption thread_isolation) {
771 #if BUILDFLAG(PA_DCHECK_IS_ON)
772   ThreadIsolationSettings::settings.enabled = true;
773 #endif
774   PartitionAddressSpace::InitThreadIsolatedPool(thread_isolation);
775   // Call WriteProtectThreadIsolatedGlobals last since we might not have write
776   // permissions to to globals afterwards.
777   WriteProtectThreadIsolatedGlobals(thread_isolation);
778 }
779 #endif  // BUILDFLAG(ENABLE_THREAD_ISOLATION)
780 
781 }  // namespace internal
782 
OutOfMemory(size_t size)783 [[noreturn]] PA_NOINLINE void PartitionRoot::OutOfMemory(size_t size) {
784   const size_t virtual_address_space_size =
785       total_size_of_super_pages.load(std::memory_order_relaxed) +
786       total_size_of_direct_mapped_pages.load(std::memory_order_relaxed);
787 #if !defined(ARCH_CPU_64_BITS)
788   const size_t uncommitted_size =
789       virtual_address_space_size -
790       total_size_of_committed_pages.load(std::memory_order_relaxed);
791 
792   // Check whether this OOM is due to a lot of super pages that are allocated
793   // but not committed, probably due to http://crbug.com/421387.
794   if (uncommitted_size > internal::kReasonableSizeOfUnusedPages) {
795     internal::PartitionOutOfMemoryWithLotsOfUncommitedPages(size);
796   }
797 
798 #if BUILDFLAG(IS_WIN)
799   // If true then we are running on 64-bit Windows.
800   BOOL is_wow_64 = FALSE;
801   // Intentionally ignoring failures.
802   IsWow64Process(GetCurrentProcess(), &is_wow_64);
803   // 32-bit address space on Windows is typically either 2 GiB (on 32-bit
804   // Windows) or 4 GiB (on 64-bit Windows). 2.8 and 1.0 GiB are just rough
805   // guesses as to how much address space PA can consume (note that code,
806   // stacks, and other allocators will also consume address space).
807   const size_t kReasonableVirtualSize = (is_wow_64 ? 2800 : 1024) * 1024 * 1024;
808   // Make it obvious whether we are running on 64-bit Windows.
809   PA_DEBUG_DATA_ON_STACK("iswow64", static_cast<size_t>(is_wow_64));
810 #else
811   constexpr size_t kReasonableVirtualSize =
812       // 1.5GiB elsewhere, since address space is typically 3GiB.
813       (1024 + 512) * 1024 * 1024;
814 #endif
815   if (virtual_address_space_size > kReasonableVirtualSize) {
816     internal::PartitionOutOfMemoryWithLargeVirtualSize(
817         virtual_address_space_size);
818   }
819 #endif  // #if !defined(ARCH_CPU_64_BITS)
820 
821   // Out of memory can be due to multiple causes, such as:
822   // - Out of virtual address space in the desired pool
823   // - Out of commit due to either our process, or another one
824   // - Excessive allocations in the current process
825   //
826   // Saving these values make it easier to distinguish between these. See the
827   // documentation in PA_CONFIG(DEBUG_DATA_ON_STACK) on how to get these from
828   // minidumps.
829   PA_DEBUG_DATA_ON_STACK("va_size", virtual_address_space_size);
830   PA_DEBUG_DATA_ON_STACK("alloc", get_total_size_of_allocated_bytes());
831   PA_DEBUG_DATA_ON_STACK("commit", get_total_size_of_committed_pages());
832   PA_DEBUG_DATA_ON_STACK("size", size);
833 
834   if (internal::g_oom_handling_function) {
835     (*internal::g_oom_handling_function)(size);
836   }
837   OOM_CRASH(size);
838 }
839 
DecommitEmptySlotSpans()840 void PartitionRoot::DecommitEmptySlotSpans() {
841   ShrinkEmptySlotSpansRing(0);
842   // Just decommitted everything, and holding the lock, should be exactly 0.
843   PA_DCHECK(empty_slot_spans_dirty_bytes == 0);
844 }
845 
DestructForTesting()846 void PartitionRoot::DestructForTesting() {
847   // We need to destruct the thread cache before we unreserve any of the super
848   // pages below, which we currently are not doing. So, we should only call
849   // this function on PartitionRoots without a thread cache.
850   PA_CHECK(!settings.with_thread_cache);
851   auto pool_handle = ChoosePool();
852 #if BUILDFLAG(ENABLE_THREAD_ISOLATION)
853   // The pages managed by thread isolated pool will be free-ed at
854   // UninitThreadIsolatedForTesting(). Don't invoke FreePages() for the pages.
855   if (pool_handle == internal::kThreadIsolatedPoolHandle) {
856     return;
857   }
858   PA_DCHECK(pool_handle < internal::kNumPools);
859 #else
860   PA_DCHECK(pool_handle <= internal::kNumPools);
861 #endif
862 
863   auto* curr = first_extent;
864   while (curr != nullptr) {
865     auto* next = curr->next;
866     uintptr_t address = SuperPagesBeginFromExtent(curr);
867     size_t size =
868         internal::kSuperPageSize * curr->number_of_consecutive_super_pages;
869 #if !BUILDFLAG(HAS_64_BIT_POINTERS)
870     internal::AddressPoolManager::GetInstance().MarkUnused(pool_handle, address,
871                                                            size);
872 #endif
873     internal::AddressPoolManager::GetInstance().UnreserveAndDecommit(
874         pool_handle, address, size);
875     curr = next;
876   }
877 }
878 
879 #if PA_CONFIG(ENABLE_MAC11_MALLOC_SIZE_HACK)
InitMac11MallocSizeHackUsableSize(size_t ref_count_size)880 void PartitionRoot::InitMac11MallocSizeHackUsableSize(size_t ref_count_size) {
881   settings.mac11_malloc_size_hack_enabled_ = true;
882 
883   // 0 means reserve just enough extras to fit PartitionRefCount.
884   if (!ref_count_size) {
885     ref_count_size = sizeof(internal::PartitionRefCount);
886   }
887   // Request of 32B will fall into a 48B bucket in the presence of BRP
888   // ref-count, yielding |48 - ref_count_size| of actual usable space.
889   settings.mac11_malloc_size_hack_usable_size_ = 48 - ref_count_size;
890 }
891 
EnableMac11MallocSizeHackForTesting(size_t ref_count_size)892 void PartitionRoot::EnableMac11MallocSizeHackForTesting(size_t ref_count_size) {
893   settings.mac11_malloc_size_hack_enabled_ = true;
894   InitMac11MallocSizeHackUsableSize(ref_count_size);
895 }
896 
EnableMac11MallocSizeHackIfNeeded(size_t ref_count_size)897 void PartitionRoot::EnableMac11MallocSizeHackIfNeeded(size_t ref_count_size) {
898   settings.mac11_malloc_size_hack_enabled_ =
899       settings.brp_enabled_ && internal::base::mac::MacOSMajorVersion() == 11;
900   if (settings.mac11_malloc_size_hack_enabled_) {
901     InitMac11MallocSizeHackUsableSize(ref_count_size);
902   }
903 }
904 #endif  // PA_CONFIG(ENABLE_MAC11_MALLOC_SIZE_HACK)
905 
906 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) && !BUILDFLAG(HAS_64_BIT_POINTERS)
907 namespace {
908 std::atomic<bool> g_reserve_brp_guard_region_called;
909 // An address constructed by repeating `kQuarantinedByte` shouldn't never point
910 // to valid memory. Preemptively reserve a memory region around that address and
911 // make it inaccessible. Not needed for 64-bit platforms where the address is
912 // guaranteed to be non-canonical. Safe to call multiple times.
ReserveBackupRefPtrGuardRegionIfNeeded()913 void ReserveBackupRefPtrGuardRegionIfNeeded() {
914   bool expected = false;
915   // No need to block execution for potential concurrent initialization, merely
916   // want to make sure this is only called once.
917   if (!g_reserve_brp_guard_region_called.compare_exchange_strong(expected,
918                                                                  true)) {
919     return;
920   }
921 
922   size_t alignment = internal::PageAllocationGranularity();
923   uintptr_t requested_address;
924   memset(&requested_address, internal::kQuarantinedByte,
925          sizeof(requested_address));
926   requested_address = RoundDownToPageAllocationGranularity(requested_address);
927 
928   // Request several pages so that even unreasonably large C++ objects stay
929   // within the inaccessible region. If some of the pages can't be reserved,
930   // it's still preferable to try and reserve the rest.
931   for (size_t i = 0; i < 4; ++i) {
932     [[maybe_unused]] uintptr_t allocated_address =
933         AllocPages(requested_address, alignment, alignment,
934                    PageAccessibilityConfiguration(
935                        PageAccessibilityConfiguration::kInaccessible),
936                    PageTag::kPartitionAlloc);
937     requested_address += alignment;
938   }
939 }
940 }  // namespace
941 #endif  // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) &&
942         // !BUILDFLAG(HAS_64_BIT_POINTERS)
943 
Init(PartitionOptions opts)944 void PartitionRoot::Init(PartitionOptions opts) {
945   {
946 #if BUILDFLAG(IS_APPLE)
947     // Needed to statically bound page size, which is a runtime constant on
948     // apple OSes.
949     PA_CHECK((internal::SystemPageSize() == (size_t{1} << 12)) ||
950              (internal::SystemPageSize() == (size_t{1} << 14)));
951 #elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
952     // Check runtime pagesize. Though the code is currently the same, it is
953     // not merged with the IS_APPLE case above as a 1 << 16 case needs to be
954     // added here in the future, to allow 64 kiB pagesize. That is only
955     // supported on Linux on arm64, not on IS_APPLE, but not yet present here
956     // as the rest of the partition allocator does not currently support it.
957     PA_CHECK((internal::SystemPageSize() == (size_t{1} << 12)) ||
958              (internal::SystemPageSize() == (size_t{1} << 14)));
959 #endif
960 
961     ::partition_alloc::internal::ScopedGuard guard{lock_};
962     if (initialized) {
963       return;
964     }
965 
966 #if BUILDFLAG(HAS_64_BIT_POINTERS)
967     // Reserve address space for partition alloc.
968     internal::PartitionAddressSpace::Init();
969 #endif
970 
971 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) && !BUILDFLAG(HAS_64_BIT_POINTERS)
972     ReserveBackupRefPtrGuardRegionIfNeeded();
973 #endif
974 
975     settings.allow_aligned_alloc =
976         opts.aligned_alloc == PartitionOptions::kAllowed;
977 #if BUILDFLAG(PA_DCHECK_IS_ON)
978     settings.use_cookie = true;
979 #else
980     static_assert(!Settings::use_cookie);
981 #endif  // BUILDFLAG(PA_DCHECK_IS_ON)
982 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
983     settings.brp_enabled_ = opts.backup_ref_ptr == PartitionOptions::kEnabled;
984 #if PA_CONFIG(ENABLE_MAC11_MALLOC_SIZE_HACK)
985     EnableMac11MallocSizeHackIfNeeded(opts.ref_count_size);
986 #endif
987 #else   // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
988     PA_CHECK(opts.backup_ref_ptr == PartitionOptions::kDisabled);
989 #endif  // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
990     settings.use_configurable_pool =
991         (opts.use_configurable_pool == PartitionOptions::kAllowed) &&
992         IsConfigurablePoolAvailable();
993     PA_DCHECK(!settings.use_configurable_pool || IsConfigurablePoolAvailable());
994     settings.zapping_by_free_flags =
995         opts.zapping_by_free_flags == PartitionOptions::kEnabled;
996 #if PA_CONFIG(HAS_MEMORY_TAGGING)
997     settings.memory_tagging_enabled_ =
998         opts.memory_tagging.enabled == PartitionOptions::kEnabled;
999     // Memory tagging is not supported in the configurable pool because MTE
1000     // stores tagging information in the high bits of the pointer, it causes
1001     // issues with components like V8's ArrayBuffers which use custom pointer
1002     // representations. All custom representations encountered so far rely on an
1003     // "is in configurable pool?" check, so we use that as a proxy.
1004     PA_CHECK(!settings.memory_tagging_enabled_ ||
1005              !settings.use_configurable_pool);
1006 
1007     settings.memory_tagging_reporting_mode_ =
1008         opts.memory_tagging.reporting_mode;
1009 #endif  // PA_CONFIG(HAS_MEMORY_TAGGING)
1010 
1011     // brp_enabled() is not supported in the configurable pool because
1012     // BRP requires objects to be in a different Pool.
1013     PA_CHECK(!(settings.use_configurable_pool && brp_enabled()));
1014 
1015 #if BUILDFLAG(ENABLE_THREAD_ISOLATION)
1016     // BRP and thread isolated mode use different pools, so they can't be
1017     // enabled at the same time.
1018     PA_CHECK(!opts.thread_isolation.enabled ||
1019              opts.backup_ref_ptr == PartitionOptions::kDisabled);
1020     settings.thread_isolation = opts.thread_isolation;
1021 #endif  // BUILDFLAG(ENABLE_THREAD_ISOLATION)
1022 
1023     // Ref-count messes up alignment needed for AlignedAlloc, making this
1024     // option incompatible. However, except in the
1025     // PUT_REF_COUNT_IN_PREVIOUS_SLOT case.
1026 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) && \
1027     !BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT)
1028     PA_CHECK(!settings.allow_aligned_alloc || !settings.brp_enabled_);
1029 #endif
1030 
1031 #if PA_CONFIG(EXTRAS_REQUIRED)
1032     settings.extras_size = 0;
1033     settings.extras_offset = 0;
1034 
1035     if (settings.use_cookie) {
1036       settings.extras_size += internal::kPartitionCookieSizeAdjustment;
1037     }
1038 
1039     if (brp_enabled()) {
1040       // TODO(tasak): In the PUT_REF_COUNT_IN_PREVIOUS_SLOT case, ref-count is
1041       // stored out-of-line for single-slot slot spans, so no need to
1042       // add/subtract its size in this case.
1043       size_t ref_count_size = opts.ref_count_size;
1044       if (!ref_count_size) {
1045         ref_count_size = internal::kPartitionRefCountSizeAdjustment;
1046       }
1047       ref_count_size = internal::AlignUpRefCountSizeForMac(ref_count_size);
1048 #if PA_CONFIG(INCREASE_REF_COUNT_SIZE_FOR_MTE)
1049       if (IsMemoryTaggingEnabled()) {
1050         ref_count_size = internal::base::bits::AlignUp(
1051             ref_count_size, internal::kMemTagGranuleSize);
1052       }
1053       settings.ref_count_size = ref_count_size;
1054 #endif  // PA_CONFIG(INCREASE_REF_COUNT_SIZE_FOR_MTE)
1055       PA_CHECK(internal::kPartitionRefCountSizeAdjustment <= ref_count_size);
1056       settings.extras_size += ref_count_size;
1057       settings.extras_offset += internal::kPartitionRefCountOffsetAdjustment;
1058     }
1059 #endif  // PA_CONFIG(EXTRAS_REQUIRED)
1060 
1061     // Re-confirm the above PA_CHECKs, by making sure there are no
1062     // pre-allocation extras when AlignedAlloc is allowed. Post-allocation
1063     // extras are ok.
1064     PA_CHECK(!settings.allow_aligned_alloc || !settings.extras_offset);
1065 
1066     settings.quarantine_mode =
1067 #if BUILDFLAG(USE_STARSCAN)
1068         (opts.star_scan_quarantine == PartitionOptions::kDisallowed
1069              ? QuarantineMode::kAlwaysDisabled
1070              : QuarantineMode::kDisabledByDefault);
1071 #else
1072         QuarantineMode::kAlwaysDisabled;
1073 #endif  // BUILDFLAG(USE_STARSCAN)
1074 
1075     // We mark the sentinel slot span as free to make sure it is skipped by our
1076     // logic to find a new active slot span.
1077     memset(&sentinel_bucket, 0, sizeof(sentinel_bucket));
1078     sentinel_bucket.active_slot_spans_head =
1079         SlotSpan::get_sentinel_slot_span_non_const();
1080 
1081     // This is a "magic" value so we can test if a root pointer is valid.
1082     inverted_self = ~reinterpret_cast<uintptr_t>(this);
1083 
1084     // Set up the actual usable buckets first.
1085     constexpr internal::BucketIndexLookup lookup{};
1086     size_t bucket_index = 0;
1087     while (lookup.bucket_sizes()[bucket_index] !=
1088            internal::kInvalidBucketSize) {
1089       buckets[bucket_index].Init(lookup.bucket_sizes()[bucket_index]);
1090       bucket_index++;
1091     }
1092     PA_DCHECK(bucket_index < internal::kNumBuckets);
1093 
1094     // Remaining buckets are not usable, and not real.
1095     for (size_t index = bucket_index; index < internal::kNumBuckets; index++) {
1096       // Cannot init with size 0 since it computes 1 / size, but make sure the
1097       // bucket is invalid.
1098       buckets[index].Init(internal::kInvalidBucketSize);
1099       buckets[index].active_slot_spans_head = nullptr;
1100       PA_DCHECK(!buckets[index].is_valid());
1101     }
1102 
1103 #if !PA_CONFIG(THREAD_CACHE_SUPPORTED)
1104     // TLS in ThreadCache not supported on other OSes.
1105     settings.with_thread_cache = false;
1106 #else
1107     ThreadCache::EnsureThreadSpecificDataInitialized();
1108     settings.with_thread_cache =
1109         (opts.thread_cache == PartitionOptions::kEnabled);
1110 
1111     if (settings.with_thread_cache) {
1112       ThreadCache::Init(this);
1113     }
1114 #endif  // !PA_CONFIG(THREAD_CACHE_SUPPORTED)
1115 
1116 #if PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
1117     internal::PartitionRootEnumerator::Instance().Register(this);
1118 #endif
1119 
1120     initialized = true;
1121   }
1122 
1123   // Called without the lock, might allocate.
1124 #if BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
1125   PartitionAllocMallocInitOnce();
1126 #endif
1127 
1128 #if BUILDFLAG(ENABLE_THREAD_ISOLATION)
1129   if (settings.thread_isolation.enabled) {
1130     internal::PartitionAllocThreadIsolationInit(settings.thread_isolation);
1131   }
1132 #endif
1133 }
1134 
1135 PartitionRoot::Settings::Settings() = default;
1136 
PartitionRoot()1137 PartitionRoot::PartitionRoot()
1138     : scheduler_loop_quarantine_root(*this),
1139       scheduler_loop_quarantine(
1140           scheduler_loop_quarantine_root
1141               .CreateBranch<internal::SchedulerLoopQuarantineBranch::
1142                                 kQuarantineCapacityCount>()) {}
1143 
PartitionRoot(PartitionOptions opts)1144 PartitionRoot::PartitionRoot(PartitionOptions opts)
1145     : scheduler_loop_quarantine_root(
1146           *this,
1147           opts.scheduler_loop_quarantine_capacity_in_bytes),
1148       scheduler_loop_quarantine(
1149           scheduler_loop_quarantine_root
1150               .CreateBranch<internal::SchedulerLoopQuarantineBranch::
1151                                 kQuarantineCapacityCount>()) {
1152   Init(opts);
1153 }
1154 
~PartitionRoot()1155 PartitionRoot::~PartitionRoot() {
1156 #if BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
1157   PA_CHECK(!settings.with_thread_cache)
1158       << "Must not destroy a partition with a thread cache";
1159 #endif  // BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
1160 
1161 #if PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
1162   if (initialized) {
1163     internal::PartitionRootEnumerator::Instance().Unregister(this);
1164   }
1165 #endif  // PA_CONFIG(USE_PARTITION_ALLOC_ENUMERATOR)
1166 }
1167 
EnableThreadCacheIfSupported()1168 void PartitionRoot::EnableThreadCacheIfSupported() {
1169 #if PA_CONFIG(THREAD_CACHE_SUPPORTED)
1170   ::partition_alloc::internal::ScopedGuard guard{lock_};
1171   PA_CHECK(!settings.with_thread_cache);
1172   // By the time we get there, there may be multiple threads created in the
1173   // process. Since `with_thread_cache` is accessed without a lock, it can
1174   // become visible to another thread before the effects of
1175   // `internal::ThreadCacheInit()` are visible. To prevent that, we fake thread
1176   // cache creation being in-progress while this is running.
1177   //
1178   // This synchronizes with the acquire load in `MaybeInitThreadCacheAndAlloc()`
1179   // to ensure that we don't create (and thus use) a ThreadCache before
1180   // ThreadCache::Init()'s effects are visible.
1181   int before =
1182       thread_caches_being_constructed_.fetch_add(1, std::memory_order_acquire);
1183   PA_CHECK(before == 0);
1184   ThreadCache::Init(this);
1185   thread_caches_being_constructed_.fetch_sub(1, std::memory_order_release);
1186   settings.with_thread_cache = true;
1187 #endif  // PA_CONFIG(THREAD_CACHE_SUPPORTED)
1188 }
1189 
TryReallocInPlaceForDirectMap(internal::SlotSpanMetadata * slot_span,size_t requested_size)1190 bool PartitionRoot::TryReallocInPlaceForDirectMap(
1191     internal::SlotSpanMetadata* slot_span,
1192     size_t requested_size) {
1193   PA_DCHECK(slot_span->bucket->is_direct_mapped());
1194   // Slot-span metadata isn't MTE-tagged.
1195   PA_DCHECK(
1196       internal::IsManagedByDirectMap(reinterpret_cast<uintptr_t>(slot_span)));
1197 
1198   size_t raw_size = AdjustSizeForExtrasAdd(requested_size);
1199   auto* extent = DirectMapExtent::FromSlotSpan(slot_span);
1200   size_t current_reservation_size = extent->reservation_size;
1201   // Calculate the new reservation size the way PartitionDirectMap() would, but
1202   // skip the alignment, because this call isn't requesting it.
1203   size_t new_reservation_size = GetDirectMapReservationSize(raw_size);
1204 
1205   // If new reservation would be larger, there is nothing we can do to
1206   // reallocate in-place.
1207   if (new_reservation_size > current_reservation_size) {
1208     return false;
1209   }
1210 
1211   // Don't reallocate in-place if new reservation size would be less than 80 %
1212   // of the current one, to avoid holding on to too much unused address space.
1213   // Make this check before comparing slot sizes, as even with equal or similar
1214   // slot sizes we can save a lot if the original allocation was heavily padded
1215   // for alignment.
1216   if ((new_reservation_size >> internal::SystemPageShift()) * 5 <
1217       (current_reservation_size >> internal::SystemPageShift()) * 4) {
1218     return false;
1219   }
1220 
1221   // Note that the new size isn't a bucketed size; this function is called
1222   // whenever we're reallocating a direct mapped allocation, so calculate it
1223   // the way PartitionDirectMap() would.
1224   size_t new_slot_size = GetDirectMapSlotSize(raw_size);
1225   if (new_slot_size < internal::kMinDirectMappedDownsize) {
1226     return false;
1227   }
1228 
1229   // Past this point, we decided we'll attempt to reallocate without relocating,
1230   // so we have to honor the padding for alignment in front of the original
1231   // allocation, even though this function isn't requesting any alignment.
1232 
1233   // bucket->slot_size is the currently committed size of the allocation.
1234   size_t current_slot_size = slot_span->bucket->slot_size;
1235   size_t current_usable_size = GetSlotUsableSize(slot_span);
1236   uintptr_t slot_start = SlotSpan::ToSlotSpanStart(slot_span);
1237   // This is the available part of the reservation up to which the new
1238   // allocation can grow.
1239   size_t available_reservation_size =
1240       current_reservation_size - extent->padding_for_alignment -
1241       PartitionRoot::GetDirectMapMetadataAndGuardPagesSize();
1242 #if BUILDFLAG(PA_DCHECK_IS_ON)
1243   uintptr_t reservation_start = slot_start & internal::kSuperPageBaseMask;
1244   PA_DCHECK(internal::IsReservationStart(reservation_start));
1245   PA_DCHECK(slot_start + available_reservation_size ==
1246             reservation_start + current_reservation_size -
1247                 GetDirectMapMetadataAndGuardPagesSize() +
1248                 internal::PartitionPageSize());
1249 #endif
1250 
1251   PA_DCHECK(new_slot_size > internal::kMaxMemoryTaggingSize);
1252   if (new_slot_size == current_slot_size) {
1253     // No need to move any memory around, but update size and cookie below.
1254     // That's because raw_size may have changed.
1255   } else if (new_slot_size < current_slot_size) {
1256     // Shrink by decommitting unneeded pages and making them inaccessible.
1257     size_t decommit_size = current_slot_size - new_slot_size;
1258     DecommitSystemPagesForData(slot_start + new_slot_size, decommit_size,
1259                                PageAccessibilityDisposition::kRequireUpdate);
1260     // Since the decommited system pages are still reserved, we don't need to
1261     // change the entries for decommitted pages in the reservation offset table.
1262   } else if (new_slot_size <= available_reservation_size) {
1263     // Grow within the actually reserved address space. Just need to make the
1264     // pages accessible again.
1265     size_t recommit_slot_size_growth = new_slot_size - current_slot_size;
1266     // Direct map never uses tagging, as size is always >kMaxMemoryTaggingSize.
1267     RecommitSystemPagesForData(
1268         slot_start + current_slot_size, recommit_slot_size_growth,
1269         PageAccessibilityDisposition::kRequireUpdate, false);
1270     // The recommited system pages had been already reserved and all the
1271     // entries in the reservation offset table (for entire reservation_size
1272     // region) have been already initialized.
1273 
1274 #if BUILDFLAG(PA_DCHECK_IS_ON)
1275     memset(reinterpret_cast<void*>(slot_start + current_slot_size),
1276            internal::kUninitializedByte, recommit_slot_size_growth);
1277 #endif
1278   } else {
1279     // We can't perform the realloc in-place.
1280     // TODO: support this too when possible.
1281     return false;
1282   }
1283 
1284   DecreaseTotalSizeOfAllocatedBytes(reinterpret_cast<uintptr_t>(slot_span),
1285                                     slot_span->bucket->slot_size);
1286   slot_span->SetRawSize(raw_size);
1287   slot_span->bucket->slot_size = new_slot_size;
1288   IncreaseTotalSizeOfAllocatedBytes(reinterpret_cast<uintptr_t>(slot_span),
1289                                     slot_span->bucket->slot_size, raw_size);
1290 
1291   // Always record in-place realloc() as free()+malloc() pair.
1292   //
1293   // The early returns above (`return false`) will fall back to free()+malloc(),
1294   // so this is consistent.
1295   auto* thread_cache = GetOrCreateThreadCache();
1296   if (ThreadCache::IsValid(thread_cache)) {
1297     thread_cache->RecordDeallocation(current_usable_size);
1298     thread_cache->RecordAllocation(GetSlotUsableSize(slot_span));
1299   }
1300 
1301   // Write a new trailing cookie.
1302   if (settings.use_cookie) {
1303     auto* object = static_cast<unsigned char*>(SlotStartToObject(slot_start));
1304     internal::PartitionCookieWriteValue(object + GetSlotUsableSize(slot_span));
1305   }
1306 
1307   return true;
1308 }
1309 
TryReallocInPlaceForNormalBuckets(void * object,SlotSpan * slot_span,size_t new_size)1310 bool PartitionRoot::TryReallocInPlaceForNormalBuckets(void* object,
1311                                                       SlotSpan* slot_span,
1312                                                       size_t new_size) {
1313   uintptr_t slot_start = ObjectToSlotStart(object);
1314   PA_DCHECK(internal::IsManagedByNormalBuckets(slot_start));
1315 
1316   // TODO: note that tcmalloc will "ignore" a downsizing realloc() unless the
1317   // new size is a significant percentage smaller. We could do the same if we
1318   // determine it is a win.
1319   if (AllocationCapacityFromRequestedSize(new_size) !=
1320       AllocationCapacityFromSlotStart(slot_start)) {
1321     return false;
1322   }
1323   size_t current_usable_size = GetSlotUsableSize(slot_span);
1324 
1325   // Trying to allocate |new_size| would use the same amount of underlying
1326   // memory as we're already using, so re-use the allocation after updating
1327   // statistics (and cookie, if present).
1328   if (slot_span->CanStoreRawSize()) {
1329 #if BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT) && BUILDFLAG(PA_DCHECK_IS_ON)
1330     internal::PartitionRefCount* old_ref_count = nullptr;
1331     if (brp_enabled()) {
1332       old_ref_count = internal::PartitionRefCountPointer(slot_start);
1333     }
1334 #endif  // BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT) &&
1335         // BUILDFLAG(PA_DCHECK_IS_ON)
1336     size_t new_raw_size = AdjustSizeForExtrasAdd(new_size);
1337     slot_span->SetRawSize(new_raw_size);
1338 #if BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT) && BUILDFLAG(PA_DCHECK_IS_ON)
1339     if (brp_enabled()) {
1340       internal::PartitionRefCount* new_ref_count =
1341           internal::PartitionRefCountPointer(slot_start);
1342       PA_DCHECK(new_ref_count == old_ref_count);
1343     }
1344 #endif  // BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT) &&
1345         // BUILDFLAG(PA_DCHECK_IS_ON)
1346     // Write a new trailing cookie only when it is possible to keep track
1347     // raw size (otherwise we wouldn't know where to look for it later).
1348     if (settings.use_cookie) {
1349       internal::PartitionCookieWriteValue(static_cast<unsigned char*>(object) +
1350                                           GetSlotUsableSize(slot_span));
1351     }
1352   }
1353 
1354   // Always record a realloc() as a free() + malloc(), even if it's in
1355   // place. When we cannot do it in place (`return false` above), the allocator
1356   // falls back to free()+malloc(), so this is consistent.
1357   ThreadCache* thread_cache = GetOrCreateThreadCache();
1358   if (PA_LIKELY(ThreadCache::IsValid(thread_cache))) {
1359     thread_cache->RecordDeallocation(current_usable_size);
1360     thread_cache->RecordAllocation(GetSlotUsableSize(slot_span));
1361   }
1362 
1363   return object;
1364 }
1365 
PurgeMemory(int flags)1366 void PartitionRoot::PurgeMemory(int flags) {
1367   {
1368     ::partition_alloc::internal::ScopedGuard guard{
1369         internal::PartitionRootLock(this)};
1370 #if BUILDFLAG(USE_STARSCAN)
1371     // Avoid purging if there is PCScan task currently scheduled. Since pcscan
1372     // takes snapshot of all allocated pages, decommitting pages here (even
1373     // under the lock) is racy.
1374     // TODO(bikineev): Consider rescheduling the purging after PCScan.
1375     if (PCScan::IsInProgress()) {
1376       return;
1377     }
1378 #endif  // BUILDFLAG(USE_STARSCAN)
1379 
1380     if (flags & PurgeFlags::kDecommitEmptySlotSpans) {
1381       DecommitEmptySlotSpans();
1382     }
1383     if (flags & PurgeFlags::kDiscardUnusedSystemPages) {
1384       for (Bucket& bucket : buckets) {
1385         if (bucket.slot_size == internal::kInvalidBucketSize) {
1386           continue;
1387         }
1388 
1389         if (bucket.slot_size >= internal::MinPurgeableSlotSize()) {
1390           internal::PartitionPurgeBucket(this, &bucket);
1391         } else {
1392           if (sort_smaller_slot_span_free_lists_) {
1393             bucket.SortSmallerSlotSpanFreeLists();
1394           }
1395         }
1396 
1397         // Do it at the end, as the actions above change the status of slot
1398         // spans (e.g. empty -> decommitted).
1399         bucket.MaintainActiveList();
1400 
1401         if (sort_active_slot_spans_) {
1402           bucket.SortActiveSlotSpans();
1403         }
1404       }
1405     }
1406   }
1407 }
1408 
ShrinkEmptySlotSpansRing(size_t limit)1409 void PartitionRoot::ShrinkEmptySlotSpansRing(size_t limit) {
1410   int16_t index = global_empty_slot_span_ring_index;
1411   int16_t starting_index = index;
1412   while (empty_slot_spans_dirty_bytes > limit) {
1413     SlotSpan* slot_span = global_empty_slot_span_ring[index];
1414     // The ring is not always full, may be nullptr.
1415     if (slot_span) {
1416       slot_span->DecommitIfPossible(this);
1417       global_empty_slot_span_ring[index] = nullptr;
1418     }
1419     index += 1;
1420     // Walk through the entirety of possible slots, even though the last ones
1421     // are unused, if global_empty_slot_span_ring_size is smaller than
1422     // kMaxFreeableSpans. It's simpler, and does not cost anything, since all
1423     // the pointers are going to be nullptr.
1424     if (index == internal::kMaxFreeableSpans) {
1425       index = 0;
1426     }
1427 
1428     // Went around the whole ring, since this is locked,
1429     // empty_slot_spans_dirty_bytes should be exactly 0.
1430     if (index == starting_index) {
1431       PA_DCHECK(empty_slot_spans_dirty_bytes == 0);
1432       // Metrics issue, don't crash, return.
1433       break;
1434     }
1435   }
1436 }
1437 
DumpStats(const char * partition_name,bool is_light_dump,PartitionStatsDumper * dumper)1438 void PartitionRoot::DumpStats(const char* partition_name,
1439                               bool is_light_dump,
1440                               PartitionStatsDumper* dumper) {
1441   static const size_t kMaxReportableDirectMaps = 4096;
1442   // Allocate on the heap rather than on the stack to avoid stack overflow
1443   // skirmishes (on Windows, in particular). Allocate before locking below,
1444   // otherwise when PartitionAlloc is malloc() we get reentrancy issues. This
1445   // inflates reported values a bit for detailed dumps though, by 16kiB.
1446   std::unique_ptr<uint32_t[]> direct_map_lengths;
1447   if (!is_light_dump) {
1448     direct_map_lengths =
1449         std::unique_ptr<uint32_t[]>(new uint32_t[kMaxReportableDirectMaps]);
1450   }
1451   PartitionBucketMemoryStats bucket_stats[internal::kNumBuckets];
1452   size_t num_direct_mapped_allocations = 0;
1453   PartitionMemoryStats stats = {};
1454 
1455   stats.syscall_count = syscall_count.load(std::memory_order_relaxed);
1456   stats.syscall_total_time_ns =
1457       syscall_total_time_ns.load(std::memory_order_relaxed);
1458 
1459   // Collect data with the lock held, cannot allocate or call third-party code
1460   // below.
1461   {
1462     ::partition_alloc::internal::ScopedGuard guard{
1463         internal::PartitionRootLock(this)};
1464     PA_DCHECK(total_size_of_allocated_bytes <= max_size_of_allocated_bytes);
1465 
1466     stats.total_mmapped_bytes =
1467         total_size_of_super_pages.load(std::memory_order_relaxed) +
1468         total_size_of_direct_mapped_pages.load(std::memory_order_relaxed);
1469     stats.total_committed_bytes =
1470         total_size_of_committed_pages.load(std::memory_order_relaxed);
1471     stats.max_committed_bytes =
1472         max_size_of_committed_pages.load(std::memory_order_relaxed);
1473     stats.total_allocated_bytes = total_size_of_allocated_bytes;
1474     stats.max_allocated_bytes = max_size_of_allocated_bytes;
1475 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
1476     stats.total_brp_quarantined_bytes =
1477         total_size_of_brp_quarantined_bytes.load(std::memory_order_relaxed);
1478     stats.total_brp_quarantined_count =
1479         total_count_of_brp_quarantined_slots.load(std::memory_order_relaxed);
1480     stats.cumulative_brp_quarantined_bytes =
1481         cumulative_size_of_brp_quarantined_bytes.load(
1482             std::memory_order_relaxed);
1483     stats.cumulative_brp_quarantined_count =
1484         cumulative_count_of_brp_quarantined_slots.load(
1485             std::memory_order_relaxed);
1486 #endif
1487 
1488     size_t direct_mapped_allocations_total_size = 0;
1489     for (size_t i = 0; i < internal::kNumBuckets; ++i) {
1490       const Bucket* bucket = &bucket_at(i);
1491       // Don't report the pseudo buckets that the generic allocator sets up in
1492       // order to preserve a fast size->bucket map (see
1493       // PartitionRoot::Init() for details).
1494       if (!bucket->is_valid()) {
1495         bucket_stats[i].is_valid = false;
1496       } else {
1497         internal::PartitionDumpBucketStats(&bucket_stats[i], this, bucket);
1498       }
1499       if (bucket_stats[i].is_valid) {
1500         stats.total_resident_bytes += bucket_stats[i].resident_bytes;
1501         stats.total_active_bytes += bucket_stats[i].active_bytes;
1502         stats.total_active_count += bucket_stats[i].active_count;
1503         stats.total_decommittable_bytes += bucket_stats[i].decommittable_bytes;
1504         stats.total_discardable_bytes += bucket_stats[i].discardable_bytes;
1505       }
1506     }
1507 
1508     for (DirectMapExtent* extent = direct_map_list;
1509          extent && num_direct_mapped_allocations < kMaxReportableDirectMaps;
1510          extent = extent->next_extent, ++num_direct_mapped_allocations) {
1511       PA_DCHECK(!extent->next_extent ||
1512                 extent->next_extent->prev_extent == extent);
1513       size_t slot_size = extent->bucket->slot_size;
1514       direct_mapped_allocations_total_size += slot_size;
1515       if (is_light_dump) {
1516         continue;
1517       }
1518       direct_map_lengths[num_direct_mapped_allocations] = slot_size;
1519     }
1520 
1521     stats.total_resident_bytes += direct_mapped_allocations_total_size;
1522     stats.total_active_bytes += direct_mapped_allocations_total_size;
1523     stats.total_active_count += num_direct_mapped_allocations;
1524 
1525     stats.has_thread_cache = settings.with_thread_cache;
1526     if (stats.has_thread_cache) {
1527       ThreadCacheRegistry::Instance().DumpStats(
1528           true, &stats.current_thread_cache_stats);
1529       ThreadCacheRegistry::Instance().DumpStats(false,
1530                                                 &stats.all_thread_caches_stats);
1531     }
1532   }
1533 
1534   // Do not hold the lock when calling |dumper|, as it may allocate.
1535   if (!is_light_dump) {
1536     for (auto& stat : bucket_stats) {
1537       if (stat.is_valid) {
1538         dumper->PartitionsDumpBucketStats(partition_name, &stat);
1539       }
1540     }
1541 
1542     for (size_t i = 0; i < num_direct_mapped_allocations; ++i) {
1543       uint32_t size = direct_map_lengths[i];
1544 
1545       PartitionBucketMemoryStats mapped_stats = {};
1546       mapped_stats.is_valid = true;
1547       mapped_stats.is_direct_map = true;
1548       mapped_stats.num_full_slot_spans = 1;
1549       mapped_stats.allocated_slot_span_size = size;
1550       mapped_stats.bucket_slot_size = size;
1551       mapped_stats.active_bytes = size;
1552       mapped_stats.active_count = 1;
1553       mapped_stats.resident_bytes = size;
1554       dumper->PartitionsDumpBucketStats(partition_name, &mapped_stats);
1555     }
1556   }
1557   dumper->PartitionDumpTotals(partition_name, &stats);
1558 }
1559 
1560 // static
DeleteForTesting(PartitionRoot * partition_root)1561 void PartitionRoot::DeleteForTesting(PartitionRoot* partition_root) {
1562   if (partition_root->settings.with_thread_cache) {
1563     ThreadCache::SwapForTesting(nullptr);
1564     partition_root->settings.with_thread_cache = false;
1565   }
1566 
1567   partition_root->DestructForTesting();  // IN-TEST
1568 
1569   delete partition_root;
1570 }
1571 
ResetForTesting(bool allow_leaks)1572 void PartitionRoot::ResetForTesting(bool allow_leaks) {
1573   if (settings.with_thread_cache) {
1574     ThreadCache::SwapForTesting(nullptr);
1575     settings.with_thread_cache = false;
1576   }
1577 
1578   ::partition_alloc::internal::ScopedGuard guard{
1579       internal::PartitionRootLock(this)};
1580 
1581 #if BUILDFLAG(PA_DCHECK_IS_ON)
1582   if (!allow_leaks) {
1583     unsigned num_allocated_slots = 0;
1584     for (Bucket& bucket : buckets) {
1585       if (bucket.active_slot_spans_head !=
1586           internal::SlotSpanMetadata::get_sentinel_slot_span()) {
1587         for (internal::SlotSpanMetadata* slot_span =
1588                  bucket.active_slot_spans_head;
1589              slot_span; slot_span = slot_span->next_slot_span) {
1590           num_allocated_slots += slot_span->num_allocated_slots;
1591         }
1592       }
1593       // Full slot spans are nowhere. Need to see bucket.num_full_slot_spans
1594       // to count the number of full slot spans' slots.
1595       if (bucket.num_full_slot_spans) {
1596         num_allocated_slots +=
1597             bucket.num_full_slot_spans * bucket.get_slots_per_span();
1598       }
1599     }
1600     PA_DCHECK(num_allocated_slots == 0);
1601 
1602     // Check for direct-mapped allocations.
1603     PA_DCHECK(!direct_map_list);
1604   }
1605 #endif
1606 
1607   DestructForTesting();  // IN-TEST
1608 
1609 #if PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
1610   if (initialized) {
1611     internal::PartitionRootEnumerator::Instance().Unregister(this);
1612   }
1613 #endif  // PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
1614 
1615   for (Bucket& bucket : buckets) {
1616     bucket.active_slot_spans_head =
1617         SlotSpan::get_sentinel_slot_span_non_const();
1618     bucket.empty_slot_spans_head = nullptr;
1619     bucket.decommitted_slot_spans_head = nullptr;
1620     bucket.num_full_slot_spans = 0;
1621   }
1622 
1623   next_super_page = 0;
1624   next_partition_page = 0;
1625   next_partition_page_end = 0;
1626   current_extent = nullptr;
1627   first_extent = nullptr;
1628 
1629   direct_map_list = nullptr;
1630   for (auto*& entity : global_empty_slot_span_ring) {
1631     entity = nullptr;
1632   }
1633 
1634   global_empty_slot_span_ring_index = 0;
1635   global_empty_slot_span_ring_size = internal::kDefaultEmptySlotSpanRingSize;
1636   initialized = false;
1637 }
1638 
ResetBookkeepingForTesting()1639 void PartitionRoot::ResetBookkeepingForTesting() {
1640   ::partition_alloc::internal::ScopedGuard guard{
1641       internal::PartitionRootLock(this)};
1642   max_size_of_allocated_bytes = total_size_of_allocated_bytes;
1643   max_size_of_committed_pages.store(total_size_of_committed_pages);
1644 }
1645 
MaybeInitThreadCache()1646 ThreadCache* PartitionRoot::MaybeInitThreadCache() {
1647   auto* tcache = ThreadCache::Get();
1648   // See comment in `EnableThreadCacheIfSupport()` for why this is an acquire
1649   // load.
1650   if (ThreadCache::IsTombstone(tcache) ||
1651       thread_caches_being_constructed_.load(std::memory_order_acquire)) {
1652     // Two cases:
1653     // 1. Thread is being terminated, don't try to use the thread cache, and
1654     //    don't try to resurrect it.
1655     // 2. Someone, somewhere is currently allocating a thread cache. This may
1656     //    be us, in which case we are re-entering and should not create a thread
1657     //    cache. If it is not us, then this merely delays thread cache
1658     //    construction a bit, which is not an issue.
1659     return nullptr;
1660   }
1661 
1662   // There is no per-thread ThreadCache allocated here yet, and this partition
1663   // has a thread cache, allocate a new one.
1664   //
1665   // The thread cache allocation itself will not reenter here, as it sidesteps
1666   // the thread cache by using placement new and |RawAlloc()|. However,
1667   // internally to libc, allocations may happen to create a new TLS
1668   // variable. This would end up here again, which is not what we want (and
1669   // likely is not supported by libc).
1670   //
1671   // To avoid this sort of reentrancy, increase the count of thread caches that
1672   // are currently allocating a thread cache.
1673   //
1674   // Note that there is no deadlock or data inconsistency concern, since we do
1675   // not hold the lock, and has such haven't touched any internal data.
1676   int before =
1677       thread_caches_being_constructed_.fetch_add(1, std::memory_order_relaxed);
1678   PA_CHECK(before < std::numeric_limits<int>::max());
1679   tcache = ThreadCache::Create(this);
1680   thread_caches_being_constructed_.fetch_sub(1, std::memory_order_relaxed);
1681 
1682   return tcache;
1683 }
1684 
1685 // static
SetStraightenLargerSlotSpanFreeListsMode(StraightenLargerSlotSpanFreeListsMode new_value)1686 void PartitionRoot::SetStraightenLargerSlotSpanFreeListsMode(
1687     StraightenLargerSlotSpanFreeListsMode new_value) {
1688   straighten_larger_slot_span_free_lists_ = new_value;
1689 }
1690 
1691 // static
SetSortSmallerSlotSpanFreeListsEnabled(bool new_value)1692 void PartitionRoot::SetSortSmallerSlotSpanFreeListsEnabled(bool new_value) {
1693   sort_smaller_slot_span_free_lists_ = new_value;
1694 }
1695 
1696 // static
SetSortActiveSlotSpansEnabled(bool new_value)1697 void PartitionRoot::SetSortActiveSlotSpansEnabled(bool new_value) {
1698   sort_active_slot_spans_ = new_value;
1699 }
1700 
1701 // Explicitly define common template instantiations to reduce compile time.
1702 #define EXPORT_TEMPLATE \
1703   template PA_EXPORT_TEMPLATE_DEFINE(PA_COMPONENT_EXPORT(PARTITION_ALLOC))
1704 EXPORT_TEMPLATE void* PartitionRoot::Alloc<AllocFlags::kNone>(size_t,
1705                                                               const char*);
1706 EXPORT_TEMPLATE void* PartitionRoot::Alloc<AllocFlags::kReturnNull>(
1707     size_t,
1708     const char*);
1709 EXPORT_TEMPLATE void*
1710 PartitionRoot::Realloc<AllocFlags::kNone, FreeFlags::kNone>(void*,
1711                                                             size_t,
1712                                                             const char*);
1713 EXPORT_TEMPLATE void*
1714 PartitionRoot::Realloc<AllocFlags::kReturnNull, FreeFlags::kNone>(void*,
1715                                                                   size_t,
1716                                                                   const char*);
1717 EXPORT_TEMPLATE void* PartitionRoot::AlignedAlloc<AllocFlags::kNone>(size_t,
1718                                                                      size_t);
1719 #undef EXPORT_TEMPLATE
1720 
1721 // TODO(https://crbug.com/1500662) Stop ignoring the -Winvalid-offsetof warning.
1722 #if defined(__clang__)
1723 #pragma clang diagnostic push
1724 #pragma clang diagnostic ignored "-Winvalid-offsetof"
1725 #endif
1726 static_assert(offsetof(PartitionRoot, sentinel_bucket) ==
1727                   offsetof(PartitionRoot, buckets) +
1728                       internal::kNumBuckets * sizeof(PartitionRoot::Bucket),
1729               "sentinel_bucket must be just after the regular buckets.");
1730 
1731 static_assert(
1732     offsetof(PartitionRoot, lock_) >= 64,
1733     "The lock should not be on the same cacheline as the read-mostly flags");
1734 #if defined(__clang__)
1735 #pragma clang diagnostic pop
1736 #endif
1737 
1738 }  // namespace partition_alloc
1739