1 // Copyright 2020 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "partition_alloc/partition_root.h"
6
7 #include <cstdint>
8
9 #include "build/build_config.h"
10 #include "partition_alloc/freeslot_bitmap.h"
11 #include "partition_alloc/oom.h"
12 #include "partition_alloc/page_allocator.h"
13 #include "partition_alloc/partition_address_space.h"
14 #include "partition_alloc/partition_alloc-inl.h"
15 #include "partition_alloc/partition_alloc_base/bits.h"
16 #include "partition_alloc/partition_alloc_base/compiler_specific.h"
17 #include "partition_alloc/partition_alloc_base/component_export.h"
18 #include "partition_alloc/partition_alloc_base/debug/debugging_buildflags.h"
19 #include "partition_alloc/partition_alloc_base/thread_annotations.h"
20 #include "partition_alloc/partition_alloc_buildflags.h"
21 #include "partition_alloc/partition_alloc_check.h"
22 #include "partition_alloc/partition_alloc_config.h"
23 #include "partition_alloc/partition_alloc_constants.h"
24 #include "partition_alloc/partition_bucket.h"
25 #include "partition_alloc/partition_cookie.h"
26 #include "partition_alloc/partition_oom.h"
27 #include "partition_alloc/partition_page.h"
28 #include "partition_alloc/partition_ref_count.h"
29 #include "partition_alloc/reservation_offset_table.h"
30 #include "partition_alloc/tagging.h"
31 #include "partition_alloc/thread_isolation/thread_isolation.h"
32
33 #if BUILDFLAG(IS_MAC)
34 #include "partition_alloc/partition_alloc_base/mac/mac_util.h"
35 #endif
36
37 #if BUILDFLAG(USE_STARSCAN)
38 #include "partition_alloc/starscan/pcscan.h"
39 #endif
40
41 #if !BUILDFLAG(HAS_64_BIT_POINTERS)
42 #include "partition_alloc/address_pool_manager_bitmap.h"
43 #endif
44
45 #if BUILDFLAG(IS_WIN)
46 #include <windows.h>
47 #include "wow64apiset.h"
48 #endif
49
50 #if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
51 #include <pthread.h>
52 #endif
53
54 namespace partition_alloc::internal {
55
56 #if BUILDFLAG(RECORD_ALLOC_INFO)
57 // Even if this is not hidden behind a BUILDFLAG, it should not use any memory
58 // when recording is disabled, since it ends up in the .bss section.
59 AllocInfo g_allocs = {};
60
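// Best-effort recording into a fixed-size ring buffer: the index is a relaxed
// atomic counter taken modulo kAllocInfoSize, so concurrent writers may
// occasionally overwrite each other's entries, which is acceptable for this
// debugging aid.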
61 void RecordAllocOrFree(uintptr_t addr, size_t size) {
62 g_allocs.allocs[g_allocs.index.fetch_add(1, std::memory_order_relaxed) %
63 kAllocInfoSize] = {addr, size};
64 }
65 #endif // BUILDFLAG(RECORD_ALLOC_INFO)
66
67 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
68 PtrPosWithinAlloc IsPtrWithinSameAlloc(uintptr_t orig_address,
69 uintptr_t test_address,
70 size_t type_size) {
71 // Required for pointers right past an allocation. See
72 // |PartitionAllocGetSlotStartInBRPPool()|.
73 uintptr_t adjusted_address =
74 orig_address - kPartitionPastAllocationAdjustment;
75 PA_DCHECK(IsManagedByNormalBucketsOrDirectMap(adjusted_address));
76 DCheckIfManagedByPartitionAllocBRPPool(adjusted_address);
77
78 uintptr_t slot_start = PartitionAllocGetSlotStartInBRPPool(adjusted_address);
79 // Don't use |adjusted_address| beyond this point at all. It was needed to
80 // pick the right slot, but now we're dealing with very concrete addresses.
81 // Zero it just in case, to catch errors.
82 adjusted_address = 0;
83
84 auto* slot_span = SlotSpanMetadata::FromSlotStart(slot_start);
85 auto* root = PartitionRoot::FromSlotSpan(slot_span);
86 // Double check that ref-count is indeed present.
87 PA_DCHECK(root->brp_enabled());
88
89 uintptr_t object_addr = root->SlotStartToObjectAddr(slot_start);
90 uintptr_t object_end = object_addr + root->GetSlotUsableSize(slot_span);
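// Illustrative example (not derived from this particular allocation): with a
// 32-byte usable size and an 8-byte |type_size|, [object_addr, object_end - 8]
// is kInBounds, (object_end - 8, object_end] is kAllocEnd (only when OOB
// pointer poisoning is compiled in), and everything else is kFarOOB.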
91 if (test_address < object_addr || object_end < test_address) {
92 return PtrPosWithinAlloc::kFarOOB;
93 #if BUILDFLAG(BACKUP_REF_PTR_POISON_OOB_PTR)
94 } else if (object_end - type_size < test_address) {
95 // Not even a single element of the type referenced by the pointer can fit
96 // between the pointer and the end of the object.
97 return PtrPosWithinAlloc::kAllocEnd;
98 #endif
99 } else {
100 return PtrPosWithinAlloc::kInBounds;
101 }
102 }
103 #endif // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
104
105 } // namespace partition_alloc::internal
106
107 namespace partition_alloc {
108
109 #if PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
110
111 namespace {
112 internal::Lock g_root_enumerator_lock;
113 }
114
115 internal::Lock& PartitionRoot::GetEnumeratorLock() {
116 return g_root_enumerator_lock;
117 }
118
119 namespace internal {
120
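// Maintains an intrusive doubly-linked list of every registered PartitionRoot,
// guarded by PartitionRoot::GetEnumeratorLock(). The fork() handlers below use
// it to lock all roots before fork() and to unlock (or re-init) their locks
// afterwards, in reverse order.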
121 class PartitionRootEnumerator {
122 public:
123 using EnumerateCallback = void (*)(PartitionRoot* root, bool in_child);
124 enum EnumerateOrder {
125 kNormal,
126 kReverse,
127 };
128
129 static PartitionRootEnumerator& Instance() {
130 static PartitionRootEnumerator instance;
131 return instance;
132 }
133
134 void Enumerate(EnumerateCallback callback,
135 bool in_child,
136 EnumerateOrder order) PA_NO_THREAD_SAFETY_ANALYSIS {
137 if (order == kNormal) {
138 PartitionRoot* root;
139 for (root = Head(partition_roots_); root != nullptr;
140 root = root->next_root) {
141 callback(root, in_child);
142 }
143 } else {
144 PA_DCHECK(order == kReverse);
145 PartitionRoot* root;
146 for (root = Tail(partition_roots_); root != nullptr;
147 root = root->prev_root) {
148 callback(root, in_child);
149 }
150 }
151 }
152
153 void Register(PartitionRoot* root) {
154 internal::ScopedGuard guard(PartitionRoot::GetEnumeratorLock());
155 root->next_root = partition_roots_;
156 root->prev_root = nullptr;
157 if (partition_roots_) {
158 partition_roots_->prev_root = root;
159 }
160 partition_roots_ = root;
161 }
162
163 void Unregister(PartitionRoot* root) {
164 internal::ScopedGuard guard(PartitionRoot::GetEnumeratorLock());
165 PartitionRoot* prev = root->prev_root;
166 PartitionRoot* next = root->next_root;
167 if (prev) {
168 PA_DCHECK(prev->next_root == root);
169 prev->next_root = next;
170 } else {
171 PA_DCHECK(partition_roots_ == root);
172 partition_roots_ = next;
173 }
174 if (next) {
175 PA_DCHECK(next->prev_root == root);
176 next->prev_root = prev;
177 }
178 root->next_root = nullptr;
179 root->prev_root = nullptr;
180 }
181
182 private:
183 constexpr PartitionRootEnumerator() = default;
184
185 PartitionRoot* Head(PartitionRoot* roots) { return roots; }
186
187 PartitionRoot* Tail(PartitionRoot* roots) PA_NO_THREAD_SAFETY_ANALYSIS {
188 if (!roots) {
189 return nullptr;
190 }
191 PartitionRoot* node = roots;
192 for (; node->next_root != nullptr; node = node->next_root)
193 ;
194 return node;
195 }
196
197 PartitionRoot* partition_roots_
198 PA_GUARDED_BY(PartitionRoot::GetEnumeratorLock()) = nullptr;
199 };
200
201 } // namespace internal
202
203 #endif // PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
204
205 #if BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
206
207 namespace {
208
209 #if PA_CONFIG(HAS_ATFORK_HANDLER)
210
211 void LockRoot(PartitionRoot* root, bool) PA_NO_THREAD_SAFETY_ANALYSIS {
212 PA_DCHECK(root);
213 internal::PartitionRootLock(root).Acquire();
214 }
215
216 // PA_NO_THREAD_SAFETY_ANALYSIS: acquires the lock and doesn't release it, by
217 // design.
218 void BeforeForkInParent() PA_NO_THREAD_SAFETY_ANALYSIS {
219 // PartitionRoot::GetLock() is private. So use
220 // g_root_enumerator_lock here.
221 g_root_enumerator_lock.Acquire();
222 internal::PartitionRootEnumerator::Instance().Enumerate(
223 LockRoot, false,
224 internal::PartitionRootEnumerator::EnumerateOrder::kNormal);
225
226 ThreadCacheRegistry::GetLock().Acquire();
227 }
228
229 template <typename T>
230 void UnlockOrReinit(T& lock, bool in_child) PA_NO_THREAD_SAFETY_ANALYSIS {
231 // Only re-init the locks in the child process; in the parent we can unlock
232 // normally.
233 if (in_child) {
234 lock.Reinit();
235 } else {
236 lock.Release();
237 }
238 }
239
240 void UnlockOrReinitRoot(PartitionRoot* root,
241 bool in_child) PA_NO_THREAD_SAFETY_ANALYSIS {
242 UnlockOrReinit(internal::PartitionRootLock(root), in_child);
243 }
244
245 void ReleaseLocks(bool in_child) PA_NO_THREAD_SAFETY_ANALYSIS {
246 // In reverse order, even though there are no lock ordering dependencies.
247 UnlockOrReinit(ThreadCacheRegistry::GetLock(), in_child);
248 internal::PartitionRootEnumerator::Instance().Enumerate(
249 UnlockOrReinitRoot, in_child,
250 internal::PartitionRootEnumerator::EnumerateOrder::kReverse);
251
252 // PartitionRoot::GetLock() is private. So use
253 // g_root_enumerator_lock here.
254 UnlockOrReinit(g_root_enumerator_lock, in_child);
255 }
256
257 void AfterForkInParent() {
258 ReleaseLocks(/* in_child = */ false);
259 }
260
261 void AfterForkInChild() {
262 ReleaseLocks(/* in_child = */ true);
263 // Unsafe, as noted in the name. This is fine here however, since at this
264 // point there is only one thread, this one (unless another post-fork()
265 // handler created a thread, but it would have needed to allocate, which would
266 // have deadlocked the process already).
267 //
268 // If we don't reclaim this memory, it is lost forever. Note that this is only
269 // really an issue if we fork() a multi-threaded process without calling
270 // exec() right away, which is discouraged.
271 ThreadCacheRegistry::Instance().ForcePurgeAllThreadAfterForkUnsafe();
272 }
273 #endif // PA_CONFIG(HAS_ATFORK_HANDLER)
274
275 std::atomic<bool> g_global_init_called;
276 void PartitionAllocMallocInitOnce() {
277 bool expected = false;
278 // No need to block execution for potential concurrent initialization, merely
279 // want to make sure this is only called once.
280 if (!g_global_init_called.compare_exchange_strong(expected, true)) {
281 return;
282 }
283
284 #if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
285 // When fork() is called, only the current thread continues to execute in the
286 // child process. If the lock is held, but *not* by this thread when fork() is
287 // called, we have a deadlock.
288 //
289 // The "solution" here is to acquire the lock on the forking thread before
290 // fork(), and keep it held until fork() is done, in the parent and the
291 // child. To clean up memory, we also must empty the thread caches in the
292 // child, which is easier, since no threads except for the current one are
293 // running right after the fork().
294 //
295 // This is not perfect though, since:
296 // - Multiple pre/post-fork() handlers can be registered; they are then run in
297 // LIFO order for the pre-fork handlers, and FIFO order for the post-fork
298 // ones. So unless we are the first to register a handler, if another handler
299 // allocates, then we deterministically deadlock.
300 // - pthread handlers are *not* called when the application calls clone()
301 // directly, which is what Chrome does to launch processes.
302 //
303 // However, no perfect solution really exists to make threads + fork()
304 // cooperate, but deadlocks are real (and fork() is used in DEATH_TEST()s),
305 // and other malloc() implementations use the same techniques.
306 int err =
307 pthread_atfork(BeforeForkInParent, AfterForkInParent, AfterForkInChild);
308 PA_CHECK(err == 0);
309 #endif // BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
310 }
311
312 } // namespace
313
314 #if BUILDFLAG(IS_APPLE)
315 void PartitionAllocMallocHookOnBeforeForkInParent() {
316 BeforeForkInParent();
317 }
318
319 void PartitionAllocMallocHookOnAfterForkInParent() {
320 AfterForkInParent();
321 }
322
323 void PartitionAllocMallocHookOnAfterForkInChild() {
324 AfterForkInChild();
325 }
326 #endif // BUILDFLAG(IS_APPLE)
327
328 #endif // BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
329
330 namespace internal {
331
332 namespace {
333 // 64 was chosen arbitrarily, as it seems like a reasonable trade-off between
334 // performance and purging opportunity. Higher value (i.e. smaller slots)
335 // wouldn't necessarily increase chances of purging, but would result in
336 // more work and larger |slot_usage| array. Lower value would probably decrease
337 // chances of purging. Not empirically tested.
338 constexpr size_t kMaxPurgeableSlotsPerSystemPage = 64;
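// For example, with a 4 KiB system page, MinPurgeableSlotSize() below returns
// 64 bytes; PartitionPurgeSlotSpan() skips slot spans with smaller slots.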
339 PA_ALWAYS_INLINE PAGE_ALLOCATOR_CONSTANTS_DECLARE_CONSTEXPR size_t
340 MinPurgeableSlotSize() {
341 return SystemPageSize() / kMaxPurgeableSlotsPerSystemPage;
342 }
343 } // namespace
344
345 // The function attempts to unprovision unused slots and discard unused pages.
346 // It may also "straighten" the free list.
347 //
348 // If `accounting_only` is set to true, no action is performed and the function
349 // merely returns the number of bytes in the would-be discarded pages.
350 static size_t PartitionPurgeSlotSpan(PartitionRoot* root,
351 internal::SlotSpanMetadata* slot_span,
352 bool accounting_only)
353 PA_EXCLUSIVE_LOCKS_REQUIRED(internal::PartitionRootLock(root)) {
354 const internal::PartitionBucket* bucket = slot_span->bucket;
355 size_t slot_size = bucket->slot_size;
356
357 if (slot_size < MinPurgeableSlotSize() || !slot_span->num_allocated_slots) {
358 return 0;
359 }
360
361 size_t bucket_num_slots = bucket->get_slots_per_span();
362 size_t discardable_bytes = 0;
363
364 if (slot_span->CanStoreRawSize()) {
365 uint32_t utilized_slot_size = static_cast<uint32_t>(
366 RoundUpToSystemPage(slot_span->GetUtilizedSlotSize()));
367 discardable_bytes = bucket->slot_size - utilized_slot_size;
368 if (discardable_bytes && !accounting_only) {
369 uintptr_t slot_span_start =
370 internal::SlotSpanMetadata::ToSlotSpanStart(slot_span);
371 uintptr_t committed_data_end = slot_span_start + utilized_slot_size;
372 ScopedSyscallTimer timer{root};
373 DiscardSystemPages(committed_data_end, discardable_bytes);
374 }
375 return discardable_bytes;
376 }
377
378 #if defined(PAGE_ALLOCATOR_CONSTANTS_ARE_CONSTEXPR)
379 constexpr size_t kMaxSlotCount =
380 (PartitionPageSize() * kMaxPartitionPagesPerRegularSlotSpan) /
381 MinPurgeableSlotSize();
382 #elif BUILDFLAG(IS_APPLE) || (BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64))
383 // It's better for slot_usage to be stack-allocated and fixed-size, which
384 // demands that its size be constexpr. On IS_APPLE and Linux on arm64,
385 // PartitionPageSize() is always SystemPageSize() << 2, so regardless of
386 // what the run time page size is, kMaxSlotCount can always be simplified
387 // to this expression.
388 constexpr size_t kMaxSlotCount =
389 4 * kMaxPurgeableSlotsPerSystemPage *
390 internal::kMaxPartitionPagesPerRegularSlotSpan;
391 PA_CHECK(kMaxSlotCount == (PartitionPageSize() *
392 internal::kMaxPartitionPagesPerRegularSlotSpan) /
393 MinPurgeableSlotSize());
394 #endif
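// Rough sizing example (assuming 4 KiB system pages, 16 KiB partition pages
// and kMaxPartitionPagesPerRegularSlotSpan == 4): kMaxSlotCount is
// (16384 * 4) / 64 = 1024, so |slot_usage| below takes 1 KiB of stack.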
395 PA_DCHECK(bucket_num_slots <= kMaxSlotCount);
396 PA_DCHECK(slot_span->num_unprovisioned_slots < bucket_num_slots);
397 size_t num_provisioned_slots =
398 bucket_num_slots - slot_span->num_unprovisioned_slots;
399 char slot_usage[kMaxSlotCount];
400 #if !BUILDFLAG(IS_WIN)
401 // The last freelist entry should not be discarded on Windows, because
402 // DiscardVirtualMemory makes the contents of discarded memory undefined.
403 size_t last_slot = static_cast<size_t>(-1);
404 #endif
405 memset(slot_usage, 1, num_provisioned_slots);
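// |slot_usage| starts as all ones (every provisioned slot presumed in use);
// the freelist walk below clears the byte for each free slot.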
406 uintptr_t slot_span_start = SlotSpanMetadata::ToSlotSpanStart(slot_span);
407 // First, walk the freelist for this slot span and make a bitmap of which
408 // slots are not in use.
409 for (EncodedNextFreelistEntry* entry = slot_span->get_freelist_head(); entry;
410 entry = entry->GetNext(slot_size)) {
411 size_t slot_number =
412 bucket->GetSlotNumber(SlotStartPtr2Addr(entry) - slot_span_start);
413 PA_DCHECK(slot_number < num_provisioned_slots);
414 slot_usage[slot_number] = 0;
415 #if !BUILDFLAG(IS_WIN)
416 // If we have a slot where the encoded next pointer is 0, we can actually
417 // discard that entry because touching a discarded page is guaranteed to
418 // return the original content or 0. (Note that this optimization won't be
419 // effective on big-endian machines because the masking function is
420 // negation.)
421 if (entry->IsEncodedNextPtrZero()) {
422 last_slot = slot_number;
423 }
424 #endif
425 }
426
427 // If the slot(s) at the end of the slot span are not in use, we can truncate
428 // them entirely and rewrite the freelist.
429 size_t truncated_slots = 0;
430 while (!slot_usage[num_provisioned_slots - 1]) {
431 truncated_slots++;
432 num_provisioned_slots--;
433 PA_DCHECK(num_provisioned_slots);
434 }
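// After this loop, |truncated_slots| counts the trailing free slots, and the
// slot at index |num_provisioned_slots - 1| is the last one still in use.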
435 // First, do the work of calculating the discardable bytes. Don't actually
436 // discard anything if `accounting_only` is set.
437 size_t unprovisioned_bytes = 0;
438 uintptr_t begin_addr = slot_span_start + (num_provisioned_slots * slot_size);
439 uintptr_t end_addr = begin_addr + (slot_size * truncated_slots);
440 if (truncated_slots) {
441 // Slots that do not contain discarded pages should not be included in
442 // |truncated_slots|. Detect those slots and fix |truncated_slots| and
443 // |num_provisioned_slots| accordingly.
444 uintptr_t rounded_up_truncation_begin_addr =
445 RoundUpToSystemPage(begin_addr);
446 while (begin_addr + slot_size <= rounded_up_truncation_begin_addr) {
447 begin_addr += slot_size;
448 PA_DCHECK(truncated_slots);
449 --truncated_slots;
450 ++num_provisioned_slots;
451 }
452 begin_addr = rounded_up_truncation_begin_addr;
453
454 // We round the end address here up and not down because we're at the end of
455 // a slot span, so we "own" all the way up to the page boundary.
456 end_addr = RoundUpToSystemPage(end_addr);
457 PA_DCHECK(end_addr <= slot_span_start + bucket->get_bytes_per_span());
458 if (begin_addr < end_addr) {
459 unprovisioned_bytes = end_addr - begin_addr;
460 discardable_bytes += unprovisioned_bytes;
461 }
462 }
463
464 // If `accounting_only` isn't set, then take action to remove unprovisioned
465 // slots from the free list (if any) and "straighten" the list (if
466 // requested) to help reduce fragmentation in the future. Then
467 // discard/decommit the pages hosting the unprovisioned slots.
468 if (!accounting_only) {
469 auto straighten_mode =
470 PartitionRoot::GetStraightenLargerSlotSpanFreeListsMode();
471 bool straighten =
472 straighten_mode == StraightenLargerSlotSpanFreeListsMode::kAlways ||
473 (straighten_mode ==
474 StraightenLargerSlotSpanFreeListsMode::kOnlyWhenUnprovisioning &&
475 unprovisioned_bytes);
476
477 PA_DCHECK((unprovisioned_bytes > 0) == (truncated_slots > 0));
478 size_t new_unprovisioned_slots =
479 truncated_slots + slot_span->num_unprovisioned_slots;
480 PA_DCHECK(new_unprovisioned_slots <= bucket->get_slots_per_span());
481 slot_span->num_unprovisioned_slots = new_unprovisioned_slots;
482
483 size_t num_new_freelist_entries = 0;
484 internal::EncodedNextFreelistEntry* back = nullptr;
485 if (straighten) {
486 // Rewrite the freelist to "straighten" it. This achieves two things:
487 // getting rid of unprovisioned entries, and ordering entries based on how
488 // close they are to the slot span start. This reduces the chances of
489 // allocating farther slots, in the hope that we'll get some unused pages at
490 // the end of the span that can be unprovisioned, thus reducing
491 // fragmentation.
492 for (size_t slot_index = 0; slot_index < num_provisioned_slots;
493 ++slot_index) {
494 if (slot_usage[slot_index]) {
495 continue;
496 }
497 // Add the slot to the end of the list. The most proper thing to do
498 // would be to null-terminate the new entry with:
499 // auto* entry = EncodedNextFreelistEntry::EmplaceAndInitNull(
500 // slot_span_start + (slot_size * slot_index));
501 // But there is no need, as its last-ness is likely temporary, and the
502 // next iteration's back->SetNext(), or the post-loop
503 // EncodedNextFreelistEntry::EmplaceAndInitNull(back), will overwrite it
504 // anyway.
505 auto* entry = static_cast<EncodedNextFreelistEntry*>(
506 SlotStartAddr2Ptr(slot_span_start + (slot_size * slot_index)));
507 if (num_new_freelist_entries) {
508 back->SetNext(entry);
509 } else {
510 slot_span->SetFreelistHead(entry);
511 }
512 back = entry;
513 num_new_freelist_entries++;
514 }
515 } else if (unprovisioned_bytes) {
516 // If there are any unprovisioned entries, scan the list to remove them,
517 // without "straightening" it.
518 uintptr_t first_unprovisioned_slot =
519 slot_span_start + (num_provisioned_slots * slot_size);
520 bool skipped = false;
521 for (EncodedNextFreelistEntry* entry = slot_span->get_freelist_head();
522 entry; entry = entry->GetNext(slot_size)) {
523 uintptr_t entry_addr = SlotStartPtr2Addr(entry);
524 if (entry_addr >= first_unprovisioned_slot) {
525 skipped = true;
526 continue;
527 }
528 // If the last visited entry was skipped (due to being unprovisioned),
529 // update the next pointer of the last not skipped entry (or the head
530 // if no entry exists). Otherwise the link is already correct.
531 if (skipped) {
532 if (num_new_freelist_entries) {
533 back->SetNext(entry);
534 } else {
535 slot_span->SetFreelistHead(entry);
536 }
537 skipped = false;
538 }
539 back = entry;
540 num_new_freelist_entries++;
541 }
542 }
543 // If any of the above loops were executed, null-terminate the last entry,
544 // or the head if no entry exists.
545 if (straighten || unprovisioned_bytes) {
546 if (num_new_freelist_entries) {
547 PA_DCHECK(back);
548 EncodedNextFreelistEntry::EmplaceAndInitNull(back);
549 #if !BUILDFLAG(IS_WIN)
550 // Memorize the index of the last slot in the list, as it may be able to
551 // participate in an optimization related to page discarding (below), due
552 // to its next pointer being encoded as 0.
553 last_slot =
554 bucket->GetSlotNumber(SlotStartPtr2Addr(back) - slot_span_start);
555 #endif
556 } else {
557 PA_DCHECK(!back);
558 slot_span->SetFreelistHead(nullptr);
559 }
560 PA_DCHECK(num_new_freelist_entries ==
561 num_provisioned_slots - slot_span->num_allocated_slots);
562 }
563
564 #if BUILDFLAG(USE_FREESLOT_BITMAP)
565 FreeSlotBitmapReset(slot_span_start + (slot_size * num_provisioned_slots),
566 end_addr, slot_size);
567 #endif
568
569 if (unprovisioned_bytes) {
570 if (!kUseLazyCommit) {
571 // Discard the memory.
572 ScopedSyscallTimer timer{root};
573 DiscardSystemPages(begin_addr, unprovisioned_bytes);
574 } else {
575 // See crbug.com/1431606 for details. LazyCommit depends on the
576 // invariant that both used slots and unused slots (i.e. those on the
577 // freelist) are committed. However, this code removes the unused slots
578 // from the freelist, so using DiscardSystemPages() here could lead
579 // PartitionAlloc to commit again system pages that are already committed.
580 // That would make the committed_size and max_committed_size metrics
581 // wrong. PA should use DecommitSystemPagesForData() instead.
582 root->DecommitSystemPagesForData(
583 begin_addr, unprovisioned_bytes,
584 PageAccessibilityDisposition::kAllowKeepForPerf);
585 }
586 }
587 }
588
589 if (slot_size < SystemPageSize()) {
590 // Return here because implementing the following steps for smaller slot
591 // sizes would require complicated logic and make the code messy.
592 return discardable_bytes;
593 }
594
595 // Next, walk the slots and for any not in use, consider which system pages
596 // are no longer needed. We can discard any system pages back to the system as
597 // long as we don't interfere with a freelist pointer or an adjacent used
598 // slot. Note they'll be automatically paged back in when touched, and
599 // zero-initialized (except Windows).
600 for (size_t i = 0; i < num_provisioned_slots; ++i) {
601 if (slot_usage[i]) {
602 continue;
603 }
604
605 // The first address we can safely discard is just after the freelist
606 // pointer. There's one optimization opportunity: if the freelist pointer is
607 // encoded as 0, we can discard that pointer value too (except on
608 // Windows).
609 begin_addr = slot_span_start + (i * slot_size);
610 end_addr = begin_addr + slot_size;
611 bool can_discard_free_list_pointer = false;
612 #if !BUILDFLAG(IS_WIN)
613 if (i != last_slot) {
614 begin_addr += sizeof(internal::EncodedNextFreelistEntry);
615 } else {
616 can_discard_free_list_pointer = true;
617 }
618 #else
619 begin_addr += sizeof(internal::EncodedNextFreelistEntry);
620 #endif
621
622 uintptr_t rounded_up_begin_addr = RoundUpToSystemPage(begin_addr);
623 uintptr_t rounded_down_begin_addr = RoundDownToSystemPage(begin_addr);
624 end_addr = RoundDownToSystemPage(end_addr);
625
626 // |rounded_up_begin_addr| could be greater than |end_addr| only if slot
627 // size was less than system page size, or if free list pointer crossed the
628 // page boundary. Neither is possible here.
629 PA_DCHECK(rounded_up_begin_addr <= end_addr);
630
631 if (rounded_down_begin_addr < rounded_up_begin_addr && i != 0 &&
632 !slot_usage[i - 1] && can_discard_free_list_pointer) {
633 // This slot contains a partial page in the beginning. The rest of that
634 // page is contained in the slot[i-1], which is also discardable.
635 // Therefore we can discard this page.
636 begin_addr = rounded_down_begin_addr;
637 } else {
638 begin_addr = rounded_up_begin_addr;
639 }
640
641 if (begin_addr < end_addr) {
642 size_t partial_slot_bytes = end_addr - begin_addr;
643 discardable_bytes += partial_slot_bytes;
644 if (!accounting_only) {
645 // Discard the pages. But don't be tempted to decommit it (as done
646 // above), because here we're getting rid of provisioned pages amidst
647 // used pages, so we're relying on them to materialize automatically
648 // when the virtual address is accessed, so the mapping needs to be
649 // intact.
650 ScopedSyscallTimer timer{root};
651 DiscardSystemPages(begin_addr, partial_slot_bytes);
652 }
653 }
654 }
655
656 return discardable_bytes;
657 }
658
659 static void PartitionPurgeBucket(PartitionRoot* root,
660 internal::PartitionBucket* bucket)
661 PA_EXCLUSIVE_LOCKS_REQUIRED(internal::PartitionRootLock(root)) {
662 if (bucket->active_slot_spans_head !=
663 internal::SlotSpanMetadata::get_sentinel_slot_span()) {
664 for (internal::SlotSpanMetadata* slot_span = bucket->active_slot_spans_head;
665 slot_span; slot_span = slot_span->next_slot_span) {
666 PA_DCHECK(slot_span !=
667 internal::SlotSpanMetadata::get_sentinel_slot_span());
668 PartitionPurgeSlotSpan(root, slot_span, false);
669 }
670 }
671 }
672
673 static void PartitionDumpSlotSpanStats(PartitionBucketMemoryStats* stats_out,
674 PartitionRoot* root,
675 internal::SlotSpanMetadata* slot_span)
676 PA_EXCLUSIVE_LOCKS_REQUIRED(internal::PartitionRootLock(root)) {
677 uint16_t bucket_num_slots = slot_span->bucket->get_slots_per_span();
678
679 if (slot_span->is_decommitted()) {
680 ++stats_out->num_decommitted_slot_spans;
681 return;
682 }
683
684 stats_out->discardable_bytes += PartitionPurgeSlotSpan(root, slot_span, true);
685
686 if (slot_span->CanStoreRawSize()) {
687 stats_out->active_bytes += static_cast<uint32_t>(slot_span->GetRawSize());
688 } else {
689 stats_out->active_bytes +=
690 (slot_span->num_allocated_slots * stats_out->bucket_slot_size);
691 }
692 stats_out->active_count += slot_span->num_allocated_slots;
693
694 size_t slot_span_bytes_resident = RoundUpToSystemPage(
695 (bucket_num_slots - slot_span->num_unprovisioned_slots) *
696 stats_out->bucket_slot_size);
697 stats_out->resident_bytes += slot_span_bytes_resident;
698 if (slot_span->is_empty()) {
699 stats_out->decommittable_bytes += slot_span_bytes_resident;
700 ++stats_out->num_empty_slot_spans;
701 } else if (slot_span->is_full()) {
702 ++stats_out->num_full_slot_spans;
703 } else {
704 PA_DCHECK(slot_span->is_active());
705 ++stats_out->num_active_slot_spans;
706 }
707 }
708
709 static void PartitionDumpBucketStats(PartitionBucketMemoryStats* stats_out,
710 PartitionRoot* root,
711 const internal::PartitionBucket* bucket)
712 PA_EXCLUSIVE_LOCKS_REQUIRED(internal::PartitionRootLock(root)) {
713 PA_DCHECK(!bucket->is_direct_mapped());
714 stats_out->is_valid = false;
715 // If the active slot span list is empty (==
716 // internal::SlotSpanMetadata::get_sentinel_slot_span()), the bucket might
717 // still need to be reported if it has a list of empty, decommitted or full
718 // slot spans.
719 if (bucket->active_slot_spans_head ==
720 internal::SlotSpanMetadata::get_sentinel_slot_span() &&
721 !bucket->empty_slot_spans_head && !bucket->decommitted_slot_spans_head &&
722 !bucket->num_full_slot_spans) {
723 return;
724 }
725
726 memset(stats_out, '\0', sizeof(*stats_out));
727 stats_out->is_valid = true;
728 stats_out->is_direct_map = false;
729 stats_out->num_full_slot_spans =
730 static_cast<size_t>(bucket->num_full_slot_spans);
731 stats_out->bucket_slot_size = bucket->slot_size;
732 uint16_t bucket_num_slots = bucket->get_slots_per_span();
733 size_t bucket_useful_storage = stats_out->bucket_slot_size * bucket_num_slots;
734 stats_out->allocated_slot_span_size = bucket->get_bytes_per_span();
735 stats_out->active_bytes = bucket->num_full_slot_spans * bucket_useful_storage;
736 stats_out->active_count = bucket->num_full_slot_spans * bucket_num_slots;
737 stats_out->resident_bytes =
738 bucket->num_full_slot_spans * stats_out->allocated_slot_span_size;
739
740 for (internal::SlotSpanMetadata* slot_span = bucket->empty_slot_spans_head;
741 slot_span; slot_span = slot_span->next_slot_span) {
742 PA_DCHECK(slot_span->is_empty() || slot_span->is_decommitted());
743 PartitionDumpSlotSpanStats(stats_out, root, slot_span);
744 }
745 for (internal::SlotSpanMetadata* slot_span =
746 bucket->decommitted_slot_spans_head;
747 slot_span; slot_span = slot_span->next_slot_span) {
748 PA_DCHECK(slot_span->is_decommitted());
749 PartitionDumpSlotSpanStats(stats_out, root, slot_span);
750 }
751
752 if (bucket->active_slot_spans_head !=
753 internal::SlotSpanMetadata::get_sentinel_slot_span()) {
754 for (internal::SlotSpanMetadata* slot_span = bucket->active_slot_spans_head;
755 slot_span; slot_span = slot_span->next_slot_span) {
756 PA_DCHECK(slot_span !=
757 internal::SlotSpanMetadata::get_sentinel_slot_span());
758 PartitionDumpSlotSpanStats(stats_out, root, slot_span);
759 }
760 }
761 }
762
763 #if BUILDFLAG(PA_DCHECK_IS_ON)
764 void DCheckIfManagedByPartitionAllocBRPPool(uintptr_t address) {
765 PA_DCHECK(IsManagedByPartitionAllocBRPPool(address));
766 }
767 #endif
768
769 #if BUILDFLAG(ENABLE_THREAD_ISOLATION)
770 void PartitionAllocThreadIsolationInit(ThreadIsolationOption thread_isolation) {
771 #if BUILDFLAG(PA_DCHECK_IS_ON)
772 ThreadIsolationSettings::settings.enabled = true;
773 #endif
774 PartitionAddressSpace::InitThreadIsolatedPool(thread_isolation);
775 // Call WriteProtectThreadIsolatedGlobals last since we might not have write
776 // permissions to globals afterwards.
777 WriteProtectThreadIsolatedGlobals(thread_isolation);
778 }
779 #endif // BUILDFLAG(ENABLE_THREAD_ISOLATION)
780
781 } // namespace internal
782
783 [[noreturn]] PA_NOINLINE void PartitionRoot::OutOfMemory(size_t size) {
784 const size_t virtual_address_space_size =
785 total_size_of_super_pages.load(std::memory_order_relaxed) +
786 total_size_of_direct_mapped_pages.load(std::memory_order_relaxed);
787 #if !defined(ARCH_CPU_64_BITS)
788 const size_t uncommitted_size =
789 virtual_address_space_size -
790 total_size_of_committed_pages.load(std::memory_order_relaxed);
791
792 // Check whether this OOM is due to a lot of super pages that are allocated
793 // but not committed, probably due to http://crbug.com/421387.
794 if (uncommitted_size > internal::kReasonableSizeOfUnusedPages) {
795 internal::PartitionOutOfMemoryWithLotsOfUncommitedPages(size);
796 }
797
798 #if BUILDFLAG(IS_WIN)
799 // If true then we are running on 64-bit Windows.
800 BOOL is_wow_64 = FALSE;
801 // Intentionally ignoring failures.
802 IsWow64Process(GetCurrentProcess(), &is_wow_64);
803 // 32-bit address space on Windows is typically either 2 GiB (on 32-bit
804 // Windows) or 4 GiB (on 64-bit Windows). 2.8 and 1.0 GiB are just rough
805 // guesses as to how much address space PA can consume (note that code,
806 // stacks, and other allocators will also consume address space).
807 const size_t kReasonableVirtualSize = (is_wow_64 ? 2800 : 1024) * 1024 * 1024;
808 // Make it obvious whether we are running on 64-bit Windows.
809 PA_DEBUG_DATA_ON_STACK("iswow64", static_cast<size_t>(is_wow_64));
810 #else
811 constexpr size_t kReasonableVirtualSize =
812 // 1.5GiB elsewhere, since address space is typically 3GiB.
813 (1024 + 512) * 1024 * 1024;
814 #endif
815 if (virtual_address_space_size > kReasonableVirtualSize) {
816 internal::PartitionOutOfMemoryWithLargeVirtualSize(
817 virtual_address_space_size);
818 }
819 #endif // #if !defined(ARCH_CPU_64_BITS)
820
821 // Out of memory can be due to multiple causes, such as:
822 // - Out of virtual address space in the desired pool
823 // - Out of commit due to either our process, or another one
824 // - Excessive allocations in the current process
825 //
826 // Saving these values make it easier to distinguish between these. See the
827 // documentation in PA_CONFIG(DEBUG_DATA_ON_STACK) on how to get these from
828 // minidumps.
829 PA_DEBUG_DATA_ON_STACK("va_size", virtual_address_space_size);
830 PA_DEBUG_DATA_ON_STACK("alloc", get_total_size_of_allocated_bytes());
831 PA_DEBUG_DATA_ON_STACK("commit", get_total_size_of_committed_pages());
832 PA_DEBUG_DATA_ON_STACK("size", size);
833
834 if (internal::g_oom_handling_function) {
835 (*internal::g_oom_handling_function)(size);
836 }
837 OOM_CRASH(size);
838 }
839
840 void PartitionRoot::DecommitEmptySlotSpans() {
841 ShrinkEmptySlotSpansRing(0);
842 // We just decommitted everything while holding the lock, so this should be exactly 0.
843 PA_DCHECK(empty_slot_spans_dirty_bytes == 0);
844 }
845
846 void PartitionRoot::DestructForTesting() {
847 // We need to destruct the thread cache before we unreserve any of the super
848 // pages below, which we currently are not doing. So, we should only call
849 // this function on PartitionRoots without a thread cache.
850 PA_CHECK(!settings.with_thread_cache);
851 auto pool_handle = ChoosePool();
852 #if BUILDFLAG(ENABLE_THREAD_ISOLATION)
853 // The pages managed by the thread isolated pool will be freed at
854 // UninitThreadIsolatedForTesting(). Don't invoke FreePages() for them.
855 if (pool_handle == internal::kThreadIsolatedPoolHandle) {
856 return;
857 }
858 PA_DCHECK(pool_handle < internal::kNumPools);
859 #else
860 PA_DCHECK(pool_handle <= internal::kNumPools);
861 #endif
862
863 auto* curr = first_extent;
864 while (curr != nullptr) {
865 auto* next = curr->next;
866 uintptr_t address = SuperPagesBeginFromExtent(curr);
867 size_t size =
868 internal::kSuperPageSize * curr->number_of_consecutive_super_pages;
869 #if !BUILDFLAG(HAS_64_BIT_POINTERS)
870 internal::AddressPoolManager::GetInstance().MarkUnused(pool_handle, address,
871 size);
872 #endif
873 internal::AddressPoolManager::GetInstance().UnreserveAndDecommit(
874 pool_handle, address, size);
875 curr = next;
876 }
877 }
878
879 #if PA_CONFIG(ENABLE_MAC11_MALLOC_SIZE_HACK)
880 void PartitionRoot::InitMac11MallocSizeHackUsableSize(size_t ref_count_size) {
881 settings.mac11_malloc_size_hack_enabled_ = true;
882
883 // 0 means reserve just enough extras to fit PartitionRefCount.
884 if (!ref_count_size) {
885 ref_count_size = sizeof(internal::PartitionRefCount);
886 }
887 // A request of 32B falls into a 48B bucket in the presence of the BRP
888 // ref-count, yielding |48 - ref_count_size| bytes of actual usable space.
889 settings.mac11_malloc_size_hack_usable_size_ = 48 - ref_count_size;
890 }
891
892 void PartitionRoot::EnableMac11MallocSizeHackForTesting(size_t ref_count_size) {
893 settings.mac11_malloc_size_hack_enabled_ = true;
894 InitMac11MallocSizeHackUsableSize(ref_count_size);
895 }
896
897 void PartitionRoot::EnableMac11MallocSizeHackIfNeeded(size_t ref_count_size) {
898 settings.mac11_malloc_size_hack_enabled_ =
899 settings.brp_enabled_ && internal::base::mac::MacOSMajorVersion() == 11;
900 if (settings.mac11_malloc_size_hack_enabled_) {
901 InitMac11MallocSizeHackUsableSize(ref_count_size);
902 }
903 }
904 #endif // PA_CONFIG(ENABLE_MAC11_MALLOC_SIZE_HACK)
905
906 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) && !BUILDFLAG(HAS_64_BIT_POINTERS)
907 namespace {
908 std::atomic<bool> g_reserve_brp_guard_region_called;
909 // An address constructed by repeating `kQuarantinedByte` should never point
910 // to valid memory. Preemptively reserve a memory region around that address and
911 // make it inaccessible. Not needed for 64-bit platforms where the address is
912 // guaranteed to be non-canonical. Safe to call multiple times.
913 void ReserveBackupRefPtrGuardRegionIfNeeded() {
914 bool expected = false;
915 // No need to block execution for potential concurrent initialization, merely
916 // want to make sure this is only called once.
917 if (!g_reserve_brp_guard_region_called.compare_exchange_strong(expected,
918 true)) {
919 return;
920 }
921
922 size_t alignment = internal::PageAllocationGranularity();
923 uintptr_t requested_address;
924 memset(&requested_address, internal::kQuarantinedByte,
925 sizeof(requested_address));
926 requested_address = RoundDownToPageAllocationGranularity(requested_address);
927
928 // Request several pages so that even unreasonably large C++ objects stay
929 // within the inaccessible region. If some of the pages can't be reserved,
930 // it's still preferable to try and reserve the rest.
931 for (size_t i = 0; i < 4; ++i) {
932 [[maybe_unused]] uintptr_t allocated_address =
933 AllocPages(requested_address, alignment, alignment,
934 PageAccessibilityConfiguration(
935 PageAccessibilityConfiguration::kInaccessible),
936 PageTag::kPartitionAlloc);
937 requested_address += alignment;
938 }
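// Failures are intentionally ignored: if one of the pages cannot be reserved,
// the remaining iterations still attempt to cover the rest of the region.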
939 }
940 } // namespace
941 #endif // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) &&
942 // !BUILDFLAG(HAS_64_BIT_POINTERS)
943
944 void PartitionRoot::Init(PartitionOptions opts) {
945 {
946 #if BUILDFLAG(IS_APPLE)
947 // Needed to statically bound page size, which is a runtime constant on
948 // Apple OSes.
949 PA_CHECK((internal::SystemPageSize() == (size_t{1} << 12)) ||
950 (internal::SystemPageSize() == (size_t{1} << 14)));
951 #elif BUILDFLAG(IS_LINUX) && defined(ARCH_CPU_ARM64)
952 // Check runtime pagesize. Though the code is currently the same, it is
953 // not merged with the IS_APPLE case above as a 1 << 16 case needs to be
954 // added here in the future, to allow 64 kiB pagesize. That is only
955 // supported on Linux on arm64, not on IS_APPLE, but not yet present here
956 // as the rest of the partition allocator does not currently support it.
957 PA_CHECK((internal::SystemPageSize() == (size_t{1} << 12)) ||
958 (internal::SystemPageSize() == (size_t{1} << 14)));
959 #endif
960
961 ::partition_alloc::internal::ScopedGuard guard{lock_};
962 if (initialized) {
963 return;
964 }
965
966 #if BUILDFLAG(HAS_64_BIT_POINTERS)
967 // Reserve address space for partition alloc.
968 internal::PartitionAddressSpace::Init();
969 #endif
970
971 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) && !BUILDFLAG(HAS_64_BIT_POINTERS)
972 ReserveBackupRefPtrGuardRegionIfNeeded();
973 #endif
974
975 settings.allow_aligned_alloc =
976 opts.aligned_alloc == PartitionOptions::kAllowed;
977 #if BUILDFLAG(PA_DCHECK_IS_ON)
978 settings.use_cookie = true;
979 #else
980 static_assert(!Settings::use_cookie);
981 #endif // BUILDFLAG(PA_DCHECK_IS_ON)
982 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
983 settings.brp_enabled_ = opts.backup_ref_ptr == PartitionOptions::kEnabled;
984 #if PA_CONFIG(ENABLE_MAC11_MALLOC_SIZE_HACK)
985 EnableMac11MallocSizeHackIfNeeded(opts.ref_count_size);
986 #endif
987 #else // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
988 PA_CHECK(opts.backup_ref_ptr == PartitionOptions::kDisabled);
989 #endif // BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
990 settings.use_configurable_pool =
991 (opts.use_configurable_pool == PartitionOptions::kAllowed) &&
992 IsConfigurablePoolAvailable();
993 PA_DCHECK(!settings.use_configurable_pool || IsConfigurablePoolAvailable());
994 settings.zapping_by_free_flags =
995 opts.zapping_by_free_flags == PartitionOptions::kEnabled;
996 #if PA_CONFIG(HAS_MEMORY_TAGGING)
997 settings.memory_tagging_enabled_ =
998 opts.memory_tagging.enabled == PartitionOptions::kEnabled;
999 // Memory tagging is not supported in the configurable pool because MTE
1000 // stores tagging information in the high bits of the pointer, which causes
1001 // issues with components like V8's ArrayBuffers that use custom pointer
1002 // representations. All custom representations encountered so far rely on an
1003 // "is in configurable pool?" check, so we use that as a proxy.
1004 PA_CHECK(!settings.memory_tagging_enabled_ ||
1005 !settings.use_configurable_pool);
1006
1007 settings.memory_tagging_reporting_mode_ =
1008 opts.memory_tagging.reporting_mode;
1009 #endif // PA_CONFIG(HAS_MEMORY_TAGGING)
1010
1011 // brp_enabled() is not supported in the configurable pool because
1012 // BRP requires objects to be in a different Pool.
1013 PA_CHECK(!(settings.use_configurable_pool && brp_enabled()));
1014
1015 #if BUILDFLAG(ENABLE_THREAD_ISOLATION)
1016 // BRP and thread isolated mode use different pools, so they can't be
1017 // enabled at the same time.
1018 PA_CHECK(!opts.thread_isolation.enabled ||
1019 opts.backup_ref_ptr == PartitionOptions::kDisabled);
1020 settings.thread_isolation = opts.thread_isolation;
1021 #endif // BUILDFLAG(ENABLE_THREAD_ISOLATION)
1022
1023 // Ref-count messes up the alignment needed for AlignedAlloc, making these
1024 // options incompatible, except in the PUT_REF_COUNT_IN_PREVIOUS_SLOT
1025 // case.
1026 #if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT) && \
1027 !BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT)
1028 PA_CHECK(!settings.allow_aligned_alloc || !settings.brp_enabled_);
1029 #endif
1030
1031 #if PA_CONFIG(EXTRAS_REQUIRED)
1032 settings.extras_size = 0;
1033 settings.extras_offset = 0;
1034
1035 if (settings.use_cookie) {
1036 settings.extras_size += internal::kPartitionCookieSizeAdjustment;
1037 }
1038
1039 if (brp_enabled()) {
1040 // TODO(tasak): In the PUT_REF_COUNT_IN_PREVIOUS_SLOT case, ref-count is
1041 // stored out-of-line for single-slot slot spans, so no need to
1042 // add/subtract its size in this case.
1043 size_t ref_count_size = opts.ref_count_size;
1044 if (!ref_count_size) {
1045 ref_count_size = internal::kPartitionRefCountSizeAdjustment;
1046 }
1047 ref_count_size = internal::AlignUpRefCountSizeForMac(ref_count_size);
1048 #if PA_CONFIG(INCREASE_REF_COUNT_SIZE_FOR_MTE)
1049 if (IsMemoryTaggingEnabled()) {
1050 ref_count_size = internal::base::bits::AlignUp(
1051 ref_count_size, internal::kMemTagGranuleSize);
1052 }
1053 settings.ref_count_size = ref_count_size;
1054 #endif // PA_CONFIG(INCREASE_REF_COUNT_SIZE_FOR_MTE)
1055 PA_CHECK(internal::kPartitionRefCountSizeAdjustment <= ref_count_size);
1056 settings.extras_size += ref_count_size;
1057 settings.extras_offset += internal::kPartitionRefCountOffsetAdjustment;
1058 }
1059 #endif // PA_CONFIG(EXTRAS_REQUIRED)
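// At this point, |extras_size| accounts for the cookie and, if BRP is enabled,
// the ref-count, while |extras_offset| is the number of extras bytes that
// precede the object within the slot.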
1060
1061 // Re-confirm the above PA_CHECKs, by making sure there are no
1062 // pre-allocation extras when AlignedAlloc is allowed. Post-allocation
1063 // extras are ok.
1064 PA_CHECK(!settings.allow_aligned_alloc || !settings.extras_offset);
1065
1066 settings.quarantine_mode =
1067 #if BUILDFLAG(USE_STARSCAN)
1068 (opts.star_scan_quarantine == PartitionOptions::kDisallowed
1069 ? QuarantineMode::kAlwaysDisabled
1070 : QuarantineMode::kDisabledByDefault);
1071 #else
1072 QuarantineMode::kAlwaysDisabled;
1073 #endif // BUILDFLAG(USE_STARSCAN)
1074
1075 // We mark the sentinel slot span as free to make sure it is skipped by our
1076 // logic to find a new active slot span.
1077 memset(&sentinel_bucket, 0, sizeof(sentinel_bucket));
1078 sentinel_bucket.active_slot_spans_head =
1079 SlotSpan::get_sentinel_slot_span_non_const();
1080
1081 // This is a "magic" value so we can test if a root pointer is valid.
1082 inverted_self = ~reinterpret_cast<uintptr_t>(this);
1083
1084 // Set up the actual usable buckets first.
1085 constexpr internal::BucketIndexLookup lookup{};
1086 size_t bucket_index = 0;
1087 while (lookup.bucket_sizes()[bucket_index] !=
1088 internal::kInvalidBucketSize) {
1089 buckets[bucket_index].Init(lookup.bucket_sizes()[bucket_index]);
1090 bucket_index++;
1091 }
1092 PA_DCHECK(bucket_index < internal::kNumBuckets);
1093
1094 // Remaining buckets are not usable, and not real.
1095 for (size_t index = bucket_index; index < internal::kNumBuckets; index++) {
1096 // Cannot init with size 0 since it computes 1 / size, but make sure the
1097 // bucket is invalid.
1098 buckets[index].Init(internal::kInvalidBucketSize);
1099 buckets[index].active_slot_spans_head = nullptr;
1100 PA_DCHECK(!buckets[index].is_valid());
1101 }
1102
1103 #if !PA_CONFIG(THREAD_CACHE_SUPPORTED)
1104 // TLS in ThreadCache is not supported on this platform.
1105 settings.with_thread_cache = false;
1106 #else
1107 ThreadCache::EnsureThreadSpecificDataInitialized();
1108 settings.with_thread_cache =
1109 (opts.thread_cache == PartitionOptions::kEnabled);
1110
1111 if (settings.with_thread_cache) {
1112 ThreadCache::Init(this);
1113 }
1114 #endif // !PA_CONFIG(THREAD_CACHE_SUPPORTED)
1115
1116 #if PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
1117 internal::PartitionRootEnumerator::Instance().Register(this);
1118 #endif
1119
1120 initialized = true;
1121 }
1122
1123 // Called without the lock, might allocate.
1124 #if BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
1125 PartitionAllocMallocInitOnce();
1126 #endif
1127
1128 #if BUILDFLAG(ENABLE_THREAD_ISOLATION)
1129 if (settings.thread_isolation.enabled) {
1130 internal::PartitionAllocThreadIsolationInit(settings.thread_isolation);
1131 }
1132 #endif
1133 }
1134
1135 PartitionRoot::Settings::Settings() = default;
1136
1137 PartitionRoot::PartitionRoot()
1138 : scheduler_loop_quarantine_root(*this),
1139 scheduler_loop_quarantine(
1140 scheduler_loop_quarantine_root
1141 .CreateBranch<internal::SchedulerLoopQuarantineBranch::
1142 kQuarantineCapacityCount>()) {}
1143
1144 PartitionRoot::PartitionRoot(PartitionOptions opts)
1145 : scheduler_loop_quarantine_root(
1146 *this,
1147 opts.scheduler_loop_quarantine_capacity_in_bytes),
1148 scheduler_loop_quarantine(
1149 scheduler_loop_quarantine_root
1150 .CreateBranch<internal::SchedulerLoopQuarantineBranch::
1151 kQuarantineCapacityCount>()) {
1152 Init(opts);
1153 }
1154
1155 PartitionRoot::~PartitionRoot() {
1156 #if BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
1157 PA_CHECK(!settings.with_thread_cache)
1158 << "Must not destroy a partition with a thread cache";
1159 #endif // BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
1160
1161 #if PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
1162 if (initialized) {
1163 internal::PartitionRootEnumerator::Instance().Unregister(this);
1164 }
1165 #endif // PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
1166 }
1167
1168 void PartitionRoot::EnableThreadCacheIfSupported() {
1169 #if PA_CONFIG(THREAD_CACHE_SUPPORTED)
1170 ::partition_alloc::internal::ScopedGuard guard{lock_};
1171 PA_CHECK(!settings.with_thread_cache);
1172 // By the time we get here, there may be multiple threads created in the
1173 // process. Since `with_thread_cache` is accessed without a lock, it can
1174 // become visible to another thread before the effects of
1175 // `internal::ThreadCacheInit()` are visible. To prevent that, we fake thread
1176 // cache creation being in-progress while this is running.
1177 //
1178 // This synchronizes with the acquire load in `MaybeInitThreadCacheAndAlloc()`
1179 // to ensure that we don't create (and thus use) a ThreadCache before
1180 // ThreadCache::Init()'s effects are visible.
1181 int before =
1182 thread_caches_being_constructed_.fetch_add(1, std::memory_order_acquire);
1183 PA_CHECK(before == 0);
1184 ThreadCache::Init(this);
1185 thread_caches_being_constructed_.fetch_sub(1, std::memory_order_release);
1186 settings.with_thread_cache = true;
1187 #endif // PA_CONFIG(THREAD_CACHE_SUPPORTED)
1188 }
1189
1190 bool PartitionRoot::TryReallocInPlaceForDirectMap(
1191 internal::SlotSpanMetadata* slot_span,
1192 size_t requested_size) {
1193 PA_DCHECK(slot_span->bucket->is_direct_mapped());
1194 // Slot-span metadata isn't MTE-tagged.
1195 PA_DCHECK(
1196 internal::IsManagedByDirectMap(reinterpret_cast<uintptr_t>(slot_span)));
1197
1198 size_t raw_size = AdjustSizeForExtrasAdd(requested_size);
1199 auto* extent = DirectMapExtent::FromSlotSpan(slot_span);
1200 size_t current_reservation_size = extent->reservation_size;
1201 // Calculate the new reservation size the way PartitionDirectMap() would, but
1202 // skip the alignment, because this call isn't requesting it.
1203 size_t new_reservation_size = GetDirectMapReservationSize(raw_size);
1204
1205 // If new reservation would be larger, there is nothing we can do to
1206 // reallocate in-place.
1207 if (new_reservation_size > current_reservation_size) {
1208 return false;
1209 }
1210
1211 // Don't reallocate in-place if the new reservation size would be less than 80%
1212 // of the current one, to avoid holding on to too much unused address space.
1213 // Make this check before comparing slot sizes, as even with equal or similar
1214 // slot sizes we can save a lot if the original allocation was heavily padded
1215 // for alignment.
1216 if ((new_reservation_size >> internal::SystemPageShift()) * 5 <
1217 (current_reservation_size >> internal::SystemPageShift()) * 4) {
1218 return false;
1219 }
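// The comparison above is the integer form of
// new_reservation_size < 0.8 * current_reservation_size, computed in whole
// system pages.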
1220
1221 // Note that the new size isn't a bucketed size; this function is called
1222 // whenever we're reallocating a direct mapped allocation, so calculate it
1223 // the way PartitionDirectMap() would.
1224 size_t new_slot_size = GetDirectMapSlotSize(raw_size);
1225 if (new_slot_size < internal::kMinDirectMappedDownsize) {
1226 return false;
1227 }
1228
1229 // Past this point, we decided we'll attempt to reallocate without relocating,
1230 // so we have to honor the padding for alignment in front of the original
1231 // allocation, even though this function isn't requesting any alignment.
1232
1233 // bucket->slot_size is the currently committed size of the allocation.
1234 size_t current_slot_size = slot_span->bucket->slot_size;
1235 size_t current_usable_size = GetSlotUsableSize(slot_span);
1236 uintptr_t slot_start = SlotSpan::ToSlotSpanStart(slot_span);
1237 // This is the available part of the reservation up to which the new
1238 // allocation can grow.
1239 size_t available_reservation_size =
1240 current_reservation_size - extent->padding_for_alignment -
1241 PartitionRoot::GetDirectMapMetadataAndGuardPagesSize();
1242 #if BUILDFLAG(PA_DCHECK_IS_ON)
1243 uintptr_t reservation_start = slot_start & internal::kSuperPageBaseMask;
1244 PA_DCHECK(internal::IsReservationStart(reservation_start));
1245 PA_DCHECK(slot_start + available_reservation_size ==
1246 reservation_start + current_reservation_size -
1247 GetDirectMapMetadataAndGuardPagesSize() +
1248 internal::PartitionPageSize());
1249 #endif
1250
1251 PA_DCHECK(new_slot_size > internal::kMaxMemoryTaggingSize);
1252 if (new_slot_size == current_slot_size) {
1253 // No need to move any memory around, but update size and cookie below.
1254 // That's because raw_size may have changed.
1255 } else if (new_slot_size < current_slot_size) {
1256 // Shrink by decommitting unneeded pages and making them inaccessible.
1257 size_t decommit_size = current_slot_size - new_slot_size;
1258 DecommitSystemPagesForData(slot_start + new_slot_size, decommit_size,
1259 PageAccessibilityDisposition::kRequireUpdate);
1260 // Since the decommitted system pages are still reserved, we don't need to
1261 // change the entries for decommitted pages in the reservation offset table.
1262 } else if (new_slot_size <= available_reservation_size) {
1263 // Grow within the actually reserved address space. Just need to make the
1264 // pages accessible again.
1265 size_t recommit_slot_size_growth = new_slot_size - current_slot_size;
1266 // Direct map never uses tagging, as size is always >kMaxMemoryTaggingSize.
1267 RecommitSystemPagesForData(
1268 slot_start + current_slot_size, recommit_slot_size_growth,
1269 PageAccessibilityDisposition::kRequireUpdate, false);
1270 // The recommitted system pages were already reserved, and all the
1271 // entries in the reservation offset table (for the entire reservation_size
1272 // region) have already been initialized.
1273
1274 #if BUILDFLAG(PA_DCHECK_IS_ON)
1275 memset(reinterpret_cast<void*>(slot_start + current_slot_size),
1276 internal::kUninitializedByte, recommit_slot_size_growth);
1277 #endif
1278 } else {
1279 // We can't perform the realloc in-place.
1280 // TODO: support this too when possible.
1281 return false;
1282 }
1283
1284 DecreaseTotalSizeOfAllocatedBytes(reinterpret_cast<uintptr_t>(slot_span),
1285 slot_span->bucket->slot_size);
1286 slot_span->SetRawSize(raw_size);
1287 slot_span->bucket->slot_size = new_slot_size;
1288 IncreaseTotalSizeOfAllocatedBytes(reinterpret_cast<uintptr_t>(slot_span),
1289 slot_span->bucket->slot_size, raw_size);
1290
1291 // Always record in-place realloc() as free()+malloc() pair.
1292 //
1293 // The early returns above (`return false`) will fall back to free()+malloc(),
1294 // so this is consistent.
1295 auto* thread_cache = GetOrCreateThreadCache();
1296 if (ThreadCache::IsValid(thread_cache)) {
1297 thread_cache->RecordDeallocation(current_usable_size);
1298 thread_cache->RecordAllocation(GetSlotUsableSize(slot_span));
1299 }
1300
1301 // Write a new trailing cookie.
1302 if (settings.use_cookie) {
1303 auto* object = static_cast<unsigned char*>(SlotStartToObject(slot_start));
1304 internal::PartitionCookieWriteValue(object + GetSlotUsableSize(slot_span));
1305 }
1306
1307 return true;
1308 }
1309
1310 bool PartitionRoot::TryReallocInPlaceForNormalBuckets(void* object,
1311 SlotSpan* slot_span,
1312 size_t new_size) {
1313 uintptr_t slot_start = ObjectToSlotStart(object);
1314 PA_DCHECK(internal::IsManagedByNormalBuckets(slot_start));
1315
1316 // TODO: note that tcmalloc will "ignore" a downsizing realloc() unless the
1317 // new size is a significant percentage smaller. We could do the same if we
1318 // determine it is a win.
1319 if (AllocationCapacityFromRequestedSize(new_size) !=
1320 AllocationCapacityFromSlotStart(slot_start)) {
1321 return false;
1322 }
1323 size_t current_usable_size = GetSlotUsableSize(slot_span);
1324
1325 // Trying to allocate |new_size| would use the same amount of underlying
1326 // memory as we're already using, so re-use the allocation after updating
1327 // statistics (and cookie, if present).
1328 if (slot_span->CanStoreRawSize()) {
1329 #if BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT) && BUILDFLAG(PA_DCHECK_IS_ON)
1330 internal::PartitionRefCount* old_ref_count = nullptr;
1331 if (brp_enabled()) {
1332 old_ref_count = internal::PartitionRefCountPointer(slot_start);
1333 }
1334 #endif // BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT) &&
1335 // BUILDFLAG(PA_DCHECK_IS_ON)
1336 size_t new_raw_size = AdjustSizeForExtrasAdd(new_size);
1337 slot_span->SetRawSize(new_raw_size);
1338 #if BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT) && BUILDFLAG(PA_DCHECK_IS_ON)
1339 if (brp_enabled()) {
1340 internal::PartitionRefCount* new_ref_count =
1341 internal::PartitionRefCountPointer(slot_start);
1342 PA_DCHECK(new_ref_count == old_ref_count);
1343 }
1344 #endif // BUILDFLAG(PUT_REF_COUNT_IN_PREVIOUS_SLOT) &&
1345 // BUILDFLAG(PA_DCHECK_IS_ON)
1346 // Write a new trailing cookie only when it is possible to keep track
1347 // raw size (otherwise we wouldn't know where to look for it later).
1348 if (settings.use_cookie) {
1349 internal::PartitionCookieWriteValue(static_cast<unsigned char*>(object) +
1350 GetSlotUsableSize(slot_span));
1351 }
1352 }
1353
1354 // Always record a realloc() as a free() + malloc(), even if it's in
1355 // place. When we cannot do it in place (`return false` above), the allocator
1356 // falls back to free()+malloc(), so this is consistent.
1357 ThreadCache* thread_cache = GetOrCreateThreadCache();
1358 if (PA_LIKELY(ThreadCache::IsValid(thread_cache))) {
1359 thread_cache->RecordDeallocation(current_usable_size);
1360 thread_cache->RecordAllocation(GetSlotUsableSize(slot_span));
1361 }
1362
1363 return object;
1364 }
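
// Example (illustrative sketch, not compiled): the in-place path above only
// succeeds when the new requested size maps to the same slot capacity. The
// sizes below are hypothetical and depend on the bucket layout.
//
//   PartitionRoot* root = ...;
//   void* p = root->Alloc<AllocFlags::kNone>(100, "example");
//   // If both 100 and 120 bytes round up to the same slot size, this
//   // realloc() is served in place; otherwise it falls back to
//   // free() + malloc() with a copy.
//   p = root->Realloc<AllocFlags::kNone, FreeFlags::kNone>(p, 120, "example");
//   root->Free(p);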

void PartitionRoot::PurgeMemory(int flags) {
  {
    ::partition_alloc::internal::ScopedGuard guard{
        internal::PartitionRootLock(this)};
#if BUILDFLAG(USE_STARSCAN)
    // Avoid purging if there is a PCScan task currently scheduled. Since
    // PCScan takes a snapshot of all allocated pages, decommitting pages here
    // (even under the lock) is racy.
    // TODO(bikineev): Consider rescheduling the purging after PCScan.
    if (PCScan::IsInProgress()) {
      return;
    }
#endif  // BUILDFLAG(USE_STARSCAN)

    if (flags & PurgeFlags::kDecommitEmptySlotSpans) {
      DecommitEmptySlotSpans();
    }
    if (flags & PurgeFlags::kDiscardUnusedSystemPages) {
      for (Bucket& bucket : buckets) {
        if (bucket.slot_size == internal::kInvalidBucketSize) {
          continue;
        }

        if (bucket.slot_size >= internal::MinPurgeableSlotSize()) {
          internal::PartitionPurgeBucket(this, &bucket);
        } else {
          if (sort_smaller_slot_span_free_lists_) {
            bucket.SortSmallerSlotSpanFreeLists();
          }
        }

        // Do it at the end, as the actions above change the status of slot
        // spans (e.g. empty -> decommitted).
        bucket.MaintainActiveList();

        if (sort_active_slot_spans_) {
          bucket.SortActiveSlotSpans();
        }
      }
    }
  }
}
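
// Example (illustrative sketch, not compiled): a memory-pressure handler might
// combine both purge flags; the root pointer is hypothetical.
//
//   PartitionRoot* root = ...;
//   root->PurgeMemory(PurgeFlags::kDecommitEmptySlotSpans |
//                     PurgeFlags::kDiscardUnusedSystemPages);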

void PartitionRoot::ShrinkEmptySlotSpansRing(size_t limit) {
  int16_t index = global_empty_slot_span_ring_index;
  int16_t starting_index = index;
  while (empty_slot_spans_dirty_bytes > limit) {
    SlotSpan* slot_span = global_empty_slot_span_ring[index];
    // The ring is not always full; the entry may be nullptr.
    if (slot_span) {
      slot_span->DecommitIfPossible(this);
      global_empty_slot_span_ring[index] = nullptr;
    }
    index += 1;
    // Walk through the entirety of possible slots, even though the last ones
    // are unused, if global_empty_slot_span_ring_size is smaller than
    // kMaxFreeableSpans. It's simpler, and does not cost anything, since all
    // the pointers are going to be nullptr.
    if (index == internal::kMaxFreeableSpans) {
      index = 0;
    }

    // We went around the whole ring; since this runs under the lock,
    // empty_slot_spans_dirty_bytes should be exactly 0.
    if (index == starting_index) {
      PA_DCHECK(empty_slot_spans_dirty_bytes == 0);
      // Metrics issue; don't crash, just return.
      break;
    }
  }
}

void PartitionRoot::DumpStats(const char* partition_name,
                              bool is_light_dump,
                              PartitionStatsDumper* dumper) {
  static const size_t kMaxReportableDirectMaps = 4096;
  // Allocate on the heap rather than on the stack to avoid stack overflow
  // skirmishes (on Windows, in particular). Allocate before taking the lock
  // below; otherwise, when PartitionAlloc is malloc(), we get reentrancy
  // issues. This inflates reported values a bit for detailed dumps though,
  // by 16kiB.
  std::unique_ptr<uint32_t[]> direct_map_lengths;
  if (!is_light_dump) {
    direct_map_lengths =
        std::unique_ptr<uint32_t[]>(new uint32_t[kMaxReportableDirectMaps]);
  }
  PartitionBucketMemoryStats bucket_stats[internal::kNumBuckets];
  size_t num_direct_mapped_allocations = 0;
  PartitionMemoryStats stats = {};

  stats.syscall_count = syscall_count.load(std::memory_order_relaxed);
  stats.syscall_total_time_ns =
      syscall_total_time_ns.load(std::memory_order_relaxed);

  // Collect data with the lock held; cannot allocate or call third-party code
  // below.
  {
    ::partition_alloc::internal::ScopedGuard guard{
        internal::PartitionRootLock(this)};
    PA_DCHECK(total_size_of_allocated_bytes <= max_size_of_allocated_bytes);

    stats.total_mmapped_bytes =
        total_size_of_super_pages.load(std::memory_order_relaxed) +
        total_size_of_direct_mapped_pages.load(std::memory_order_relaxed);
    stats.total_committed_bytes =
        total_size_of_committed_pages.load(std::memory_order_relaxed);
    stats.max_committed_bytes =
        max_size_of_committed_pages.load(std::memory_order_relaxed);
    stats.total_allocated_bytes = total_size_of_allocated_bytes;
    stats.max_allocated_bytes = max_size_of_allocated_bytes;
#if BUILDFLAG(ENABLE_BACKUP_REF_PTR_SUPPORT)
    stats.total_brp_quarantined_bytes =
        total_size_of_brp_quarantined_bytes.load(std::memory_order_relaxed);
    stats.total_brp_quarantined_count =
        total_count_of_brp_quarantined_slots.load(std::memory_order_relaxed);
    stats.cumulative_brp_quarantined_bytes =
        cumulative_size_of_brp_quarantined_bytes.load(
            std::memory_order_relaxed);
    stats.cumulative_brp_quarantined_count =
        cumulative_count_of_brp_quarantined_slots.load(
            std::memory_order_relaxed);
#endif

    size_t direct_mapped_allocations_total_size = 0;
    for (size_t i = 0; i < internal::kNumBuckets; ++i) {
      const Bucket* bucket = &bucket_at(i);
      // Don't report the pseudo buckets that the generic allocator sets up in
      // order to preserve a fast size->bucket map (see
      // PartitionRoot::Init() for details).
      if (!bucket->is_valid()) {
        bucket_stats[i].is_valid = false;
      } else {
        internal::PartitionDumpBucketStats(&bucket_stats[i], this, bucket);
      }
      if (bucket_stats[i].is_valid) {
        stats.total_resident_bytes += bucket_stats[i].resident_bytes;
        stats.total_active_bytes += bucket_stats[i].active_bytes;
        stats.total_active_count += bucket_stats[i].active_count;
        stats.total_decommittable_bytes += bucket_stats[i].decommittable_bytes;
        stats.total_discardable_bytes += bucket_stats[i].discardable_bytes;
      }
    }

    for (DirectMapExtent* extent = direct_map_list;
         extent && num_direct_mapped_allocations < kMaxReportableDirectMaps;
         extent = extent->next_extent, ++num_direct_mapped_allocations) {
      PA_DCHECK(!extent->next_extent ||
                extent->next_extent->prev_extent == extent);
      size_t slot_size = extent->bucket->slot_size;
      direct_mapped_allocations_total_size += slot_size;
      if (is_light_dump) {
        continue;
      }
      direct_map_lengths[num_direct_mapped_allocations] = slot_size;
    }

    stats.total_resident_bytes += direct_mapped_allocations_total_size;
    stats.total_active_bytes += direct_mapped_allocations_total_size;
    stats.total_active_count += num_direct_mapped_allocations;

    stats.has_thread_cache = settings.with_thread_cache;
    if (stats.has_thread_cache) {
      ThreadCacheRegistry::Instance().DumpStats(
          true, &stats.current_thread_cache_stats);
      ThreadCacheRegistry::Instance().DumpStats(false,
                                                &stats.all_thread_caches_stats);
    }
  }

  // Do not hold the lock when calling |dumper|, as it may allocate.
  if (!is_light_dump) {
    for (auto& stat : bucket_stats) {
      if (stat.is_valid) {
        dumper->PartitionsDumpBucketStats(partition_name, &stat);
      }
    }

    for (size_t i = 0; i < num_direct_mapped_allocations; ++i) {
      uint32_t size = direct_map_lengths[i];

      PartitionBucketMemoryStats mapped_stats = {};
      mapped_stats.is_valid = true;
      mapped_stats.is_direct_map = true;
      mapped_stats.num_full_slot_spans = 1;
      mapped_stats.allocated_slot_span_size = size;
      mapped_stats.bucket_slot_size = size;
      mapped_stats.active_bytes = size;
      mapped_stats.active_count = 1;
      mapped_stats.resident_bytes = size;
      dumper->PartitionsDumpBucketStats(partition_name, &mapped_stats);
    }
  }
  dumper->PartitionDumpTotals(partition_name, &stats);
}
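
// Example (illustrative sketch, not compiled): a minimal PartitionStatsDumper
// that logs the totals. This assumes the PartitionStatsDumper interface
// declared in partition_stats.h; the class name and the logging choice below
// are hypothetical.
//
//   class LoggingStatsDumper : public PartitionStatsDumper {
//    public:
//     void PartitionDumpTotals(const char* partition_name,
//                              const PartitionMemoryStats* stats) override {
//       PA_LOG(INFO) << partition_name
//                    << ": committed=" << stats->total_committed_bytes
//                    << " active=" << stats->total_active_bytes;
//     }
//     void PartitionsDumpBucketStats(
//         const char* partition_name,
//         const PartitionBucketMemoryStats* stats) override {
//       // Per-bucket entries are only emitted for detailed dumps.
//     }
//   };
//
//   LoggingStatsDumper logging_dumper;
//   root->DumpStats("example_partition", /*is_light_dump=*/true,
//                   &logging_dumper);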

// static
void PartitionRoot::DeleteForTesting(PartitionRoot* partition_root) {
  if (partition_root->settings.with_thread_cache) {
    ThreadCache::SwapForTesting(nullptr);
    partition_root->settings.with_thread_cache = false;
  }

  partition_root->DestructForTesting();  // IN-TEST

  delete partition_root;
}

void PartitionRoot::ResetForTesting(bool allow_leaks) {
  if (settings.with_thread_cache) {
    ThreadCache::SwapForTesting(nullptr);
    settings.with_thread_cache = false;
  }

  ::partition_alloc::internal::ScopedGuard guard{
      internal::PartitionRootLock(this)};

#if BUILDFLAG(PA_DCHECK_IS_ON)
  if (!allow_leaks) {
    unsigned num_allocated_slots = 0;
    for (Bucket& bucket : buckets) {
      if (bucket.active_slot_spans_head !=
          internal::SlotSpanMetadata::get_sentinel_slot_span()) {
        for (internal::SlotSpanMetadata* slot_span =
                 bucket.active_slot_spans_head;
             slot_span; slot_span = slot_span->next_slot_span) {
          num_allocated_slots += slot_span->num_allocated_slots;
        }
      }
      // Full slot spans are not tracked in any list; use
      // bucket.num_full_slot_spans to count the slots they contain.
      if (bucket.num_full_slot_spans) {
        num_allocated_slots +=
            bucket.num_full_slot_spans * bucket.get_slots_per_span();
      }
    }
    PA_DCHECK(num_allocated_slots == 0);

    // Check for direct-mapped allocations.
    PA_DCHECK(!direct_map_list);
  }
#endif

  DestructForTesting();  // IN-TEST

#if PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)
  if (initialized) {
    internal::PartitionRootEnumerator::Instance().Unregister(this);
  }
#endif  // PA_CONFIG(USE_PARTITION_ROOT_ENUMERATOR)

  for (Bucket& bucket : buckets) {
    bucket.active_slot_spans_head =
        SlotSpan::get_sentinel_slot_span_non_const();
    bucket.empty_slot_spans_head = nullptr;
    bucket.decommitted_slot_spans_head = nullptr;
    bucket.num_full_slot_spans = 0;
  }

  next_super_page = 0;
  next_partition_page = 0;
  next_partition_page_end = 0;
  current_extent = nullptr;
  first_extent = nullptr;

  direct_map_list = nullptr;
  for (auto*& entity : global_empty_slot_span_ring) {
    entity = nullptr;
  }

  global_empty_slot_span_ring_index = 0;
  global_empty_slot_span_ring_size = internal::kDefaultEmptySlotSpanRingSize;
  initialized = false;
}

void PartitionRoot::ResetBookkeepingForTesting() {
  ::partition_alloc::internal::ScopedGuard guard{
      internal::PartitionRootLock(this)};
  max_size_of_allocated_bytes = total_size_of_allocated_bytes;
  max_size_of_committed_pages.store(total_size_of_committed_pages);
}

ThreadCache* PartitionRoot::MaybeInitThreadCache() {
  auto* tcache = ThreadCache::Get();
  // See comment in `EnableThreadCacheIfSupported()` for why this is an acquire
  // load.
  if (ThreadCache::IsTombstone(tcache) ||
      thread_caches_being_constructed_.load(std::memory_order_acquire)) {
    // Two cases:
    // 1. The thread is being terminated; don't try to use the thread cache,
    //    and don't try to resurrect it.
    // 2. Someone, somewhere is currently allocating a thread cache. This may
    //    be us, in which case we are re-entering and should not create a
    //    thread cache. If it is not us, then this merely delays thread cache
    //    construction a bit, which is not an issue.
    return nullptr;
  }

  // There is no per-thread ThreadCache allocated here yet, and this partition
  // has a thread cache; allocate a new one.
  //
  // The thread cache allocation itself will not reenter here, as it sidesteps
  // the thread cache by using placement new and |RawAlloc()|. However,
  // internally to libc, allocations may happen to create a new TLS
  // variable. This would end up here again, which is not what we want (and
  // likely is not supported by libc).
  //
  // To avoid this sort of reentrancy, increase the count of thread caches that
  // are currently allocating a thread cache.
  //
  // Note that there is no deadlock or data inconsistency concern, since we do
  // not hold the lock, and as such haven't touched any internal data.
  int before =
      thread_caches_being_constructed_.fetch_add(1, std::memory_order_relaxed);
  PA_CHECK(before < std::numeric_limits<int>::max());
  tcache = ThreadCache::Create(this);
  thread_caches_being_constructed_.fetch_sub(1, std::memory_order_relaxed);

  return tcache;
}
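
// Example (illustrative sketch, not compiled): the guard above is an instance
// of a generic "construction in progress" counter. A hypothetical,
// stripped-down version of the same pattern:
//
//   std::atomic<int> being_constructed{0};
//
//   Cache* GetOrCreate() {
//     if (being_constructed.load(std::memory_order_acquire)) {
//       return nullptr;  // Re-entrant call during construction; bail out.
//     }
//     being_constructed.fetch_add(1, std::memory_order_relaxed);
//     Cache* cache = CreateCache();  // May allocate, and thus re-enter.
//     being_constructed.fetch_sub(1, std::memory_order_relaxed);
//     return cache;
//   }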

// static
void PartitionRoot::SetStraightenLargerSlotSpanFreeListsMode(
    StraightenLargerSlotSpanFreeListsMode new_value) {
  straighten_larger_slot_span_free_lists_ = new_value;
}

// static
void PartitionRoot::SetSortSmallerSlotSpanFreeListsEnabled(bool new_value) {
  sort_smaller_slot_span_free_lists_ = new_value;
}

// static
void PartitionRoot::SetSortActiveSlotSpansEnabled(bool new_value) {
  sort_active_slot_spans_ = new_value;
}

// Explicitly define common template instantiations to reduce compile time.
#define EXPORT_TEMPLATE \
  template PA_EXPORT_TEMPLATE_DEFINE(PA_COMPONENT_EXPORT(PARTITION_ALLOC))
EXPORT_TEMPLATE void* PartitionRoot::Alloc<AllocFlags::kNone>(size_t,
                                                              const char*);
EXPORT_TEMPLATE void* PartitionRoot::Alloc<AllocFlags::kReturnNull>(
    size_t,
    const char*);
EXPORT_TEMPLATE void*
PartitionRoot::Realloc<AllocFlags::kNone, FreeFlags::kNone>(void*,
                                                            size_t,
                                                            const char*);
EXPORT_TEMPLATE void*
PartitionRoot::Realloc<AllocFlags::kReturnNull, FreeFlags::kNone>(void*,
                                                                  size_t,
                                                                  const char*);
EXPORT_TEMPLATE void* PartitionRoot::AlignedAlloc<AllocFlags::kNone>(size_t,
                                                                     size_t);
#undef EXPORT_TEMPLATE
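
// Example (illustrative sketch, not compiled): callers in other components
// link against the instantiations above, e.g. for a failure-tolerant
// allocation. The root pointer, size, and type name are hypothetical.
//
//   void* maybe = root->Alloc<AllocFlags::kReturnNull>(size, "example_type");
//   if (!maybe) {
//     // Handle allocation failure instead of crashing on OOM.
//   }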

// TODO(https://crbug.com/1500662) Stop ignoring the -Winvalid-offsetof warning.
#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Winvalid-offsetof"
#endif
static_assert(offsetof(PartitionRoot, sentinel_bucket) ==
                  offsetof(PartitionRoot, buckets) +
                      internal::kNumBuckets * sizeof(PartitionRoot::Bucket),
              "sentinel_bucket must be just after the regular buckets.");

static_assert(
    offsetof(PartitionRoot, lock_) >= 64,
    "The lock should not be on the same cacheline as the read-mostly flags");
#if defined(__clang__)
#pragma clang diagnostic pop
#endif

}  // namespace partition_alloc