• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- primary64.h ---------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef SCUDO_PRIMARY64_H_
10 #define SCUDO_PRIMARY64_H_
11 
12 #include "allocator_common.h"
13 #include "bytemap.h"
14 #include "common.h"
15 #include "condition_variable.h"
16 #include "list.h"
17 #include "local_cache.h"
18 #include "mem_map.h"
19 #include "memtag.h"
20 #include "options.h"
21 #include "release.h"
22 #include "stats.h"
23 #include "string_utils.h"
24 #include "thread_annotations.h"
25 
26 namespace scudo {
27 
28 // SizeClassAllocator64 is an allocator tuned for 64-bit address space.
29 //
30 // It starts by reserving NumClasses * 2^RegionSizeLog bytes, equally divided in
31 // Regions, specific to each size class. Note that the base of that mapping is
32 // random (based to the platform specific map() capabilities). If
33 // PrimaryEnableRandomOffset is set, each Region actually starts at a random
34 // offset from its base.
35 //
36 // Regions are mapped incrementally on demand to fulfill allocation requests,
37 // those mappings being split into equally sized Blocks based on the size class
38 // they belong to. The Blocks created are shuffled to prevent predictable
39 // address patterns (the predictability increases with the size of the Blocks).
40 //
41 // The 1st Region (for size class 0) holds the TransferBatches. This is a
42 // structure used to transfer arrays of available pointers from the class size
43 // freelist to the thread specific freelist, and back.
44 //
45 // The memory used by this allocator is never unmapped, but can be partially
46 // released if the platform allows for it.
47 
48 template <typename Config> class SizeClassAllocator64 {
49 public:
50   typedef typename Config::CompactPtrT CompactPtrT;
51   typedef typename Config::SizeClassMap SizeClassMap;
52   typedef typename Config::ConditionVariableT ConditionVariableT;
53   static const uptr CompactPtrScale = Config::getCompactPtrScale();
54   static const uptr RegionSizeLog = Config::getRegionSizeLog();
55   static const uptr GroupSizeLog = Config::getGroupSizeLog();
56   static_assert(RegionSizeLog >= GroupSizeLog,
57                 "Group size shouldn't be greater than the region size");
58   static const uptr GroupScale = GroupSizeLog - CompactPtrScale;
59   typedef SizeClassAllocator64<Config> ThisT;
60   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
61   typedef TransferBatch<ThisT> TransferBatchT;
62   typedef BatchGroup<ThisT> BatchGroupT;
63 
64   // BachClass is used to store internal metadata so it needs to be at least as
65   // large as the largest data structure.
getSizeByClassId(uptr ClassId)66   static uptr getSizeByClassId(uptr ClassId) {
67     return (ClassId == SizeClassMap::BatchClassId)
68                ? roundUp(Max(sizeof(TransferBatchT), sizeof(BatchGroupT)),
69                          1U << CompactPtrScale)
70                : SizeClassMap::getSizeByClassId(ClassId);
71   }
72 
canAllocate(uptr Size)73   static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; }
74 
conditionVariableEnabled()75   static bool conditionVariableEnabled() {
76     return Config::hasConditionVariableT();
77   }
78 
init(s32 ReleaseToOsInterval)79   void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS {
80     DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT)));
81 
82     const uptr PageSize = getPageSizeCached();
83     const uptr GroupSize = (1UL << GroupSizeLog);
84     const uptr PagesInGroup = GroupSize / PageSize;
85     const uptr MinSizeClass = getSizeByClassId(1);
86     // When trying to release pages back to memory, visiting smaller size
87     // classes is expensive. Therefore, we only try to release smaller size
88     // classes when the amount of free blocks goes over a certain threshold (See
89     // the comment in releaseToOSMaybe() for more details). For example, for
90     // size class 32, we only do the release when the size of free blocks is
91     // greater than 97% of pages in a group. However, this may introduce another
92     // issue that if the number of free blocks is bouncing between 97% ~ 100%.
93     // Which means we may try many page releases but only release very few of
94     // them (less than 3% in a group). Even though we have
95     // `&ReleaseToOsIntervalMs` which slightly reduce the frequency of these
96     // calls but it will be better to have another guard to mitigate this issue.
97     //
98     // Here we add another constraint on the minimum size requirement. The
99     // constraint is determined by the size of in-use blocks in the minimal size
100     // class. Take size class 32 as an example,
101     //
102     //   +-     one memory group      -+
103     //   +----------------------+------+
104     //   |  97% of free blocks  |      |
105     //   +----------------------+------+
106     //                           \    /
107     //                      3% in-use blocks
108     //
109     //   * The release size threshold is 97%.
110     //
111     // The 3% size in a group is about 7 pages. For two consecutive
112     // releaseToOSMaybe(), we require the difference between `PushedBlocks`
113     // should be greater than 7 pages. This mitigates the page releasing
114     // thrashing which is caused by memory usage bouncing around the threshold.
115     // The smallest size class takes longest time to do the page release so we
116     // use its size of in-use blocks as a heuristic.
117     SmallerBlockReleasePageDelta =
118         PagesInGroup * (1 + MinSizeClass / 16U) / 100;
119 
120     u32 Seed;
121     const u64 Time = getMonotonicTimeFast();
122     if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))
123       Seed = static_cast<u32>(Time ^ (reinterpret_cast<uptr>(&Seed) >> 12));
124 
125     for (uptr I = 0; I < NumClasses; I++)
126       getRegionInfo(I)->RandState = getRandomU32(&Seed);
127 
128     if (Config::getEnableContiguousRegions()) {
129       ReservedMemoryT ReservedMemory = {};
130       // Reserve the space required for the Primary.
131       CHECK(ReservedMemory.create(/*Addr=*/0U, RegionSize * NumClasses,
132                                   "scudo:primary_reserve"));
133       const uptr PrimaryBase = ReservedMemory.getBase();
134 
135       for (uptr I = 0; I < NumClasses; I++) {
136         MemMapT RegionMemMap = ReservedMemory.dispatch(
137             PrimaryBase + (I << RegionSizeLog), RegionSize);
138         RegionInfo *Region = getRegionInfo(I);
139 
140         initRegion(Region, I, RegionMemMap, Config::getEnableRandomOffset());
141       }
142       shuffle(RegionInfoArray, NumClasses, &Seed);
143     }
144 
145     // The binding should be done after region shuffling so that it won't bind
146     // the FLLock from the wrong region.
147     for (uptr I = 0; I < NumClasses; I++)
148       getRegionInfo(I)->FLLockCV.bindTestOnly(getRegionInfo(I)->FLLock);
149 
150     // The default value in the primary config has the higher priority.
151     if (Config::getDefaultReleaseToOsIntervalMs() != INT32_MIN)
152       ReleaseToOsInterval = Config::getDefaultReleaseToOsIntervalMs();
153     setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
154   }
155 
unmapTestOnly()156   void unmapTestOnly() {
157     for (uptr I = 0; I < NumClasses; I++) {
158       RegionInfo *Region = getRegionInfo(I);
159       {
160         ScopedLock ML(Region->MMLock);
161         MemMapT MemMap = Region->MemMapInfo.MemMap;
162         if (MemMap.isAllocated())
163           MemMap.unmap();
164       }
165       *Region = {};
166     }
167   }
168 
169   // When all blocks are freed, it has to be the same size as `AllocatedUser`.
verifyAllBlocksAreReleasedTestOnly()170   void verifyAllBlocksAreReleasedTestOnly() {
171     // `BatchGroup` and `TransferBatch` also use the blocks from BatchClass.
172     uptr BatchClassUsedInFreeLists = 0;
173     for (uptr I = 0; I < NumClasses; I++) {
174       // We have to count BatchClassUsedInFreeLists in other regions first.
175       if (I == SizeClassMap::BatchClassId)
176         continue;
177       RegionInfo *Region = getRegionInfo(I);
178       ScopedLock ML(Region->MMLock);
179       ScopedLock FL(Region->FLLock);
180       const uptr BlockSize = getSizeByClassId(I);
181       uptr TotalBlocks = 0;
182       for (BatchGroupT &BG : Region->FreeListInfo.BlockList) {
183         // `BG::Batches` are `TransferBatches`. +1 for `BatchGroup`.
184         BatchClassUsedInFreeLists += BG.Batches.size() + 1;
185         for (const auto &It : BG.Batches)
186           TotalBlocks += It.getCount();
187       }
188 
189       DCHECK_EQ(TotalBlocks, Region->MemMapInfo.AllocatedUser / BlockSize);
190       DCHECK_EQ(Region->FreeListInfo.PushedBlocks,
191                 Region->FreeListInfo.PoppedBlocks);
192     }
193 
194     RegionInfo *Region = getRegionInfo(SizeClassMap::BatchClassId);
195     ScopedLock ML(Region->MMLock);
196     ScopedLock FL(Region->FLLock);
197     const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId);
198     uptr TotalBlocks = 0;
199     for (BatchGroupT &BG : Region->FreeListInfo.BlockList) {
200       if (LIKELY(!BG.Batches.empty())) {
201         for (const auto &It : BG.Batches)
202           TotalBlocks += It.getCount();
203       } else {
204         // `BatchGroup` with empty freelist doesn't have `TransferBatch` record
205         // itself.
206         ++TotalBlocks;
207       }
208     }
209     DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists,
210               Region->MemMapInfo.AllocatedUser / BlockSize);
211     DCHECK_GE(Region->FreeListInfo.PoppedBlocks,
212               Region->FreeListInfo.PushedBlocks);
213     const uptr BlocksInUse =
214         Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks;
215     DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists);
216   }
217 
popBlocks(CacheT * C,uptr ClassId,CompactPtrT * ToArray,const u16 MaxBlockCount)218   u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray,
219                 const u16 MaxBlockCount) {
220     DCHECK_LT(ClassId, NumClasses);
221     RegionInfo *Region = getRegionInfo(ClassId);
222     u16 PopCount = 0;
223 
224     {
225       ScopedLock L(Region->FLLock);
226       PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
227       if (PopCount != 0U)
228         return PopCount;
229     }
230 
231     bool ReportRegionExhausted = false;
232 
233     if (conditionVariableEnabled()) {
234       PopCount = popBlocksWithCV(C, ClassId, Region, ToArray, MaxBlockCount,
235                                  ReportRegionExhausted);
236     } else {
237       while (true) {
238         // When two threads compete for `Region->MMLock`, we only want one of
239         // them to call populateFreeListAndPopBlocks(). To avoid both of them
240         // doing that, always check the freelist before mapping new pages.
241         ScopedLock ML(Region->MMLock);
242         {
243           ScopedLock FL(Region->FLLock);
244           PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
245           if (PopCount != 0U)
246             return PopCount;
247         }
248 
249         const bool RegionIsExhausted = Region->Exhausted;
250         if (!RegionIsExhausted) {
251           PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray,
252                                                   MaxBlockCount);
253         }
254         ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
255         break;
256       }
257     }
258 
259     if (UNLIKELY(ReportRegionExhausted)) {
260       Printf("Can't populate more pages for size class %zu.\n",
261              getSizeByClassId(ClassId));
262 
263       // Theoretically, BatchClass shouldn't be used up. Abort immediately  when
264       // it happens.
265       if (ClassId == SizeClassMap::BatchClassId)
266         reportOutOfBatchClass();
267     }
268 
269     return PopCount;
270   }
271 
272   // Push the array of free blocks to the designated batch group.
pushBlocks(CacheT * C,uptr ClassId,CompactPtrT * Array,u32 Size)273   void pushBlocks(CacheT *C, uptr ClassId, CompactPtrT *Array, u32 Size) {
274     DCHECK_LT(ClassId, NumClasses);
275     DCHECK_GT(Size, 0);
276 
277     RegionInfo *Region = getRegionInfo(ClassId);
278     if (ClassId == SizeClassMap::BatchClassId) {
279       ScopedLock L(Region->FLLock);
280       pushBatchClassBlocks(Region, Array, Size);
281       if (conditionVariableEnabled())
282         Region->FLLockCV.notifyAll(Region->FLLock);
283       return;
284     }
285 
286     // TODO(chiahungduan): Consider not doing grouping if the group size is not
287     // greater than the block size with a certain scale.
288 
289     bool SameGroup = true;
290     if (GroupSizeLog < RegionSizeLog) {
291       // Sort the blocks so that blocks belonging to the same group can be
292       // pushed together.
293       for (u32 I = 1; I < Size; ++I) {
294         if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I]))
295           SameGroup = false;
296         CompactPtrT Cur = Array[I];
297         u32 J = I;
298         while (J > 0 && compactPtrGroup(Cur) < compactPtrGroup(Array[J - 1])) {
299           Array[J] = Array[J - 1];
300           --J;
301         }
302         Array[J] = Cur;
303       }
304     }
305 
306     {
307       ScopedLock L(Region->FLLock);
308       pushBlocksImpl(C, ClassId, Region, Array, Size, SameGroup);
309       if (conditionVariableEnabled())
310         Region->FLLockCV.notifyAll(Region->FLLock);
311     }
312   }
313 
disable()314   void disable() NO_THREAD_SAFETY_ANALYSIS {
315     // The BatchClassId must be locked last since other classes can use it.
316     for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) {
317       if (static_cast<uptr>(I) == SizeClassMap::BatchClassId)
318         continue;
319       getRegionInfo(static_cast<uptr>(I))->MMLock.lock();
320       getRegionInfo(static_cast<uptr>(I))->FLLock.lock();
321     }
322     getRegionInfo(SizeClassMap::BatchClassId)->MMLock.lock();
323     getRegionInfo(SizeClassMap::BatchClassId)->FLLock.lock();
324   }
325 
enable()326   void enable() NO_THREAD_SAFETY_ANALYSIS {
327     getRegionInfo(SizeClassMap::BatchClassId)->FLLock.unlock();
328     getRegionInfo(SizeClassMap::BatchClassId)->MMLock.unlock();
329     for (uptr I = 0; I < NumClasses; I++) {
330       if (I == SizeClassMap::BatchClassId)
331         continue;
332       getRegionInfo(I)->FLLock.unlock();
333       getRegionInfo(I)->MMLock.unlock();
334     }
335   }
336 
iterateOverBlocks(F Callback)337   template <typename F> void iterateOverBlocks(F Callback) {
338     for (uptr I = 0; I < NumClasses; I++) {
339       if (I == SizeClassMap::BatchClassId)
340         continue;
341       RegionInfo *Region = getRegionInfo(I);
342       // TODO: The call of `iterateOverBlocks` requires disabling
343       // SizeClassAllocator64. We may consider locking each region on demand
344       // only.
345       Region->FLLock.assertHeld();
346       Region->MMLock.assertHeld();
347       const uptr BlockSize = getSizeByClassId(I);
348       const uptr From = Region->RegionBeg;
349       const uptr To = From + Region->MemMapInfo.AllocatedUser;
350       for (uptr Block = From; Block < To; Block += BlockSize)
351         Callback(Block);
352     }
353   }
354 
getStats(ScopedString * Str)355   void getStats(ScopedString *Str) {
356     // TODO(kostyak): get the RSS per region.
357     uptr TotalMapped = 0;
358     uptr PoppedBlocks = 0;
359     uptr PushedBlocks = 0;
360     for (uptr I = 0; I < NumClasses; I++) {
361       RegionInfo *Region = getRegionInfo(I);
362       {
363         ScopedLock L(Region->MMLock);
364         TotalMapped += Region->MemMapInfo.MappedUser;
365       }
366       {
367         ScopedLock L(Region->FLLock);
368         PoppedBlocks += Region->FreeListInfo.PoppedBlocks;
369         PushedBlocks += Region->FreeListInfo.PushedBlocks;
370       }
371     }
372     const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
373     Str->append("Stats: SizeClassAllocator64: %zuM mapped (%uM rss) in %zu "
374                 "allocations; remains %zu; ReleaseToOsIntervalMs = %d\n",
375                 TotalMapped >> 20, 0U, PoppedBlocks,
376                 PoppedBlocks - PushedBlocks, IntervalMs >= 0 ? IntervalMs : -1);
377 
378     for (uptr I = 0; I < NumClasses; I++) {
379       RegionInfo *Region = getRegionInfo(I);
380       ScopedLock L1(Region->MMLock);
381       ScopedLock L2(Region->FLLock);
382       getStats(Str, I, Region);
383     }
384   }
385 
getFragmentationInfo(ScopedString * Str)386   void getFragmentationInfo(ScopedString *Str) {
387     Str->append(
388         "Fragmentation Stats: SizeClassAllocator64: page size = %zu bytes\n",
389         getPageSizeCached());
390 
391     for (uptr I = 1; I < NumClasses; I++) {
392       RegionInfo *Region = getRegionInfo(I);
393       ScopedLock L(Region->MMLock);
394       getRegionFragmentationInfo(Region, I, Str);
395     }
396   }
397 
getMemoryGroupFragmentationInfo(ScopedString * Str)398   void getMemoryGroupFragmentationInfo(ScopedString *Str) {
399     Str->append(
400         "Fragmentation Stats: SizeClassAllocator64: page size = %zu bytes\n",
401         getPageSizeCached());
402 
403     for (uptr I = 1; I < NumClasses; I++) {
404       RegionInfo *Region = getRegionInfo(I);
405       ScopedLock L(Region->MMLock);
406       getMemoryGroupFragmentationInfoInRegion(Region, I, Str);
407     }
408   }
409 
setOption(Option O,sptr Value)410   bool setOption(Option O, sptr Value) {
411     if (O == Option::ReleaseInterval) {
412       const s32 Interval = Max(
413           Min(static_cast<s32>(Value), Config::getMaxReleaseToOsIntervalMs()),
414           Config::getMinReleaseToOsIntervalMs());
415       atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval);
416       return true;
417     }
418     // Not supported by the Primary, but not an error either.
419     return true;
420   }
421 
tryReleaseToOS(uptr ClassId,ReleaseToOS ReleaseType)422   uptr tryReleaseToOS(uptr ClassId, ReleaseToOS ReleaseType) {
423     RegionInfo *Region = getRegionInfo(ClassId);
424     // Note that the tryLock() may fail spuriously, given that it should rarely
425     // happen and page releasing is fine to skip, we don't take certain
426     // approaches to ensure one page release is done.
427     if (Region->MMLock.tryLock()) {
428       uptr BytesReleased = releaseToOSMaybe(Region, ClassId, ReleaseType);
429       Region->MMLock.unlock();
430       return BytesReleased;
431     }
432     return 0;
433   }
434 
releaseToOS(ReleaseToOS ReleaseType)435   uptr releaseToOS(ReleaseToOS ReleaseType) {
436     uptr TotalReleasedBytes = 0;
437     for (uptr I = 0; I < NumClasses; I++) {
438       if (I == SizeClassMap::BatchClassId)
439         continue;
440       RegionInfo *Region = getRegionInfo(I);
441       ScopedLock L(Region->MMLock);
442       TotalReleasedBytes += releaseToOSMaybe(Region, I, ReleaseType);
443     }
444     return TotalReleasedBytes;
445   }
446 
getRegionInfoArrayAddress()447   const char *getRegionInfoArrayAddress() const {
448     return reinterpret_cast<const char *>(RegionInfoArray);
449   }
450 
getRegionInfoArraySize()451   static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); }
452 
getCompactPtrBaseByClassId(uptr ClassId)453   uptr getCompactPtrBaseByClassId(uptr ClassId) {
454     return getRegionInfo(ClassId)->RegionBeg;
455   }
456 
compactPtr(uptr ClassId,uptr Ptr)457   CompactPtrT compactPtr(uptr ClassId, uptr Ptr) {
458     DCHECK_LE(ClassId, SizeClassMap::LargestClassId);
459     return compactPtrInternal(getCompactPtrBaseByClassId(ClassId), Ptr);
460   }
461 
decompactPtr(uptr ClassId,CompactPtrT CompactPtr)462   void *decompactPtr(uptr ClassId, CompactPtrT CompactPtr) {
463     DCHECK_LE(ClassId, SizeClassMap::LargestClassId);
464     return reinterpret_cast<void *>(
465         decompactPtrInternal(getCompactPtrBaseByClassId(ClassId), CompactPtr));
466   }
467 
findNearestBlock(const char * RegionInfoData,uptr Ptr)468   static BlockInfo findNearestBlock(const char *RegionInfoData,
469                                     uptr Ptr) NO_THREAD_SAFETY_ANALYSIS {
470     const RegionInfo *RegionInfoArray =
471         reinterpret_cast<const RegionInfo *>(RegionInfoData);
472 
473     uptr ClassId;
474     uptr MinDistance = -1UL;
475     for (uptr I = 0; I != NumClasses; ++I) {
476       if (I == SizeClassMap::BatchClassId)
477         continue;
478       uptr Begin = RegionInfoArray[I].RegionBeg;
479       // TODO(chiahungduan): In fact, We need to lock the RegionInfo::MMLock.
480       // However, the RegionInfoData is passed with const qualifier and lock the
481       // mutex requires modifying RegionInfoData, which means we need to remove
482       // the const qualifier. This may lead to another undefined behavior (The
483       // first one is accessing `AllocatedUser` without locking. It's better to
484       // pass `RegionInfoData` as `void *` then we can lock the mutex properly.
485       uptr End = Begin + RegionInfoArray[I].MemMapInfo.AllocatedUser;
486       if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I))
487         continue;
488       uptr RegionDistance;
489       if (Begin <= Ptr) {
490         if (Ptr < End)
491           RegionDistance = 0;
492         else
493           RegionDistance = Ptr - End;
494       } else {
495         RegionDistance = Begin - Ptr;
496       }
497 
498       if (RegionDistance < MinDistance) {
499         MinDistance = RegionDistance;
500         ClassId = I;
501       }
502     }
503 
504     BlockInfo B = {};
505     if (MinDistance <= 8192) {
506       B.RegionBegin = RegionInfoArray[ClassId].RegionBeg;
507       B.RegionEnd =
508           B.RegionBegin + RegionInfoArray[ClassId].MemMapInfo.AllocatedUser;
509       B.BlockSize = SizeClassMap::getSizeByClassId(ClassId);
510       B.BlockBegin =
511           B.RegionBegin + uptr(sptr(Ptr - B.RegionBegin) / sptr(B.BlockSize) *
512                                sptr(B.BlockSize));
513       while (B.BlockBegin < B.RegionBegin)
514         B.BlockBegin += B.BlockSize;
515       while (B.RegionEnd < B.BlockBegin + B.BlockSize)
516         B.BlockBegin -= B.BlockSize;
517     }
518     return B;
519   }
520 
521   AtomicOptions Options;
522 
523 private:
524   static const uptr RegionSize = 1UL << RegionSizeLog;
525   static const uptr NumClasses = SizeClassMap::NumClasses;
526 
527   static const uptr MapSizeIncrement = Config::getMapSizeIncrement();
528   // Fill at most this number of batches from the newly map'd memory.
529   static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U;
530 
531   struct ReleaseToOsInfo {
532     uptr BytesInFreeListAtLastCheckpoint;
533     uptr NumReleasesAttempted;
534     uptr LastReleasedBytes;
535     // The minimum size of pushed blocks to trigger page release.
536     uptr TryReleaseThreshold;
537     // The number of bytes not triggering `releaseToOSMaybe()` because of
538     // the length of release interval.
539     uptr PendingPushedBytesDelta;
540     u64 LastReleaseAtNs;
541   };
542 
543   struct BlocksInfo {
544     SinglyLinkedList<BatchGroupT> BlockList = {};
545     uptr PoppedBlocks = 0;
546     uptr PushedBlocks = 0;
547   };
548 
549   struct PagesInfo {
550     MemMapT MemMap = {};
551     // Bytes mapped for user memory.
552     uptr MappedUser = 0;
553     // Bytes allocated for user memory.
554     uptr AllocatedUser = 0;
555   };
556 
557   struct UnpaddedRegionInfo {
558     // Mutex for operations on freelist
559     HybridMutex FLLock;
560     ConditionVariableT FLLockCV GUARDED_BY(FLLock);
561     // Mutex for memmap operations
562     HybridMutex MMLock ACQUIRED_BEFORE(FLLock);
563     // `RegionBeg` is initialized before thread creation and won't be changed.
564     uptr RegionBeg = 0;
565     u32 RandState GUARDED_BY(MMLock) = 0;
566     BlocksInfo FreeListInfo GUARDED_BY(FLLock);
567     PagesInfo MemMapInfo GUARDED_BY(MMLock);
568     ReleaseToOsInfo ReleaseInfo GUARDED_BY(MMLock) = {};
569     bool Exhausted GUARDED_BY(MMLock) = false;
570     bool isPopulatingFreeList GUARDED_BY(FLLock) = false;
571   };
572   struct RegionInfo : UnpaddedRegionInfo {
573     char Padding[SCUDO_CACHE_LINE_SIZE -
574                  (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)] = {};
575   };
576   static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, "");
577 
getRegionInfo(uptr ClassId)578   RegionInfo *getRegionInfo(uptr ClassId) {
579     DCHECK_LT(ClassId, NumClasses);
580     return &RegionInfoArray[ClassId];
581   }
582 
getRegionBaseByClassId(uptr ClassId)583   uptr getRegionBaseByClassId(uptr ClassId) {
584     RegionInfo *Region = getRegionInfo(ClassId);
585     Region->MMLock.assertHeld();
586 
587     if (!Config::getEnableContiguousRegions() &&
588         !Region->MemMapInfo.MemMap.isAllocated()) {
589       return 0U;
590     }
591     return Region->MemMapInfo.MemMap.getBase();
592   }
593 
compactPtrInternal(uptr Base,uptr Ptr)594   static CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) {
595     return static_cast<CompactPtrT>((Ptr - Base) >> CompactPtrScale);
596   }
597 
decompactPtrInternal(uptr Base,CompactPtrT CompactPtr)598   static uptr decompactPtrInternal(uptr Base, CompactPtrT CompactPtr) {
599     return Base + (static_cast<uptr>(CompactPtr) << CompactPtrScale);
600   }
601 
compactPtrGroup(CompactPtrT CompactPtr)602   static uptr compactPtrGroup(CompactPtrT CompactPtr) {
603     const uptr Mask = (static_cast<uptr>(1) << GroupScale) - 1;
604     return static_cast<uptr>(CompactPtr) & ~Mask;
605   }
decompactGroupBase(uptr Base,uptr CompactPtrGroupBase)606   static uptr decompactGroupBase(uptr Base, uptr CompactPtrGroupBase) {
607     DCHECK_EQ(CompactPtrGroupBase % (static_cast<uptr>(1) << (GroupScale)), 0U);
608     return Base + (CompactPtrGroupBase << CompactPtrScale);
609   }
610 
isSmallBlock(uptr BlockSize)611   ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) {
612     const uptr PageSize = getPageSizeCached();
613     return BlockSize < PageSize / 16U;
614   }
615 
getMinReleaseAttemptSize(uptr BlockSize)616   ALWAYS_INLINE uptr getMinReleaseAttemptSize(uptr BlockSize) {
617     return roundUp(BlockSize, getPageSizeCached());
618   }
619 
initRegion(RegionInfo * Region,uptr ClassId,MemMapT MemMap,bool EnableRandomOffset)620   ALWAYS_INLINE void initRegion(RegionInfo *Region, uptr ClassId,
621                                 MemMapT MemMap, bool EnableRandomOffset)
622       REQUIRES(Region->MMLock) {
623     DCHECK(!Region->MemMapInfo.MemMap.isAllocated());
624     DCHECK(MemMap.isAllocated());
625 
626     const uptr PageSize = getPageSizeCached();
627 
628     Region->MemMapInfo.MemMap = MemMap;
629 
630     Region->RegionBeg = MemMap.getBase();
631     if (EnableRandomOffset) {
632       Region->RegionBeg +=
633           (getRandomModN(&Region->RandState, 16) + 1) * PageSize;
634     }
635 
636     const uptr BlockSize = getSizeByClassId(ClassId);
637     // Releasing small blocks is expensive, set a higher threshold to avoid
638     // frequent page releases.
639     if (isSmallBlock(BlockSize)) {
640       Region->ReleaseInfo.TryReleaseThreshold =
641           PageSize * SmallerBlockReleasePageDelta;
642     } else {
643       Region->ReleaseInfo.TryReleaseThreshold =
644           getMinReleaseAttemptSize(BlockSize);
645     }
646   }
647 
pushBatchClassBlocks(RegionInfo * Region,CompactPtrT * Array,u32 Size)648   void pushBatchClassBlocks(RegionInfo *Region, CompactPtrT *Array, u32 Size)
649       REQUIRES(Region->FLLock) {
650     DCHECK_EQ(Region, getRegionInfo(SizeClassMap::BatchClassId));
651 
652     // Free blocks are recorded by TransferBatch in freelist for all
653     // size-classes. In addition, TransferBatch is allocated from BatchClassId.
654     // In order not to use additional block to record the free blocks in
655     // BatchClassId, they are self-contained. I.e., A TransferBatch records the
656     // block address of itself. See the figure below:
657     //
658     // TransferBatch at 0xABCD
659     // +----------------------------+
660     // | Free blocks' addr          |
661     // | +------+------+------+     |
662     // | |0xABCD|...   |...   |     |
663     // | +------+------+------+     |
664     // +----------------------------+
665     //
666     // When we allocate all the free blocks in the TransferBatch, the block used
667     // by TransferBatch is also free for use. We don't need to recycle the
668     // TransferBatch. Note that the correctness is maintained by the invariant,
669     //
670     //   Each popBlocks() request returns the entire TransferBatch. Returning
671     //   part of the blocks in a TransferBatch is invalid.
672     //
673     // This ensures that TransferBatch won't leak the address itself while it's
674     // still holding other valid data.
675     //
676     // Besides, BatchGroup is also allocated from BatchClassId and has its
677     // address recorded in the TransferBatch too. To maintain the correctness,
678     //
679     //   The address of BatchGroup is always recorded in the last TransferBatch
680     //   in the freelist (also imply that the freelist should only be
681     //   updated with push_front). Once the last TransferBatch is popped,
682     //   the block used by BatchGroup is also free for use.
683     //
684     // With this approach, the blocks used by BatchGroup and TransferBatch are
685     // reusable and don't need additional space for them.
686 
687     Region->FreeListInfo.PushedBlocks += Size;
688     BatchGroupT *BG = Region->FreeListInfo.BlockList.front();
689 
690     if (BG == nullptr) {
691       // Construct `BatchGroup` on the last element.
692       BG = reinterpret_cast<BatchGroupT *>(
693           decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1]));
694       --Size;
695       BG->Batches.clear();
696       // BatchClass hasn't enabled memory group. Use `0` to indicate there's no
697       // memory group here.
698       BG->CompactPtrGroupBase = 0;
699       BG->BytesInBGAtLastCheckpoint = 0;
700       BG->MaxCachedPerBatch =
701           CacheT::getMaxCached(getSizeByClassId(SizeClassMap::BatchClassId));
702 
703       Region->FreeListInfo.BlockList.push_front(BG);
704     }
705 
706     if (UNLIKELY(Size == 0))
707       return;
708 
709     // This happens under 2 cases.
710     //   1. just allocated a new `BatchGroup`.
711     //   2. Only 1 block is pushed when the freelist is empty.
712     if (BG->Batches.empty()) {
713       // Construct the `TransferBatch` on the last element.
714       TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(
715           decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1]));
716       TB->clear();
717       // As mentioned above, addresses of `TransferBatch` and `BatchGroup` are
718       // recorded in the TransferBatch.
719       TB->add(Array[Size - 1]);
720       TB->add(
721           compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(BG)));
722       --Size;
723       BG->Batches.push_front(TB);
724     }
725 
726     TransferBatchT *CurBatch = BG->Batches.front();
727     DCHECK_NE(CurBatch, nullptr);
728 
729     for (u32 I = 0; I < Size;) {
730       u16 UnusedSlots =
731           static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
732       if (UnusedSlots == 0) {
733         CurBatch = reinterpret_cast<TransferBatchT *>(
734             decompactPtr(SizeClassMap::BatchClassId, Array[I]));
735         CurBatch->clear();
736         // Self-contained
737         CurBatch->add(Array[I]);
738         ++I;
739         // TODO(chiahungduan): Avoid the use of push_back() in `Batches` of
740         // BatchClassId.
741         BG->Batches.push_front(CurBatch);
742         UnusedSlots = static_cast<u16>(BG->MaxCachedPerBatch - 1);
743       }
744       // `UnusedSlots` is u16 so the result will be also fit in u16.
745       const u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I));
746       CurBatch->appendFromArray(&Array[I], AppendSize);
747       I += AppendSize;
748     }
749   }
750 
751   // Push the blocks to their batch group. The layout will be like,
752   //
753   // FreeListInfo.BlockList - > BG -> BG -> BG
754   //                            |     |     |
755   //                            v     v     v
756   //                            TB    TB    TB
757   //                            |
758   //                            v
759   //                            TB
760   //
761   // Each BlockGroup(BG) will associate with unique group id and the free blocks
762   // are managed by a list of TransferBatch(TB). To reduce the time of inserting
763   // blocks, BGs are sorted and the input `Array` are supposed to be sorted so
764   // that we can get better performance of maintaining sorted property.
765   // Use `SameGroup=true` to indicate that all blocks in the array are from the
766   // same group then we will skip checking the group id of each block.
767   void pushBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region,
768                       CompactPtrT *Array, u32 Size, bool SameGroup = false)
769       REQUIRES(Region->FLLock) {
770     DCHECK_NE(ClassId, SizeClassMap::BatchClassId);
771     DCHECK_GT(Size, 0U);
772 
773     auto CreateGroup = [&](uptr CompactPtrGroupBase) {
774       BatchGroupT *BG =
775           reinterpret_cast<BatchGroupT *>(C->getBatchClassBlock());
776       BG->Batches.clear();
777       TransferBatchT *TB =
778           reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock());
779       TB->clear();
780 
781       BG->CompactPtrGroupBase = CompactPtrGroupBase;
782       BG->Batches.push_front(TB);
783       BG->BytesInBGAtLastCheckpoint = 0;
784       BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached;
785 
786       return BG;
787     };
788 
789     auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) {
790       SinglyLinkedList<TransferBatchT> &Batches = BG->Batches;
791       TransferBatchT *CurBatch = Batches.front();
792       DCHECK_NE(CurBatch, nullptr);
793 
794       for (u32 I = 0; I < Size;) {
795         DCHECK_GE(BG->MaxCachedPerBatch, CurBatch->getCount());
796         u16 UnusedSlots =
797             static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
798         if (UnusedSlots == 0) {
799           CurBatch =
800               reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock());
801           CurBatch->clear();
802           Batches.push_front(CurBatch);
803           UnusedSlots = BG->MaxCachedPerBatch;
804         }
805         // `UnusedSlots` is u16 so the result will be also fit in u16.
806         u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I));
807         CurBatch->appendFromArray(&Array[I], AppendSize);
808         I += AppendSize;
809       }
810     };
811 
812     Region->FreeListInfo.PushedBlocks += Size;
813     BatchGroupT *Cur = Region->FreeListInfo.BlockList.front();
814 
815     // In the following, `Cur` always points to the BatchGroup for blocks that
816     // will be pushed next. `Prev` is the element right before `Cur`.
817     BatchGroupT *Prev = nullptr;
818 
819     while (Cur != nullptr &&
820            compactPtrGroup(Array[0]) > Cur->CompactPtrGroupBase) {
821       Prev = Cur;
822       Cur = Cur->Next;
823     }
824 
825     if (Cur == nullptr ||
826         compactPtrGroup(Array[0]) != Cur->CompactPtrGroupBase) {
827       Cur = CreateGroup(compactPtrGroup(Array[0]));
828       if (Prev == nullptr)
829         Region->FreeListInfo.BlockList.push_front(Cur);
830       else
831         Region->FreeListInfo.BlockList.insert(Prev, Cur);
832     }
833 
834     // All the blocks are from the same group, just push without checking group
835     // id.
836     if (SameGroup) {
837       for (u32 I = 0; I < Size; ++I)
838         DCHECK_EQ(compactPtrGroup(Array[I]), Cur->CompactPtrGroupBase);
839 
840       InsertBlocks(Cur, Array, Size);
841       return;
842     }
843 
844     // The blocks are sorted by group id. Determine the segment of group and
845     // push them to their group together.
846     u32 Count = 1;
847     for (u32 I = 1; I < Size; ++I) {
848       if (compactPtrGroup(Array[I - 1]) != compactPtrGroup(Array[I])) {
849         DCHECK_EQ(compactPtrGroup(Array[I - 1]), Cur->CompactPtrGroupBase);
850         InsertBlocks(Cur, Array + I - Count, Count);
851 
852         while (Cur != nullptr &&
853                compactPtrGroup(Array[I]) > Cur->CompactPtrGroupBase) {
854           Prev = Cur;
855           Cur = Cur->Next;
856         }
857 
858         if (Cur == nullptr ||
859             compactPtrGroup(Array[I]) != Cur->CompactPtrGroupBase) {
860           Cur = CreateGroup(compactPtrGroup(Array[I]));
861           DCHECK_NE(Prev, nullptr);
862           Region->FreeListInfo.BlockList.insert(Prev, Cur);
863         }
864 
865         Count = 1;
866       } else {
867         ++Count;
868       }
869     }
870 
871     InsertBlocks(Cur, Array + Size - Count, Count);
872   }
873 
popBlocksWithCV(CacheT * C,uptr ClassId,RegionInfo * Region,CompactPtrT * ToArray,const u16 MaxBlockCount,bool & ReportRegionExhausted)874   u16 popBlocksWithCV(CacheT *C, uptr ClassId, RegionInfo *Region,
875                       CompactPtrT *ToArray, const u16 MaxBlockCount,
876                       bool &ReportRegionExhausted) {
877     u16 PopCount = 0;
878 
879     while (true) {
880       // We only expect one thread doing the freelist refillment and other
881       // threads will be waiting for either the completion of the
882       // `populateFreeListAndPopBlocks()` or `pushBlocks()` called by other
883       // threads.
884       bool PopulateFreeList = false;
885       {
886         ScopedLock FL(Region->FLLock);
887         if (!Region->isPopulatingFreeList) {
888           Region->isPopulatingFreeList = true;
889           PopulateFreeList = true;
890         }
891       }
892 
893       if (PopulateFreeList) {
894         ScopedLock ML(Region->MMLock);
895 
896         const bool RegionIsExhausted = Region->Exhausted;
897         if (!RegionIsExhausted) {
898           PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray,
899                                                   MaxBlockCount);
900         }
901         ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
902 
903         {
904           // Before reacquiring the `FLLock`, the freelist may be used up again
905           // and some threads are waiting for the freelist refillment by the
906           // current thread. It's important to set
907           // `Region->isPopulatingFreeList` to false so the threads about to
908           // sleep will notice the status change.
909           ScopedLock FL(Region->FLLock);
910           Region->isPopulatingFreeList = false;
911           Region->FLLockCV.notifyAll(Region->FLLock);
912         }
913 
914         break;
915       }
916 
917       // At here, there are two preconditions to be met before waiting,
918       //   1. The freelist is empty.
919       //   2. Region->isPopulatingFreeList == true, i.e, someone is still doing
920       //   `populateFreeListAndPopBlocks()`.
921       //
922       // Note that it has the chance that freelist is empty but
923       // Region->isPopulatingFreeList == false because all the new populated
924       // blocks were used up right after the refillment. Therefore, we have to
925       // check if someone is still populating the freelist.
926       ScopedLock FL(Region->FLLock);
927       PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
928       if (PopCount != 0U)
929         break;
930 
931       if (!Region->isPopulatingFreeList)
932         continue;
933 
934       // Now the freelist is empty and someone's doing the refillment. We will
935       // wait until anyone refills the freelist or someone finishes doing
936       // `populateFreeListAndPopBlocks()`. The refillment can be done by
937       // `populateFreeListAndPopBlocks()`, `pushBlocks()`,
938       // `pushBatchClassBlocks()` and `mergeGroupsToReleaseBack()`.
939       Region->FLLockCV.wait(Region->FLLock);
940 
941       PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
942       if (PopCount != 0U)
943         break;
944     }
945 
946     return PopCount;
947   }
948 
popBlocksImpl(CacheT * C,uptr ClassId,RegionInfo * Region,CompactPtrT * ToArray,const u16 MaxBlockCount)949   u16 popBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region,
950                     CompactPtrT *ToArray, const u16 MaxBlockCount)
951       REQUIRES(Region->FLLock) {
952     if (Region->FreeListInfo.BlockList.empty())
953       return 0U;
954 
955     SinglyLinkedList<TransferBatchT> &Batches =
956         Region->FreeListInfo.BlockList.front()->Batches;
957 
958     if (Batches.empty()) {
959       DCHECK_EQ(ClassId, SizeClassMap::BatchClassId);
960       BatchGroupT *BG = Region->FreeListInfo.BlockList.front();
961       Region->FreeListInfo.BlockList.pop_front();
962 
963       // Block used by `BatchGroup` is from BatchClassId. Turn the block into
964       // `TransferBatch` with single block.
965       TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG);
966       ToArray[0] =
967           compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB));
968       Region->FreeListInfo.PoppedBlocks += 1;
969       return 1U;
970     }
971 
972     // So far, instead of always filling blocks to `MaxBlockCount`, we only
973     // examine single `TransferBatch` to minimize the time spent in the primary
974     // allocator. Besides, the sizes of `TransferBatch` and
975     // `CacheT::getMaxCached()` may also impact the time spent on accessing the
976     // primary allocator.
977     // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
978     // blocks and/or adjust the size of `TransferBatch` according to
979     // `CacheT::getMaxCached()`.
980     TransferBatchT *B = Batches.front();
981     DCHECK_NE(B, nullptr);
982     DCHECK_GT(B->getCount(), 0U);
983 
984     // BachClassId should always take all blocks in the TransferBatch. Read the
985     // comment in `pushBatchClassBlocks()` for more details.
986     const u16 PopCount = ClassId == SizeClassMap::BatchClassId
987                              ? B->getCount()
988                              : Min(MaxBlockCount, B->getCount());
989     B->moveNToArray(ToArray, PopCount);
990 
991     // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be
992     // done without holding `FLLock`.
993     if (B->empty()) {
994       Batches.pop_front();
995       // `TransferBatch` of BatchClassId is self-contained, no need to
996       // deallocate. Read the comment in `pushBatchClassBlocks()` for more
997       // details.
998       if (ClassId != SizeClassMap::BatchClassId)
999         C->deallocate(SizeClassMap::BatchClassId, B);
1000 
1001       if (Batches.empty()) {
1002         BatchGroupT *BG = Region->FreeListInfo.BlockList.front();
1003         Region->FreeListInfo.BlockList.pop_front();
1004 
1005         // We don't keep BatchGroup with zero blocks to avoid empty-checking
1006         // while allocating. Note that block used for constructing BatchGroup is
1007         // recorded as free blocks in the last element of BatchGroup::Batches.
1008         // Which means, once we pop the last TransferBatch, the block is
1009         // implicitly deallocated.
1010         if (ClassId != SizeClassMap::BatchClassId)
1011           C->deallocate(SizeClassMap::BatchClassId, BG);
1012       }
1013     }
1014 
1015     Region->FreeListInfo.PoppedBlocks += PopCount;
1016 
1017     return PopCount;
1018   }
1019 
populateFreeListAndPopBlocks(CacheT * C,uptr ClassId,RegionInfo * Region,CompactPtrT * ToArray,const u16 MaxBlockCount)1020   NOINLINE u16 populateFreeListAndPopBlocks(CacheT *C, uptr ClassId,
1021                                             RegionInfo *Region,
1022                                             CompactPtrT *ToArray,
1023                                             const u16 MaxBlockCount)
1024       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
1025     if (!Config::getEnableContiguousRegions() &&
1026         !Region->MemMapInfo.MemMap.isAllocated()) {
1027       ReservedMemoryT ReservedMemory;
1028       if (UNLIKELY(!ReservedMemory.create(/*Addr=*/0U, RegionSize,
1029                                           "scudo:primary_reserve",
1030                                           MAP_ALLOWNOMEM))) {
1031         Printf("Can't reserve pages for size class %zu.\n",
1032                getSizeByClassId(ClassId));
1033         return 0U;
1034       }
1035       initRegion(Region, ClassId,
1036                  ReservedMemory.dispatch(ReservedMemory.getBase(),
1037                                          ReservedMemory.getCapacity()),
1038                  /*EnableRandomOffset=*/false);
1039     }
1040 
1041     DCHECK(Region->MemMapInfo.MemMap.isAllocated());
1042     const uptr Size = getSizeByClassId(ClassId);
1043     const u16 MaxCount = CacheT::getMaxCached(Size);
1044     const uptr RegionBeg = Region->RegionBeg;
1045     const uptr MappedUser = Region->MemMapInfo.MappedUser;
1046     const uptr TotalUserBytes =
1047         Region->MemMapInfo.AllocatedUser + MaxCount * Size;
1048     // Map more space for blocks, if necessary.
1049     if (TotalUserBytes > MappedUser) {
1050       // Do the mmap for the user memory.
1051       const uptr MapSize =
1052           roundUp(TotalUserBytes - MappedUser, MapSizeIncrement);
1053       const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId);
1054       if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) {
1055         Region->Exhausted = true;
1056         return 0U;
1057       }
1058 
1059       if (UNLIKELY(!Region->MemMapInfo.MemMap.remap(
1060               RegionBeg + MappedUser, MapSize, "scudo:primary",
1061               MAP_ALLOWNOMEM | MAP_RESIZABLE |
1062                   (useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG
1063                                                             : 0)))) {
1064         return 0U;
1065       }
1066       Region->MemMapInfo.MappedUser += MapSize;
1067       C->getStats().add(StatMapped, MapSize);
1068     }
1069 
1070     const u32 NumberOfBlocks =
1071         Min(MaxNumBatches * MaxCount,
1072             static_cast<u32>((Region->MemMapInfo.MappedUser -
1073                               Region->MemMapInfo.AllocatedUser) /
1074                              Size));
1075     DCHECK_GT(NumberOfBlocks, 0);
1076 
1077     constexpr u32 ShuffleArraySize =
1078         MaxNumBatches * TransferBatchT::MaxNumCached;
1079     CompactPtrT ShuffleArray[ShuffleArraySize];
1080     DCHECK_LE(NumberOfBlocks, ShuffleArraySize);
1081 
1082     const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId);
1083     uptr P = RegionBeg + Region->MemMapInfo.AllocatedUser;
1084     for (u32 I = 0; I < NumberOfBlocks; I++, P += Size)
1085       ShuffleArray[I] = compactPtrInternal(CompactPtrBase, P);
1086 
1087     ScopedLock L(Region->FLLock);
1088 
1089     if (ClassId != SizeClassMap::BatchClassId) {
1090       u32 N = 1;
1091       uptr CurGroup = compactPtrGroup(ShuffleArray[0]);
1092       for (u32 I = 1; I < NumberOfBlocks; I++) {
1093         if (UNLIKELY(compactPtrGroup(ShuffleArray[I]) != CurGroup)) {
1094           shuffle(ShuffleArray + I - N, N, &Region->RandState);
1095           pushBlocksImpl(C, ClassId, Region, ShuffleArray + I - N, N,
1096                          /*SameGroup=*/true);
1097           N = 1;
1098           CurGroup = compactPtrGroup(ShuffleArray[I]);
1099         } else {
1100           ++N;
1101         }
1102       }
1103 
1104       shuffle(ShuffleArray + NumberOfBlocks - N, N, &Region->RandState);
1105       pushBlocksImpl(C, ClassId, Region, &ShuffleArray[NumberOfBlocks - N], N,
1106                      /*SameGroup=*/true);
1107     } else {
1108       pushBatchClassBlocks(Region, ShuffleArray, NumberOfBlocks);
1109     }
1110 
1111     const u16 PopCount =
1112         popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
1113     DCHECK_NE(PopCount, 0U);
1114 
1115     // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record
1116     // the requests from `PushBlocks` and `PopBatch` which are external
1117     // interfaces. `populateFreeListAndPopBlocks` is the internal interface so
1118     // we should set the values back to avoid incorrectly setting the stats.
1119     Region->FreeListInfo.PushedBlocks -= NumberOfBlocks;
1120 
1121     const uptr AllocatedUser = Size * NumberOfBlocks;
1122     C->getStats().add(StatFree, AllocatedUser);
1123     Region->MemMapInfo.AllocatedUser += AllocatedUser;
1124 
1125     return PopCount;
1126   }
1127 
getStats(ScopedString * Str,uptr ClassId,RegionInfo * Region)1128   void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Region)
1129       REQUIRES(Region->MMLock, Region->FLLock) {
1130     if (Region->MemMapInfo.MappedUser == 0)
1131       return;
1132     const uptr BlockSize = getSizeByClassId(ClassId);
1133     const uptr InUseBlocks =
1134         Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks;
1135     const uptr BytesInFreeList =
1136         Region->MemMapInfo.AllocatedUser - InUseBlocks * BlockSize;
1137     uptr RegionPushedBytesDelta = 0;
1138     if (BytesInFreeList >=
1139         Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) {
1140       RegionPushedBytesDelta =
1141           BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
1142     }
1143     const uptr TotalChunks = Region->MemMapInfo.AllocatedUser / BlockSize;
1144     Str->append("%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu "
1145                 "inuse: %6zu total: %6zu releases attempted: %6zu last "
1146                 "released: %6zuK latest pushed bytes: %6zuK region: 0x%zx "
1147                 "(0x%zx)\n",
1148                 Region->Exhausted ? "E" : " ", ClassId,
1149                 getSizeByClassId(ClassId), Region->MemMapInfo.MappedUser >> 10,
1150                 Region->FreeListInfo.PoppedBlocks,
1151                 Region->FreeListInfo.PushedBlocks, InUseBlocks, TotalChunks,
1152                 Region->ReleaseInfo.NumReleasesAttempted,
1153                 Region->ReleaseInfo.LastReleasedBytes >> 10,
1154                 RegionPushedBytesDelta >> 10, Region->RegionBeg,
1155                 getRegionBaseByClassId(ClassId));
1156   }
1157 
getRegionFragmentationInfo(RegionInfo * Region,uptr ClassId,ScopedString * Str)1158   void getRegionFragmentationInfo(RegionInfo *Region, uptr ClassId,
1159                                   ScopedString *Str) REQUIRES(Region->MMLock) {
1160     const uptr BlockSize = getSizeByClassId(ClassId);
1161     const uptr AllocatedUserEnd =
1162         Region->MemMapInfo.AllocatedUser + Region->RegionBeg;
1163 
1164     SinglyLinkedList<BatchGroupT> GroupsToRelease;
1165     {
1166       ScopedLock L(Region->FLLock);
1167       GroupsToRelease = Region->FreeListInfo.BlockList;
1168       Region->FreeListInfo.BlockList.clear();
1169     }
1170 
1171     FragmentationRecorder Recorder;
1172     if (!GroupsToRelease.empty()) {
1173       PageReleaseContext Context =
1174           markFreeBlocks(Region, BlockSize, AllocatedUserEnd,
1175                          getCompactPtrBaseByClassId(ClassId), GroupsToRelease);
1176       auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
1177       releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
1178 
1179       mergeGroupsToReleaseBack(Region, GroupsToRelease);
1180     }
1181 
1182     ScopedLock L(Region->FLLock);
1183     const uptr PageSize = getPageSizeCached();
1184     const uptr TotalBlocks = Region->MemMapInfo.AllocatedUser / BlockSize;
1185     const uptr InUseBlocks =
1186         Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks;
1187     const uptr AllocatedPagesCount =
1188         roundUp(Region->MemMapInfo.AllocatedUser, PageSize) / PageSize;
1189     DCHECK_GE(AllocatedPagesCount, Recorder.getReleasedPagesCount());
1190     const uptr InUsePages =
1191         AllocatedPagesCount - Recorder.getReleasedPagesCount();
1192     const uptr InUseBytes = InUsePages * PageSize;
1193 
1194     uptr Integral;
1195     uptr Fractional;
1196     computePercentage(BlockSize * InUseBlocks, InUseBytes, &Integral,
1197                       &Fractional);
1198     Str->append("  %02zu (%6zu): inuse/total blocks: %6zu/%6zu inuse/total "
1199                 "pages: %6zu/%6zu inuse bytes: %6zuK util: %3zu.%02zu%%\n",
1200                 ClassId, BlockSize, InUseBlocks, TotalBlocks, InUsePages,
1201                 AllocatedPagesCount, InUseBytes >> 10, Integral, Fractional);
1202   }
1203 
getMemoryGroupFragmentationInfoInRegion(RegionInfo * Region,uptr ClassId,ScopedString * Str)1204   void getMemoryGroupFragmentationInfoInRegion(RegionInfo *Region, uptr ClassId,
1205                                                ScopedString *Str)
1206       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
1207     const uptr BlockSize = getSizeByClassId(ClassId);
1208     const uptr AllocatedUserEnd =
1209         Region->MemMapInfo.AllocatedUser + Region->RegionBeg;
1210 
1211     SinglyLinkedList<BatchGroupT> GroupsToRelease;
1212     {
1213       ScopedLock L(Region->FLLock);
1214       GroupsToRelease = Region->FreeListInfo.BlockList;
1215       Region->FreeListInfo.BlockList.clear();
1216     }
1217 
1218     constexpr uptr GroupSize = (1UL << GroupSizeLog);
1219     constexpr uptr MaxNumGroups = RegionSize / GroupSize;
1220 
1221     MemoryGroupFragmentationRecorder<GroupSize, MaxNumGroups> Recorder;
1222     if (!GroupsToRelease.empty()) {
1223       PageReleaseContext Context =
1224           markFreeBlocks(Region, BlockSize, AllocatedUserEnd,
1225                          getCompactPtrBaseByClassId(ClassId), GroupsToRelease);
1226       auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
1227       releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
1228 
1229       mergeGroupsToReleaseBack(Region, GroupsToRelease);
1230     }
1231 
1232     Str->append("MemoryGroupFragmentationInfo in Region %zu (%zu)\n", ClassId,
1233                 BlockSize);
1234 
1235     const uptr MaxNumGroupsInUse =
1236         roundUp(Region->MemMapInfo.AllocatedUser, GroupSize) / GroupSize;
1237     for (uptr I = 0; I < MaxNumGroupsInUse; ++I) {
1238       uptr Integral;
1239       uptr Fractional;
1240       computePercentage(Recorder.NumPagesInOneGroup -
1241                             Recorder.getNumFreePages(I),
1242                         Recorder.NumPagesInOneGroup, &Integral, &Fractional);
1243       Str->append("MemoryGroup #%zu (0x%zx): util: %3zu.%02zu%%\n", I,
1244                   Region->RegionBeg + I * GroupSize, Integral, Fractional);
1245     }
1246   }
1247 
1248   NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId,
1249                                  ReleaseToOS ReleaseType = ReleaseToOS::Normal)
1250       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
1251     const uptr BlockSize = getSizeByClassId(ClassId);
1252     uptr BytesInFreeList;
1253     const uptr AllocatedUserEnd =
1254         Region->MemMapInfo.AllocatedUser + Region->RegionBeg;
1255     uptr RegionPushedBytesDelta = 0;
1256     SinglyLinkedList<BatchGroupT> GroupsToRelease;
1257 
1258     {
1259       ScopedLock L(Region->FLLock);
1260 
1261       BytesInFreeList = Region->MemMapInfo.AllocatedUser -
1262                         (Region->FreeListInfo.PoppedBlocks -
1263                          Region->FreeListInfo.PushedBlocks) *
1264                             BlockSize;
1265       if (UNLIKELY(BytesInFreeList == 0))
1266         return false;
1267 
1268       // ==================================================================== //
1269       // 1. Check if we have enough free blocks and if it's worth doing a page
1270       //    release.
1271       // ==================================================================== //
1272       if (ReleaseType != ReleaseToOS::ForceAll &&
1273           !hasChanceToReleasePages(Region, BlockSize, BytesInFreeList,
1274                                    ReleaseType)) {
1275         return 0;
1276       }
1277 
1278       // Given that we will unlock the freelist for block operations, cache the
1279       // value here so that when we are adapting the `TryReleaseThreshold`
1280       // later, we are using the right metric.
1281       RegionPushedBytesDelta =
1282           BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
1283 
1284       // ==================================================================== //
1285       // 2. Determine which groups can release the pages. Use a heuristic to
1286       //    gather groups that are candidates for doing a release.
1287       // ==================================================================== //
1288       if (ReleaseType == ReleaseToOS::ForceAll) {
1289         GroupsToRelease = Region->FreeListInfo.BlockList;
1290         Region->FreeListInfo.BlockList.clear();
1291       } else {
1292         GroupsToRelease =
1293             collectGroupsToRelease(Region, BlockSize, AllocatedUserEnd,
1294                                    getCompactPtrBaseByClassId(ClassId));
1295       }
1296       if (GroupsToRelease.empty())
1297         return 0;
1298     }
1299 
1300     // The following steps contribute to the majority time spent in page
1301     // releasing thus we increment the counter here.
1302     ++Region->ReleaseInfo.NumReleasesAttempted;
1303 
1304     // Note that we have extracted the `GroupsToRelease` from region freelist.
1305     // It's safe to let pushBlocks()/popBlocks() access the remaining region
1306     // freelist. In the steps 3 and 4, we will temporarily release the FLLock
1307     // and lock it again before step 5.
1308 
1309     // ==================================================================== //
1310     // 3. Mark the free blocks in `GroupsToRelease` in the `PageReleaseContext`.
1311     //    Then we can tell which pages are in-use by querying
1312     //    `PageReleaseContext`.
1313     // ==================================================================== //
1314     PageReleaseContext Context =
1315         markFreeBlocks(Region, BlockSize, AllocatedUserEnd,
1316                        getCompactPtrBaseByClassId(ClassId), GroupsToRelease);
1317     if (UNLIKELY(!Context.hasBlockMarked())) {
1318       mergeGroupsToReleaseBack(Region, GroupsToRelease);
1319       return 0;
1320     }
1321 
1322     // ==================================================================== //
1323     // 4. Release the unused physical pages back to the OS.
1324     // ==================================================================== //
1325     RegionReleaseRecorder<MemMapT> Recorder(&Region->MemMapInfo.MemMap,
1326                                             Region->RegionBeg,
1327                                             Context.getReleaseOffset());
1328     auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
1329     releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
1330     if (Recorder.getReleasedBytes() > 0) {
1331       // This is the case that we didn't hit the release threshold but it has
1332       // been past a certain period of time. Thus we try to release some pages
1333       // and if it does release some additional pages, it's hint that we are
1334       // able to lower the threshold. Currently, this case happens when the
1335       // `RegionPushedBytesDelta` is over half of the `TryReleaseThreshold`. As
1336       // a result, we shrink the threshold to half accordingly.
1337       // TODO(chiahungduan): Apply the same adjustment strategy to small blocks.
1338       if (!isSmallBlock(BlockSize)) {
1339         if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold &&
1340             Recorder.getReleasedBytes() >
1341                 Region->ReleaseInfo.LastReleasedBytes +
1342                     getMinReleaseAttemptSize(BlockSize)) {
1343           Region->ReleaseInfo.TryReleaseThreshold =
1344               Max(Region->ReleaseInfo.TryReleaseThreshold / 2,
1345                   getMinReleaseAttemptSize(BlockSize));
1346         }
1347       }
1348 
1349       Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
1350       Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
1351     }
1352     Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast();
1353 
1354     if (Region->ReleaseInfo.PendingPushedBytesDelta > 0) {
1355       // Instead of increasing the threshold by the amount of
1356       // `PendingPushedBytesDelta`, we only increase half of the amount so that
1357       // it won't be a leap (which may lead to higher memory pressure) because
1358       // of certain memory usage bursts which don't happen frequently.
1359       Region->ReleaseInfo.TryReleaseThreshold +=
1360           Region->ReleaseInfo.PendingPushedBytesDelta / 2;
1361       // This is another guard of avoiding the growth of threshold indefinitely.
1362       // Note that we may consider to make this configurable if we have a better
1363       // way to model this.
1364       Region->ReleaseInfo.TryReleaseThreshold = Min<uptr>(
1365           Region->ReleaseInfo.TryReleaseThreshold, (1UL << GroupSizeLog) / 2);
1366       Region->ReleaseInfo.PendingPushedBytesDelta = 0;
1367     }
1368 
1369     // ====================================================================== //
1370     // 5. Merge the `GroupsToRelease` back to the freelist.
1371     // ====================================================================== //
1372     mergeGroupsToReleaseBack(Region, GroupsToRelease);
1373 
1374     return Recorder.getReleasedBytes();
1375   }
1376 
hasChanceToReleasePages(RegionInfo * Region,uptr BlockSize,uptr BytesInFreeList,ReleaseToOS ReleaseType)1377   bool hasChanceToReleasePages(RegionInfo *Region, uptr BlockSize,
1378                                uptr BytesInFreeList, ReleaseToOS ReleaseType)
1379       REQUIRES(Region->MMLock, Region->FLLock) {
1380     DCHECK_GE(Region->FreeListInfo.PoppedBlocks,
1381               Region->FreeListInfo.PushedBlocks);
1382     // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value
1383     // so that we won't underestimate the releasable pages. For example, the
1384     // following is the region usage,
1385     //
1386     //  BytesInFreeListAtLastCheckpoint   AllocatedUser
1387     //                v                         v
1388     //  |--------------------------------------->
1389     //         ^                   ^
1390     //  BytesInFreeList     ReleaseThreshold
1391     //
1392     // In general, if we have collected enough bytes and the amount of free
1393     // bytes meets the ReleaseThreshold, we will try to do page release. If we
1394     // don't update `BytesInFreeListAtLastCheckpoint` when the current
1395     // `BytesInFreeList` is smaller, we may take longer time to wait for enough
1396     // freed blocks because we miss the bytes between
1397     // (BytesInFreeListAtLastCheckpoint - BytesInFreeList).
1398     if (BytesInFreeList <=
1399         Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) {
1400       Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
1401     }
1402 
1403     const uptr RegionPushedBytesDelta =
1404         BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
1405 
1406     if (ReleaseType == ReleaseToOS::Normal) {
1407       if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold / 2)
1408         return false;
1409 
1410       const s64 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
1411       if (IntervalMs < 0)
1412         return false;
1413 
1414       const u64 IntervalNs = static_cast<u64>(IntervalMs) * 1000000;
1415       const u64 CurTimeNs = getMonotonicTimeFast();
1416       const u64 DiffSinceLastReleaseNs =
1417           CurTimeNs - Region->ReleaseInfo.LastReleaseAtNs;
1418 
1419       // At here, `RegionPushedBytesDelta` is more than half of
1420       // `TryReleaseThreshold`. If the last release happened 2 release interval
1421       // before, we will still try to see if there's any chance to release some
1422       // memory even it doesn't exceed the threshold.
1423       if (RegionPushedBytesDelta < Region->ReleaseInfo.TryReleaseThreshold) {
1424         // We want the threshold to have a shorter response time to the variant
1425         // memory usage patterns. According to data collected during experiments
1426         // (which were done with 1, 2, 4, 8 intervals), `2` strikes the better
1427         // balance between the memory usage and number of page release attempts.
1428         if (DiffSinceLastReleaseNs < 2 * IntervalNs)
1429           return false;
1430       } else if (DiffSinceLastReleaseNs < IntervalNs) {
1431         // In this case, we are over the threshold but we just did some page
1432         // release in the same release interval. This is a hint that we may want
1433         // a higher threshold so that we can release more memory at once.
1434         // `TryReleaseThreshold` will be adjusted according to how many bytes
1435         // are not released, i.e., the `PendingPushedBytesdelta` here.
1436         // TODO(chiahungduan): Apply the same adjustment strategy to small
1437         // blocks.
1438         if (!isSmallBlock(BlockSize))
1439           Region->ReleaseInfo.PendingPushedBytesDelta = RegionPushedBytesDelta;
1440 
1441         // Memory was returned recently.
1442         return false;
1443       }
1444     } // if (ReleaseType == ReleaseToOS::Normal)
1445 
1446     return true;
1447   }
1448 
1449   SinglyLinkedList<BatchGroupT>
collectGroupsToRelease(RegionInfo * Region,const uptr BlockSize,const uptr AllocatedUserEnd,const uptr CompactPtrBase)1450   collectGroupsToRelease(RegionInfo *Region, const uptr BlockSize,
1451                          const uptr AllocatedUserEnd, const uptr CompactPtrBase)
1452       REQUIRES(Region->MMLock, Region->FLLock) {
1453     const uptr GroupSize = (1UL << GroupSizeLog);
1454     const uptr PageSize = getPageSizeCached();
1455     SinglyLinkedList<BatchGroupT> GroupsToRelease;
1456 
1457     // We are examining each group and will take the minimum distance to the
1458     // release threshold as the next `TryReleaseThreshold`. Note that if the
1459     // size of free blocks has reached the release threshold, the distance to
1460     // the next release will be PageSize * SmallerBlockReleasePageDelta. See the
1461     // comment on `SmallerBlockReleasePageDelta` for more details.
1462     uptr MinDistToThreshold = GroupSize;
1463 
1464     for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(),
1465                      *Prev = nullptr;
1466          BG != nullptr;) {
1467       // Group boundary is always GroupSize-aligned from CompactPtr base. The
1468       // layout of memory groups is like,
1469       //
1470       //     (CompactPtrBase)
1471       // #1 CompactPtrGroupBase   #2 CompactPtrGroupBase            ...
1472       //           |                       |                       |
1473       //           v                       v                       v
1474       //           +-----------------------+-----------------------+
1475       //            \                     / \                     /
1476       //             ---   GroupSize   ---   ---   GroupSize   ---
1477       //
1478       // After decompacting the CompactPtrGroupBase, we expect the alignment
1479       // property is held as well.
1480       const uptr BatchGroupBase =
1481           decompactGroupBase(CompactPtrBase, BG->CompactPtrGroupBase);
1482       DCHECK_LE(Region->RegionBeg, BatchGroupBase);
1483       DCHECK_GE(AllocatedUserEnd, BatchGroupBase);
1484       DCHECK_EQ((Region->RegionBeg - BatchGroupBase) % GroupSize, 0U);
1485       // TransferBatches are pushed in front of BG.Batches. The first one may
1486       // not have all caches used.
1487       const uptr NumBlocks = (BG->Batches.size() - 1) * BG->MaxCachedPerBatch +
1488                              BG->Batches.front()->getCount();
1489       const uptr BytesInBG = NumBlocks * BlockSize;
1490 
1491       if (BytesInBG <= BG->BytesInBGAtLastCheckpoint) {
1492         BG->BytesInBGAtLastCheckpoint = BytesInBG;
1493         Prev = BG;
1494         BG = BG->Next;
1495         continue;
1496       }
1497 
1498       const uptr PushedBytesDelta = BytesInBG - BG->BytesInBGAtLastCheckpoint;
1499       if (PushedBytesDelta < getMinReleaseAttemptSize(BlockSize)) {
1500         Prev = BG;
1501         BG = BG->Next;
1502         continue;
1503       }
1504 
1505       // Given the randomness property, we try to release the pages only if the
1506       // bytes used by free blocks exceed certain proportion of group size. Note
1507       // that this heuristic only applies when all the spaces in a BatchGroup
1508       // are allocated.
1509       if (isSmallBlock(BlockSize)) {
1510         const uptr BatchGroupEnd = BatchGroupBase + GroupSize;
1511         const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd
1512                                             ? GroupSize
1513                                             : AllocatedUserEnd - BatchGroupBase;
1514         const uptr ReleaseThreshold =
1515             (AllocatedGroupSize * (100 - 1U - BlockSize / 16U)) / 100U;
1516         const bool HighDensity = BytesInBG >= ReleaseThreshold;
1517         const bool MayHaveReleasedAll = NumBlocks >= (GroupSize / BlockSize);
1518         // If all blocks in the group are released, we will do range marking
1519         // which is fast. Otherwise, we will wait until we have accumulated
1520         // a certain amount of free memory.
1521         const bool ReachReleaseDelta =
1522             MayHaveReleasedAll
1523                 ? true
1524                 : PushedBytesDelta >= PageSize * SmallerBlockReleasePageDelta;
1525 
1526         if (!HighDensity) {
1527           DCHECK_LE(BytesInBG, ReleaseThreshold);
1528           // The following is the usage of a memroy group,
1529           //
1530           //     BytesInBG             ReleaseThreshold
1531           //  /             \                 v
1532           //  +---+---------------------------+-----+
1533           //  |   |         |                 |     |
1534           //  +---+---------------------------+-----+
1535           //       \        /                       ^
1536           //    PushedBytesDelta                 GroupEnd
1537           MinDistToThreshold =
1538               Min(MinDistToThreshold,
1539                   ReleaseThreshold - BytesInBG + PushedBytesDelta);
1540         } else {
1541           // If it reaches high density at this round, the next time we will try
1542           // to release is based on SmallerBlockReleasePageDelta
1543           MinDistToThreshold =
1544               Min(MinDistToThreshold, PageSize * SmallerBlockReleasePageDelta);
1545         }
1546 
1547         if (!HighDensity || !ReachReleaseDelta) {
1548           Prev = BG;
1549           BG = BG->Next;
1550           continue;
1551         }
1552       }
1553 
1554       // If `BG` is the first BatchGroupT in the list, we only need to advance
1555       // `BG` and call FreeListInfo.BlockList::pop_front(). No update is needed
1556       // for `Prev`.
1557       //
1558       //         (BG)   (BG->Next)
1559       // Prev     Cur      BG
1560       //   |       |       |
1561       //   v       v       v
1562       //  nil     +--+    +--+
1563       //          |X | -> |  | -> ...
1564       //          +--+    +--+
1565       //
1566       // Otherwise, `Prev` will be used to extract the `Cur` from the
1567       // `FreeListInfo.BlockList`.
1568       //
1569       //         (BG)   (BG->Next)
1570       // Prev     Cur      BG
1571       //   |       |       |
1572       //   v       v       v
1573       //  +--+    +--+    +--+
1574       //  |  | -> |X | -> |  | -> ...
1575       //  +--+    +--+    +--+
1576       //
1577       // After FreeListInfo.BlockList::extract(),
1578       //
1579       // Prev     Cur       BG
1580       //   |       |        |
1581       //   v       v        v
1582       //  +--+    +--+     +--+
1583       //  |  |-+  |X |  +->|  | -> ...
1584       //  +--+ |  +--+  |  +--+
1585       //       +--------+
1586       //
1587       // Note that we need to advance before pushing this BatchGroup to
1588       // GroupsToRelease because it's a destructive operation.
1589 
1590       BatchGroupT *Cur = BG;
1591       BG = BG->Next;
1592 
1593       // Ideally, we may want to update this only after successful release.
1594       // However, for smaller blocks, each block marking is a costly operation.
1595       // Therefore, we update it earlier.
1596       // TODO: Consider updating this after releasing pages if `ReleaseRecorder`
1597       // can tell the released bytes in each group.
1598       Cur->BytesInBGAtLastCheckpoint = BytesInBG;
1599 
1600       if (Prev != nullptr)
1601         Region->FreeListInfo.BlockList.extract(Prev, Cur);
1602       else
1603         Region->FreeListInfo.BlockList.pop_front();
1604       GroupsToRelease.push_back(Cur);
1605     }
1606 
1607     // Only small blocks have the adaptive `TryReleaseThreshold`.
1608     if (isSmallBlock(BlockSize)) {
1609       // If the MinDistToThreshold is not updated, that means each memory group
1610       // may have only pushed less than a page size. In that case, just set it
1611       // back to normal.
1612       if (MinDistToThreshold == GroupSize)
1613         MinDistToThreshold = PageSize * SmallerBlockReleasePageDelta;
1614       Region->ReleaseInfo.TryReleaseThreshold = MinDistToThreshold;
1615     }
1616 
1617     return GroupsToRelease;
1618   }
1619 
1620   PageReleaseContext
markFreeBlocks(RegionInfo * Region,const uptr BlockSize,const uptr AllocatedUserEnd,const uptr CompactPtrBase,SinglyLinkedList<BatchGroupT> & GroupsToRelease)1621   markFreeBlocks(RegionInfo *Region, const uptr BlockSize,
1622                  const uptr AllocatedUserEnd, const uptr CompactPtrBase,
1623                  SinglyLinkedList<BatchGroupT> &GroupsToRelease)
1624       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
1625     const uptr GroupSize = (1UL << GroupSizeLog);
1626     auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) {
1627       return decompactPtrInternal(CompactPtrBase, CompactPtr);
1628     };
1629 
1630     const uptr ReleaseBase = decompactGroupBase(
1631         CompactPtrBase, GroupsToRelease.front()->CompactPtrGroupBase);
1632     const uptr LastGroupEnd =
1633         Min(decompactGroupBase(CompactPtrBase,
1634                                GroupsToRelease.back()->CompactPtrGroupBase) +
1635                 GroupSize,
1636             AllocatedUserEnd);
1637     // The last block may straddle the group boundary. Rounding up to BlockSize
1638     // to get the exact range.
1639     const uptr ReleaseEnd =
1640         roundUpSlow(LastGroupEnd - Region->RegionBeg, BlockSize) +
1641         Region->RegionBeg;
1642     const uptr ReleaseRangeSize = ReleaseEnd - ReleaseBase;
1643     const uptr ReleaseOffset = ReleaseBase - Region->RegionBeg;
1644 
1645     PageReleaseContext Context(BlockSize, /*NumberOfRegions=*/1U,
1646                                ReleaseRangeSize, ReleaseOffset);
1647     // We may not be able to do the page release in a rare case that we may
1648     // fail on PageMap allocation.
1649     if (UNLIKELY(!Context.ensurePageMapAllocated()))
1650       return Context;
1651 
1652     for (BatchGroupT &BG : GroupsToRelease) {
1653       const uptr BatchGroupBase =
1654           decompactGroupBase(CompactPtrBase, BG.CompactPtrGroupBase);
1655       const uptr BatchGroupEnd = BatchGroupBase + GroupSize;
1656       const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd
1657                                           ? GroupSize
1658                                           : AllocatedUserEnd - BatchGroupBase;
1659       const uptr BatchGroupUsedEnd = BatchGroupBase + AllocatedGroupSize;
1660       const bool MayContainLastBlockInRegion =
1661           BatchGroupUsedEnd == AllocatedUserEnd;
1662       const bool BlockAlignedWithUsedEnd =
1663           (BatchGroupUsedEnd - Region->RegionBeg) % BlockSize == 0;
1664 
1665       uptr MaxContainedBlocks = AllocatedGroupSize / BlockSize;
1666       if (!BlockAlignedWithUsedEnd)
1667         ++MaxContainedBlocks;
1668 
1669       const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch +
1670                              BG.Batches.front()->getCount();
1671 
1672       if (NumBlocks == MaxContainedBlocks) {
1673         for (const auto &It : BG.Batches) {
1674           if (&It != BG.Batches.front())
1675             DCHECK_EQ(It.getCount(), BG.MaxCachedPerBatch);
1676           for (u16 I = 0; I < It.getCount(); ++I)
1677             DCHECK_EQ(compactPtrGroup(It.get(I)), BG.CompactPtrGroupBase);
1678         }
1679 
1680         Context.markRangeAsAllCounted(BatchGroupBase, BatchGroupUsedEnd,
1681                                       Region->RegionBeg, /*RegionIndex=*/0,
1682                                       Region->MemMapInfo.AllocatedUser);
1683       } else {
1684         DCHECK_LT(NumBlocks, MaxContainedBlocks);
1685         // Note that we don't always visit blocks in each BatchGroup so that we
1686         // may miss the chance of releasing certain pages that cross
1687         // BatchGroups.
1688         Context.markFreeBlocksInRegion(
1689             BG.Batches, DecompactPtr, Region->RegionBeg, /*RegionIndex=*/0,
1690             Region->MemMapInfo.AllocatedUser, MayContainLastBlockInRegion);
1691       }
1692     }
1693 
1694     DCHECK(Context.hasBlockMarked());
1695 
1696     return Context;
1697   }
1698 
mergeGroupsToReleaseBack(RegionInfo * Region,SinglyLinkedList<BatchGroupT> & GroupsToRelease)1699   void mergeGroupsToReleaseBack(RegionInfo *Region,
1700                                 SinglyLinkedList<BatchGroupT> &GroupsToRelease)
1701       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
1702     ScopedLock L(Region->FLLock);
1703 
1704     // After merging two freelists, we may have redundant `BatchGroup`s that
1705     // need to be recycled. The number of unused `BatchGroup`s is expected to be
1706     // small. Pick a constant which is inferred from real programs.
1707     constexpr uptr MaxUnusedSize = 8;
1708     CompactPtrT Blocks[MaxUnusedSize];
1709     u32 Idx = 0;
1710     RegionInfo *BatchClassRegion = getRegionInfo(SizeClassMap::BatchClassId);
1711     // We can't call pushBatchClassBlocks() to recycle the unused `BatchGroup`s
1712     // when we are manipulating the freelist of `BatchClassRegion`. Instead, we
1713     // should just push it back to the freelist when we merge two `BatchGroup`s.
1714     // This logic hasn't been implemented because we haven't supported releasing
1715     // pages in `BatchClassRegion`.
1716     DCHECK_NE(BatchClassRegion, Region);
1717 
1718     // Merge GroupsToRelease back to the Region::FreeListInfo.BlockList. Note
1719     // that both `Region->FreeListInfo.BlockList` and `GroupsToRelease` are
1720     // sorted.
1721     for (BatchGroupT *BG = Region->FreeListInfo.BlockList.front(),
1722                      *Prev = nullptr;
1723          ;) {
1724       if (BG == nullptr || GroupsToRelease.empty()) {
1725         if (!GroupsToRelease.empty())
1726           Region->FreeListInfo.BlockList.append_back(&GroupsToRelease);
1727         break;
1728       }
1729 
1730       DCHECK(!BG->Batches.empty());
1731 
1732       if (BG->CompactPtrGroupBase <
1733           GroupsToRelease.front()->CompactPtrGroupBase) {
1734         Prev = BG;
1735         BG = BG->Next;
1736         continue;
1737       }
1738 
1739       BatchGroupT *Cur = GroupsToRelease.front();
1740       TransferBatchT *UnusedTransferBatch = nullptr;
1741       GroupsToRelease.pop_front();
1742 
1743       if (BG->CompactPtrGroupBase == Cur->CompactPtrGroupBase) {
1744         // We have updated `BatchGroup::BytesInBGAtLastCheckpoint` while
1745         // collecting the `GroupsToRelease`.
1746         BG->BytesInBGAtLastCheckpoint = Cur->BytesInBGAtLastCheckpoint;
1747         const uptr MaxCachedPerBatch = BG->MaxCachedPerBatch;
1748 
1749         // Note that the first TransferBatches in both `Batches` may not be
1750         // full and only the first TransferBatch can have non-full blocks. Thus
1751         // we have to merge them before appending one to another.
1752         if (Cur->Batches.front()->getCount() == MaxCachedPerBatch) {
1753           BG->Batches.append_back(&Cur->Batches);
1754         } else {
1755           TransferBatchT *NonFullBatch = Cur->Batches.front();
1756           Cur->Batches.pop_front();
1757           const u16 NonFullBatchCount = NonFullBatch->getCount();
1758           // The remaining Batches in `Cur` are full.
1759           BG->Batches.append_back(&Cur->Batches);
1760 
1761           if (BG->Batches.front()->getCount() == MaxCachedPerBatch) {
1762             // Only 1 non-full TransferBatch, push it to the front.
1763             BG->Batches.push_front(NonFullBatch);
1764           } else {
1765             const u16 NumBlocksToMove = static_cast<u16>(
1766                 Min(static_cast<u16>(MaxCachedPerBatch -
1767                                      BG->Batches.front()->getCount()),
1768                     NonFullBatchCount));
1769             BG->Batches.front()->appendFromTransferBatch(NonFullBatch,
1770                                                          NumBlocksToMove);
1771             if (NonFullBatch->isEmpty())
1772               UnusedTransferBatch = NonFullBatch;
1773             else
1774               BG->Batches.push_front(NonFullBatch);
1775           }
1776         }
1777 
1778         const u32 NeededSlots = UnusedTransferBatch == nullptr ? 1U : 2U;
1779         if (UNLIKELY(Idx + NeededSlots > MaxUnusedSize)) {
1780           ScopedLock L(BatchClassRegion->FLLock);
1781           pushBatchClassBlocks(BatchClassRegion, Blocks, Idx);
1782           if (conditionVariableEnabled())
1783             BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock);
1784           Idx = 0;
1785         }
1786         Blocks[Idx++] =
1787             compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(Cur));
1788         if (UnusedTransferBatch) {
1789           Blocks[Idx++] =
1790               compactPtr(SizeClassMap::BatchClassId,
1791                          reinterpret_cast<uptr>(UnusedTransferBatch));
1792         }
1793         Prev = BG;
1794         BG = BG->Next;
1795         continue;
1796       }
1797 
1798       // At here, the `BG` is the first BatchGroup with CompactPtrGroupBase
1799       // larger than the first element in `GroupsToRelease`. We need to insert
1800       // `GroupsToRelease::front()` (which is `Cur` below)  before `BG`.
1801       //
1802       //   1. If `Prev` is nullptr, we simply push `Cur` to the front of
1803       //      FreeListInfo.BlockList.
1804       //   2. Otherwise, use `insert()` which inserts an element next to `Prev`.
1805       //
1806       // Afterwards, we don't need to advance `BG` because the order between
1807       // `BG` and the new `GroupsToRelease::front()` hasn't been checked.
1808       if (Prev == nullptr)
1809         Region->FreeListInfo.BlockList.push_front(Cur);
1810       else
1811         Region->FreeListInfo.BlockList.insert(Prev, Cur);
1812       DCHECK_EQ(Cur->Next, BG);
1813       Prev = Cur;
1814     }
1815 
1816     if (Idx != 0) {
1817       ScopedLock L(BatchClassRegion->FLLock);
1818       pushBatchClassBlocks(BatchClassRegion, Blocks, Idx);
1819       if (conditionVariableEnabled())
1820         BatchClassRegion->FLLockCV.notifyAll(BatchClassRegion->FLLock);
1821     }
1822 
1823     if (SCUDO_DEBUG) {
1824       BatchGroupT *Prev = Region->FreeListInfo.BlockList.front();
1825       for (BatchGroupT *Cur = Prev->Next; Cur != nullptr;
1826            Prev = Cur, Cur = Cur->Next) {
1827         CHECK_LT(Prev->CompactPtrGroupBase, Cur->CompactPtrGroupBase);
1828       }
1829     }
1830 
1831     if (conditionVariableEnabled())
1832       Region->FLLockCV.notifyAll(Region->FLLock);
1833   }
1834 
1835   // The minimum size of pushed blocks that we will try to release the pages in
1836   // that size class.
1837   uptr SmallerBlockReleasePageDelta = 0;
1838   atomic_s32 ReleaseToOsIntervalMs = {};
1839   alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses];
1840 };
1841 
1842 } // namespace scudo
1843 
1844 #endif // SCUDO_PRIMARY64_H_
1845