/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_

#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <memory>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

#include <android-base/logging.h>

#include "base/allocator.h"
#include "base/bit_utils.h"
#include "base/macros.h"
#include "base/mem_map.h"
#include "base/mutex.h"
#include "runtime_globals.h"
#include "thread.h"

namespace art HIDDEN {

namespace gc {
namespace allocator {

// A runs-of-slots memory allocator.
class RosAlloc {
 private:
  // Represents a run of free pages.
  class FreePageRun {
   public:
    uint8_t magic_num_;  // The magic number used for debugging only.

    bool IsFree() const {
      return !kIsDebugBuild || magic_num_ == kMagicNumFree;
    }
    size_t ByteSize(RosAlloc* rosalloc) const REQUIRES(rosalloc->lock_) {
      const uint8_t* fpr_base = reinterpret_cast<const uint8_t*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
      DCHECK_GE(byte_size, static_cast<size_t>(0));
      DCHECK_ALIGNED_PARAM(byte_size, gPageSize);
      return byte_size;
    }
    void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
        REQUIRES(rosalloc->lock_) {
      DCHECK_EQ(ModuloPageSize(byte_size), static_cast<size_t>(0));
      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
    }
    void* Begin() {
      return reinterpret_cast<void*>(this);
    }
    void* End(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
      uint8_t* end = fpr_base + ByteSize(rosalloc);
      return end;
    }
    bool IsLargerThanPageReleaseThreshold(RosAlloc* rosalloc)
        REQUIRES(rosalloc->lock_) {
      return ByteSize(rosalloc) >= rosalloc->page_release_size_threshold_;
    }
    bool IsAtEndOfSpace(RosAlloc* rosalloc)
        REQUIRES(rosalloc->lock_) {
      return reinterpret_cast<uint8_t*>(this) + ByteSize(rosalloc) ==
          rosalloc->base_ + rosalloc->footprint_;
    }
    bool ShouldReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      switch (rosalloc->page_release_mode_) {
        case kPageReleaseModeNone:
          return false;
        case kPageReleaseModeEnd:
          return IsAtEndOfSpace(rosalloc);
        case kPageReleaseModeSize:
          return IsLargerThanPageReleaseThreshold(rosalloc);
        case kPageReleaseModeSizeAndEnd:
          return IsLargerThanPageReleaseThreshold(rosalloc) && IsAtEndOfSpace(rosalloc);
        case kPageReleaseModeAll:
          return true;
      }
    }
    void ReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      uint8_t* start = reinterpret_cast<uint8_t*>(this);
      size_t byte_size = ByteSize(rosalloc);
      DCHECK_EQ(ModuloPageSize(byte_size), static_cast<size_t>(0));
      if (ShouldReleasePages(rosalloc)) {
        rosalloc->ReleasePageRange(start, start + byte_size);
      }
    }

   private:
    DISALLOW_COPY_AND_ASSIGN(FreePageRun);
  };
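
  // Illustrative sketch (not part of the allocator): a FreePageRun lives at the start of its
  // run and stores no size of its own; the size is kept out-of-line in
  // free_page_run_size_map_, indexed by the run's first page. For a free run starting at page
  // index 5 and spanning three pages:
  //
  //   FreePageRun* fpr = ...;                         // Placed at the start of page 5.
  //   size_t pm_idx = rosalloc->ToPageMapIndex(fpr);  // == 5
  //   // free_page_run_size_map_[5] == 3 * gPageSize, so:
  //   size_t bytes = fpr->ByteSize(rosalloc);         // == 3 * gPageSize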

  // The slot header.
  class Slot {
   public:
    Slot* Next() const {
      return next_;
    }
    void SetNext(Slot* next) {
      next_ = next;
    }
    // The slot right before this slot in terms of the address.
    Slot* Left(size_t bracket_size) {
      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) - bracket_size);
    }
    void Clear() {
      next_ = nullptr;
    }

   private:
    Slot* next_;  // Next slot in the list.
    friend class RosAlloc;
  };

  // We use the tail (kUseTail == true) for the bulk and thread-local free lists to avoid having
  // to traverse the list from head to tail when merging free lists.
  // We don't use the tail (kUseTail == false) for the free list, to avoid having to manage the
  // tail in the allocation fast path for performance reasons.
  template<bool kUseTail = true>
  class SlotFreeList {
   public:
    SlotFreeList() : head_(0U), tail_(0), size_(0), padding_(0) {}
    Slot* Head() const {
      return reinterpret_cast<Slot*>(head_);
    }
    Slot* Tail() const {
      CHECK(kUseTail);
      return reinterpret_cast<Slot*>(tail_);
    }
    size_t Size() const {
      return size_;
    }
    // Removes from the head of the free list.
    Slot* Remove() {
      Slot* slot;
      if (kIsDebugBuild) {
        Verify();
      }
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        if (kUseTail) {
          DCHECK(*tailp == nullptr);
        }
        return nullptr;
      } else {
        // List wasn't empty.
        if (kUseTail) {
          DCHECK(*tailp != nullptr);
        }
        Slot* old_head_next = old_head->Next();
        slot = old_head;
        *headp = old_head_next;
        if (kUseTail && old_head_next == nullptr) {
          // List becomes empty.
          *tailp = nullptr;
        }
      }
      slot->Clear();
      --size_;
      if (kIsDebugBuild) {
        Verify();
      }
      return slot;
    }
    void Add(Slot* slot) {
      if (kIsDebugBuild) {
        Verify();
      }
      DCHECK(slot != nullptr);
      DCHECK(slot->Next() == nullptr);
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        if (kUseTail) {
          DCHECK(*tailp == nullptr);
        }
        *headp = slot;
        if (kUseTail) {
          *tailp = slot;
        }
      } else {
        // List wasn't empty.
        if (kUseTail) {
          DCHECK(*tailp != nullptr);
        }
        *headp = slot;
        slot->SetNext(old_head);
      }
      ++size_;
      if (kIsDebugBuild) {
        Verify();
      }
    }
    // Merge the given list into this list. Empties the given list.
    // We deliberately support only a kUseTail == true SlotFreeList parameter because 1) we don't
    // currently have a situation where we need a kUseTail == false SlotFreeList parameter, and 2)
    // supporting a kUseTail == false parameter would require an O(n) linked list traversal to do
    // the merge if 'this' SlotFreeList has kUseTail == false, which we'd like to avoid.
    void Merge(SlotFreeList<true>* list) {
      if (kIsDebugBuild) {
        Verify();
        CHECK(list != nullptr);
        list->Verify();
      }
      if (list->Size() == 0) {
        return;
      }
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        *headp = list->Head();
        if (kUseTail) {
          *tailp = list->Tail();
        }
        size_ = list->Size();
      } else {
        // List wasn't empty.
        DCHECK(list->Head() != nullptr);
        *headp = list->Head();
        DCHECK(list->Tail() != nullptr);
        list->Tail()->SetNext(old_head);
        // If kUseTail, no change to tailp.
        size_ += list->Size();
      }
      list->Reset();
      if (kIsDebugBuild) {
        Verify();
      }
    }

    void Reset() {
      head_ = 0;
      if (kUseTail) {
        tail_ = 0;
      }
      size_ = 0;
    }

    void Verify() {
      Slot* head = reinterpret_cast<Slot*>(head_);
      Slot* tail = kUseTail ? reinterpret_cast<Slot*>(tail_) : nullptr;
      if (size_ == 0) {
        CHECK(head == nullptr);
        if (kUseTail) {
          CHECK(tail == nullptr);
        }
      } else {
        CHECK(head != nullptr);
        if (kUseTail) {
          CHECK(tail != nullptr);
        }
        size_t count = 0;
        for (Slot* slot = head; slot != nullptr; slot = slot->Next()) {
          ++count;
          if (kUseTail && slot->Next() == nullptr) {
            CHECK_EQ(slot, tail);
          }
        }
        CHECK_EQ(size_, count);
      }
    }

   private:
    // A pointer (Slot*) to the head of the list. Always 8 bytes so that we have the same layout
    // between 32 bit and 64 bit. This is not strictly necessary, but we do it for 1) uniformity,
    // 2) so we won't need to change this code if we move to a non-low-4G heap in the future, and
    // 3) the space savings from using 32 bit fields on 32 bit would be lost in the noise (they
    // wouldn't open up enough space for an extra slot to become available).
    uint64_t head_;
    // A pointer (Slot*) to the tail of the list. Always 8 bytes so that we have the same layout
    // between 32 bit and 64 bit. The tail is stored to speed up merging of lists.
    // Unused if kUseTail is false.
    uint64_t tail_;
    // The number of slots in the list. This is used to make it fast to check if a free list is
    // entirely free without traversing the whole list.
    uint32_t size_;
    [[maybe_unused]] uint32_t padding_;
    friend class RosAlloc;
  };
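
  // Illustrative sketch (not part of the allocator): Add() and Merge() both prepend, so a
  // SlotFreeList behaves as a LIFO stack, and the stored tail makes Merge() O(1):
  //
  //   SlotFreeList<true> bulk;   // Empty.
  //   bulk.Add(slot_a);          // bulk: a
  //   bulk.Add(slot_b);          // bulk: b -> a
  //   tl.Merge(&bulk);           // With tl previously c -> d: tl becomes b -> a -> c -> d,
  //                              // and bulk is reset to empty.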

  // Represents a run of memory slots of the same size.
  //
  // A run's memory layout:
  //
  // +-------------------+
  // | magic_num         |
  // +-------------------+
  // | size_bracket_idx  |
  // +-------------------+
  // | is_thread_local   |
  // +-------------------+
  // | to_be_bulk_freed  |
  // +-------------------+
  // |                   |
  // | free list         |
  // |                   |
  // +-------------------+
  // |                   |
  // | bulk free list    |
  // |                   |
  // +-------------------+
  // |                   |
  // | thread-local free |
  // | list              |
  // |                   |
  // +-------------------+
  // | padding due to    |
  // | alignment         |
  // +-------------------+
  // | slot 0            |
  // +-------------------+
  // | slot 1            |
  // +-------------------+
  // | slot 2            |
  // +-------------------+
  // ...
  // +-------------------+
  // | last slot         |
  // +-------------------+
  //
  class Run {
   public:
    uint8_t magic_num_;                 // The magic number used for debugging.
    uint8_t size_bracket_idx_;          // The index of the size bracket of this run.
    uint8_t is_thread_local_;           // True if this run is used as a thread-local run.
    bool to_be_bulk_freed_;             // Used within BulkFree() to flag a run that's involved
                                        // with a bulk free.
    [[maybe_unused]] uint32_t padding_;
    // Use a tailless free list for free_list_ so that the alloc fast path does not manage the tail.
    SlotFreeList<false> free_list_;
    SlotFreeList<true> bulk_free_list_;
    SlotFreeList<true> thread_local_free_list_;
    // Padding due to alignment
    // Slot 0
    // Slot 1
    // ...

    // Returns the byte size of the header.
    static size_t fixed_header_size() {
      return sizeof(Run);
    }
    Slot* FirstSlot() const {
      const uint8_t idx = size_bracket_idx_;
      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]);
    }
    Slot* LastSlot() {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      uintptr_t end = reinterpret_cast<uintptr_t>(End());
      Slot* last_slot = reinterpret_cast<Slot*>(end - bracket_size);
      DCHECK_LE(FirstSlot(), last_slot);
      return last_slot;
    }
    SlotFreeList<false>* FreeList() {
      return &free_list_;
    }
    SlotFreeList<true>* BulkFreeList() {
      return &bulk_free_list_;
    }
    SlotFreeList<true>* ThreadLocalFreeList() {
      return &thread_local_free_list_;
    }
    void* End() {
      return reinterpret_cast<uint8_t*>(this) + gPageSize * numOfPages[size_bracket_idx_];
    }
    void SetIsThreadLocal(bool is_thread_local) {
      is_thread_local_ = is_thread_local ? 1 : 0;
    }
    bool IsThreadLocal() const {
      return is_thread_local_ != 0;
    }
    // Set up the free list for a new/empty run.
    void InitFreeList() {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      Slot* first_slot = FirstSlot();
      // Add backwards so the first slot is at the head of the list.
      for (Slot* slot = LastSlot(); slot >= first_slot; slot = slot->Left(bracket_size)) {
        free_list_.Add(slot);
      }
    }
    // Merge the thread-local free list into the free list. Used when a thread-local run becomes
    // full.
    bool MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out);
    // Merge the bulk free list into the free list. Used in a bulk free.
    void MergeBulkFreeListToFreeList();
    // Merge the bulk free list into the thread-local free list. In a bulk free, as a two-step
    // process, GC will first record all the slots to free in a run in the bulk free list, where
    // it can write without a lock, and will later acquire a lock once per run to merge the bulk
    // free list into the thread-local free list.
    void MergeBulkFreeListToThreadLocalFreeList();
    // Allocates a slot in a run.
    ALWAYS_INLINE void* AllocSlot();
    // Frees a slot in a run. This is used in a non-bulk free.
    void FreeSlot(void* ptr);
    // Adds the given slot to the bulk free list. Returns the bracket size.
    size_t AddToBulkFreeList(void* ptr);
    // Adds the given slot to the thread-local free list.
    void AddToThreadLocalFreeList(void* ptr);
    // Returns true if none of the slots in the run are in use.
    bool IsAllFree() const {
      return free_list_.Size() == numOfSlots[size_bracket_idx_];
    }
    // Returns the number of free slots.
    size_t NumberOfFreeSlots() {
      return free_list_.Size();
    }
    // Returns true if all the slots in the run are in use.
    ALWAYS_INLINE bool IsFull();
    // Returns true if the bulk free list is empty.
    bool IsBulkFreeListEmpty() const {
      return bulk_free_list_.Size() == 0;
    }
    // Returns true if the thread-local free list is empty.
    bool IsThreadLocalFreeListEmpty() const {
      return thread_local_free_list_.Size() == 0;
    }
    // Zero the run's data.
    void ZeroData();
    // Zero the run's header and the slot headers.
    void ZeroHeaderAndSlotHeaders();
    // Iterate over all the slots and apply the given function.
    void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes,
                                         void* callback_arg),
                         void* arg);
    // Dump the run metadata for debugging.
    std::string Dump();
    // Verify for debugging.
    void Verify(Thread* self, RosAlloc* rosalloc, bool running_on_memory_tool)
        REQUIRES(Locks::mutator_lock_)
        REQUIRES(Locks::thread_list_lock_);

   private:
    // The common part of AddToBulkFreeList() and AddToThreadLocalFreeList(). Returns the bracket
    // size.
    size_t AddToFreeListShared(void* ptr, SlotFreeList<true>* free_list, const char* caller_name);
    // Turns a FreeList into a string for debugging.
    template<bool kUseTail>
    std::string FreeListToStr(SlotFreeList<kUseTail>* free_list);
    // Checks that a given pointer is a valid slot address and returns it as a Slot*.
    Slot* ToSlot(void* ptr) {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
          - reinterpret_cast<uint8_t*>(FirstSlot());
      DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
      size_t slot_idx = offset_from_slot_base / bracket_size;
      DCHECK_LT(slot_idx, numOfSlots[idx]);
      return reinterpret_cast<Slot*>(ptr);
    }
    size_t SlotIndex(Slot* slot) const {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
          - reinterpret_cast<uint8_t*>(FirstSlot());
      DCHECK_EQ(offset_from_slot_base % bracket_size, 0U);
      size_t slot_idx = offset_from_slot_base / bracket_size;
      DCHECK_LT(slot_idx, numOfSlots[idx]);
      return slot_idx;
    }

    // TODO: DISALLOW_COPY_AND_ASSIGN(Run);
  };

  // The magic number for a run.
  static constexpr uint8_t kMagicNum = 42;
  // The magic number for free pages.
  static constexpr uint8_t kMagicNumFree = 43;
  // The number of size brackets.
  static constexpr size_t kNumOfSizeBrackets = 42;
  // The sizes (the slot sizes, in bytes) of the size brackets.
  static size_t bracketSizes[kNumOfSizeBrackets];
  // The numbers of pages that are used for runs for each size bracket.
  static size_t numOfPages[kNumOfSizeBrackets];
  // The numbers of slots of the runs for each size bracket.
  EXPORT static size_t numOfSlots[kNumOfSizeBrackets];
  // The header sizes in bytes of the runs for each size bracket.
  static size_t headerSizes[kNumOfSizeBrackets];

  // Initialize the run specs (the above arrays).
  static void Initialize();
  static bool initialized_;

  // Returns the byte size of the bracket size from the index.
  static size_t IndexToBracketSize(size_t idx) {
    DCHECK_LT(idx, kNumOfSizeBrackets);
    return bracketSizes[idx];
  }
  // Returns the index of the size bracket from the bracket size.
  static size_t BracketSizeToIndex(size_t size) {
    DCHECK(8 <= size &&
           ((size <= kMaxThreadLocalBracketSize && size % kThreadLocalBracketQuantumSize == 0) ||
            (size <= kMaxRegularBracketSize && size % kBracketQuantumSize == 0) ||
            size == 1 * KB || size == 2 * KB));
    size_t idx;
    if (UNLIKELY(size == 1 * KB)) {
      idx = kNumOfSizeBrackets - 2;
    } else if (UNLIKELY(size == 2 * KB)) {
      idx = kNumOfSizeBrackets - 1;
    } else if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      DCHECK_EQ(size % kThreadLocalBracketQuantumSize, 0U);
      idx = size / kThreadLocalBracketQuantumSize - 1;
    } else {
      DCHECK(size <= kMaxRegularBracketSize);
      DCHECK_EQ((size - kMaxThreadLocalBracketSize) % kBracketQuantumSize, 0U);
      idx = ((size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
          + kNumThreadLocalSizeBrackets;
    }
    DCHECK(bracketSizes[idx] == size);
    return idx;
  }
  // Returns true if the given allocation size is for a thread-local allocation.
  static bool IsSizeForThreadLocal(size_t size) {
    bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize;
    DCHECK(size > kLargeSizeThreshold ||
           (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
    return is_size_for_thread_local;
  }
  // Rounds up the size to the nearest bracket size.
  static size_t RoundToBracketSize(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      return RoundUp(size, kThreadLocalBracketQuantumSize);
    } else if (size <= kMaxRegularBracketSize) {
      return RoundUp(size, kBracketQuantumSize);
    } else if (UNLIKELY(size <= 1 * KB)) {
      return 1 * KB;
    } else {
      DCHECK_LE(size, 2 * KB);
      return 2 * KB;
    }
  }
  // Returns the size bracket index from the byte size with rounding.
  static size_t SizeToIndex(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      return RoundUp(size, kThreadLocalBracketQuantumSize) / kThreadLocalBracketQuantumSize - 1;
    } else if (size <= kMaxRegularBracketSize) {
      return (RoundUp(size, kBracketQuantumSize) - kMaxThreadLocalBracketSize) / kBracketQuantumSize
          - 1 + kNumThreadLocalSizeBrackets;
    } else if (size <= 1 * KB) {
      return kNumOfSizeBrackets - 2;
    } else {
      DCHECK_LE(size, 2 * KB);
      return kNumOfSizeBrackets - 1;
    }
  }
  // A combination of SizeToIndex() and RoundToBracketSize().
  static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
    DCHECK(size <= kLargeSizeThreshold);
    size_t idx;
    size_t bracket_size;
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      bracket_size = RoundUp(size, kThreadLocalBracketQuantumSize);
      idx = bracket_size / kThreadLocalBracketQuantumSize - 1;
    } else if (size <= kMaxRegularBracketSize) {
      bracket_size = RoundUp(size, kBracketQuantumSize);
      idx = ((bracket_size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
          + kNumThreadLocalSizeBrackets;
    } else if (size <= 1 * KB) {
      bracket_size = 1 * KB;
      idx = kNumOfSizeBrackets - 2;
    } else {
      DCHECK(size <= 2 * KB);
      bracket_size = 2 * KB;
      idx = kNumOfSizeBrackets - 1;
    }
    DCHECK_EQ(idx, SizeToIndex(size)) << idx;
    DCHECK_EQ(bracket_size, IndexToBracketSize(idx)) << idx;
    DCHECK_EQ(bracket_size, bracketSizes[idx]) << idx;
    DCHECK_LE(size, bracket_size) << idx;
    DCHECK(size > kMaxRegularBracketSize ||
           (size <= kMaxThreadLocalBracketSize &&
            bracket_size - size < kThreadLocalBracketQuantumSize) ||
           (size <= kMaxRegularBracketSize && bracket_size - size < kBracketQuantumSize)) << idx;
    *bracket_size_out = bracket_size;
    return idx;
  }
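
  // Worked examples for the mappings above (values derived from the bracket constants declared
  // below; illustrative only):
  //   size 37:  thread-local path, bracket_size = RoundUp(37, 8) = 40, idx = 40/8 - 1 = 4
  //   size 200: regular path, bracket_size = RoundUp(200, 16) = 208,
  //             idx = (208 - 128)/16 - 1 + 16 = 20
  //   size 600: the 1 KB bracket, idx = kNumOfSizeBrackets - 2 = 40
  //   Sizes above kLargeSizeThreshold (2048) never reach these functions; they are allocated at
  //   page granularity as large objects.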

  // Returns the page map index from an address. Requires that the
  // address is page size aligned.
  size_t ToPageMapIndex(const void* addr) const {
    DCHECK_LE(base_, addr);
    DCHECK_LT(addr, base_ + capacity_);
    size_t byte_offset = reinterpret_cast<const uint8_t*>(addr) - base_;
    DCHECK_EQ(ModuloPageSize(byte_offset), static_cast<size_t>(0));
    return DivideByPageSize(byte_offset);
  }
  // Returns the page map index from an address with rounding.
  size_t RoundDownToPageMapIndex(const void* addr) const {
    DCHECK(base_ <= addr && addr < reinterpret_cast<uint8_t*>(base_) + capacity_);
    return DivideByPageSize(reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_));
  }

  // A memory allocation request larger than this size is treated as a large object and allocated
  // at page granularity.
  static constexpr size_t kLargeSizeThreshold = 2048;

  // If true, check that the returned memory is actually zero.
  static constexpr bool kCheckZeroMemory = kIsDebugBuild;
  // Do not check memory when running under a memory tool. In a normal
  // build with kCheckZeroMemory the whole test should be optimized away.
  // TODO: Unprotect before checks.
  ALWAYS_INLINE bool ShouldCheckZeroMemory();

  // If true, log verbose details of operations.
  static constexpr bool kTraceRosAlloc = false;

  struct hash_run {
    size_t operator()(const RosAlloc::Run* r) const {
      return reinterpret_cast<size_t>(r);
    }
  };

  struct eq_run {
    bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
      return r1 == r2;
    }
  };

 public:
  // Different page release modes.
  enum PageReleaseMode {
    kPageReleaseModeNone,         // Release no empty pages.
    kPageReleaseModeEnd,          // Release empty pages at the end of the space.
    kPageReleaseModeSize,         // Release empty pages that are larger than the threshold.
    kPageReleaseModeSizeAndEnd,   // Release empty pages that are larger than the threshold or
                                  // at the end of the space.
    kPageReleaseModeAll,          // Release all empty pages.
  };

  // The default value for page_release_size_threshold_.
  static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;

  // We use thread-local runs for the size brackets whose indexes
  // are less than this index. We use shared (current) runs for the rest.
  // Sync this with the length of Thread::rosalloc_runs_.
  static constexpr size_t kNumThreadLocalSizeBrackets = 16;
  static_assert(kNumThreadLocalSizeBrackets == kNumRosAllocThreadLocalSizeBracketsInThread,
                "Mismatch between kNumThreadLocalSizeBrackets and "
                "kNumRosAllocThreadLocalSizeBracketsInThread");

  // The size of the largest bracket we use thread-local runs for.
  // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
  static constexpr size_t kMaxThreadLocalBracketSize = 128;

  // We use regular (8 or 16-byte increment) runs for the size brackets whose indexes are less
  // than this index.
  static const size_t kNumRegularSizeBrackets = 40;

  // The size of the largest regular (8 or 16-byte increment) bracket. Non-regular brackets are
  // the 1 KB and the 2 KB brackets. This should be equal to
  // bracketSizes[kNumRegularSizeBrackets - 1].
  static constexpr size_t kMaxRegularBracketSize = 512;

  // The bracket size increment for the thread-local brackets (<= kMaxThreadLocalBracketSize
  // bytes).
  static constexpr size_t kThreadLocalBracketQuantumSize = 8;

  // Equal to Log2(kThreadLocalBracketQuantumSize).
  static constexpr size_t kThreadLocalBracketQuantumSizeShift = 3;

  // The bracket size increment for the non-thread-local, regular brackets (of size <=
  // kMaxRegularBracketSize bytes and > kMaxThreadLocalBracketSize bytes).
  static constexpr size_t kBracketQuantumSize = 16;

  // Equal to Log2(kBracketQuantumSize).
  static constexpr size_t kBracketQuantumSizeShift = 4;
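
  // The resulting bracket geometry (derived from the constants above; for illustration):
  //   idx  0..15: sizes   8,  16, ..., 128  (8-byte quantum, thread-local runs)
  //   idx 16..39: sizes 144, 160, ..., 512  (16-byte quantum, shared current runs)
  //   idx 40:     1 KB
  //   idx 41:     2 KB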

 private:
  // The base address of the memory region that's managed by this allocator.
  uint8_t* base_;

  // The footprint in bytes of the currently allocated portion of the
  // memory region.
  size_t footprint_;

  // The maximum footprint. The address, base_ + capacity_, indicates
  // the end of the memory region that's currently managed by this allocator.
  size_t capacity_;

  // The maximum capacity. The address, base_ + max_capacity_, indicates
  // the end of the memory region that's ever managed by this allocator.
  size_t max_capacity_;

  template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
  using AllocationTrackingSet = std::set<Key, Compare, TrackingAllocator<Key, kTag>>;

  // The run sets that hold the runs whose slots are not all
  // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
  AllocationTrackingSet<Run*, kAllocatorTagRosAlloc> non_full_runs_[kNumOfSizeBrackets];
  // The run sets that hold the runs whose slots are all full. This is
  // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
  std::unordered_set<Run*, hash_run, eq_run, TrackingAllocator<Run*, kAllocatorTagRosAlloc>>
      full_runs_[kNumOfSizeBrackets];
  // The set of free pages.
  AllocationTrackingSet<FreePageRun*, kAllocatorTagRosAlloc> free_page_runs_ GUARDED_BY(lock_);
  // The dedicated full run. It is always full and is shared by all threads when revoking happens.
  // This is an optimization since it enables us to avoid a null check for revoked runs.
  static Run* dedicated_full_run_;
  // Using size_t to ensure that it is at least word aligned.
  static size_t dedicated_full_run_storage_[];
  // The current runs where the allocations are first attempted for
  // the size brackets that do not use thread-local
  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
  Run* current_runs_[kNumOfSizeBrackets];
  // The mutexes, one per size bracket.
  Mutex* size_bracket_locks_[kNumOfSizeBrackets];
  // Bracket lock names (since locks only have char* names).
  std::string size_bracket_lock_names_[kNumOfSizeBrackets];
  // The types of page map entries.
  enum PageMapKind {
    kPageMapReleased = 0,     // Zero and released back to the OS.
    kPageMapEmpty,            // Zero but probably dirty.
    kPageMapRun,              // The beginning of a run.
    kPageMapRunPart,          // The non-beginning part of a run.
    kPageMapLargeObject,      // The beginning of a large object.
    kPageMapLargeObjectPart,  // The non-beginning part of a large object.
  };
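
  // For example (illustrative only), a three-page large object starting at page 2, followed by
  // a released page, would appear in page_map_ as:
  //   page_map_[2] == kPageMapLargeObject
  //   page_map_[3] == kPageMapLargeObjectPart
  //   page_map_[4] == kPageMapLargeObjectPart
  //   page_map_[5] == kPageMapReleased
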
  // The table that indicates what pages are currently used for.
  volatile uint8_t* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
  size_t page_map_size_;
  size_t max_page_map_size_;
  MemMap page_map_mem_map_;

  // The table that indicates the sizes of free page runs. The sizes are stored here rather than
  // in the free page headers so that the backing pages can be released.
  std::vector<size_t, TrackingAllocator<size_t, kAllocatorTagRosAlloc>> free_page_run_size_map_
      GUARDED_BY(lock_);
  // The global lock. Used to guard the page map, the free page set,
  // and the footprint.
  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
  // The reader-writer lock to allow one bulk free at a time while
  // allowing multiple individual frees at the same time. Also, this
  // is used to avoid race conditions between BulkFree() and
  // RevokeThreadLocalRuns() on the bulk free list.
  ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;

  // The page release mode.
  const PageReleaseMode page_release_mode_;
  // Under kPageReleaseModeSize(AndEnd), if the free page run size is
  // greater than or equal to this value, release pages.
  const size_t page_release_size_threshold_;

  // Whether this allocator is running on a memory tool.
  bool is_running_on_memory_tool_;

  // The base address of the memory region that's managed by this allocator.
  uint8_t* Begin() { return base_; }
  // The end address of the memory region that's managed by this allocator.
  uint8_t* End() { return base_ + capacity_; }

  // Page-granularity alloc/free.
  void* AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type)
      REQUIRES(lock_);
  // Returns how many bytes were freed.
  size_t FreePages(Thread* self, void* ptr, bool already_zero) REQUIRES(lock_);

  // Allocate/free a run slot.
  EXPORT void* AllocFromRun(Thread* self,
                            size_t size,
                            size_t* bytes_allocated,
                            size_t* usable_size,
                            size_t* bytes_tl_bulk_allocated) REQUIRES(!lock_);
  // Allocate/free a run slot without acquiring locks.
  // TODO: REQUIRES(Locks::mutator_lock_)
  void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
                                 size_t* usable_size, size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx) REQUIRES(!lock_);

  // Returns the bracket size.
  size_t FreeFromRun(Thread* self, void* ptr, Run* run)
      REQUIRES(!lock_);

  // Used to allocate a new thread-local run for a size bracket.
  Run* AllocRun(Thread* self, size_t idx) REQUIRES(!lock_);

  // Used to acquire a new/reused run for a size bracket. Used when a
  // thread-local or current run gets full.
  Run* RefillRun(Thread* self, size_t idx) REQUIRES(!lock_);
  // The internal implementation of the non-bulk Free().
  size_t FreeInternal(Thread* self, void* ptr) REQUIRES(!lock_);

  // Allocates large objects.
  EXPORT void* AllocLargeObject(Thread* self,
                                size_t size,
                                size_t* bytes_allocated,
                                size_t* usable_size,
                                size_t* bytes_tl_bulk_allocated) REQUIRES(!lock_);

  // Revoke a run by adding it to non_full_runs_ or freeing the pages.
  void RevokeRun(Thread* self, size_t idx, Run* run) REQUIRES(!lock_);

  // Revoke the current runs which share an index with the thread-local runs.
  void RevokeThreadUnsafeCurrentRuns() REQUIRES(!lock_);

  // Release a range of pages.
  size_t ReleasePageRange(uint8_t* start, uint8_t* end) REQUIRES(lock_);

  // Dumps the page map for debugging.
  std::string DumpPageMap() REQUIRES(lock_);

 public:
  RosAlloc(void* base, size_t capacity, size_t max_capacity,
           PageReleaseMode page_release_mode,
           bool running_on_memory_tool,
           size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
  ~RosAlloc();

  static constexpr size_t RunFreeListOffset() {
    return OFFSETOF_MEMBER(Run, free_list_);
  }
  static constexpr size_t RunFreeListHeadOffset() {
    return OFFSETOF_MEMBER(SlotFreeList<false>, head_);
  }
  static constexpr size_t RunFreeListSizeOffset() {
    return OFFSETOF_MEMBER(SlotFreeList<false>, size_);
  }
  static constexpr size_t RunSlotNextOffset() {
    return OFFSETOF_MEMBER(Slot, next_);
  }

  // If kThreadSafe is false, the allocator may avoid acquiring some locks as an optimization.
  // If used, this may cause race conditions if multiple threads are allocating at the same time.
  template<bool kThreadSafe = true>
  void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
              size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  size_t Free(Thread* self, void* ptr)
      REQUIRES(!bulk_free_lock_, !lock_);
  size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
      REQUIRES(!bulk_free_lock_, !lock_);

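  // Minimal usage sketch (illustrative only; assumes 'rosalloc' is a constructed RosAlloc and
  // 'self' is the calling Thread):
  //
  //   size_t bytes_allocated, usable_size, bytes_tl_bulk_allocated;
  //   void* p = rosalloc->Alloc(self, 40, &bytes_allocated, &usable_size,
  //                             &bytes_tl_bulk_allocated);
  //   if (p != nullptr) {
  //     ...  // Use up to usable_size bytes.
  //     rosalloc->Free(self, p);
  //   }
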
  // Returns true if the given allocation request can be allocated in
  // an existing thread-local run without allocating a new run.
  ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size);
  // Allocates the given allocation request in an existing thread-local
  // run without allocating a new run.
  ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated);

  // Returns the maximum number of bytes that could be allocated for the given
  // size in bulk, that is, the maximum value for the
  // bytes_tl_bulk_allocated out param returned by RosAlloc::Alloc().
  ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size);

  // Returns the size of the allocated slot for a given allocated memory chunk.
  size_t UsableSize(const void* ptr) REQUIRES(!lock_);
  // Returns the size of the allocated slot for a given size.
  size_t UsableSize(size_t bytes) {
    if (UNLIKELY(bytes > kLargeSizeThreshold)) {
      return RoundUp(bytes, gPageSize);
    } else {
      return RoundToBracketSize(bytes);
    }
  }
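
  // For example (illustrative only): UsableSize(37) returns 40 (the 40-byte bracket), while
  // UsableSize(kLargeSizeThreshold + 1) rounds up to a whole number of pages.
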
  // Try to reduce the current footprint by releasing the free page
  // run at the end of the memory region, if any.
  bool Trim() REQUIRES(!lock_);
  // Iterates over all the memory slots and applies the given function.
  void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
                  void* arg)
      REQUIRES(!lock_);

  // Release empty pages.
  size_t ReleasePages() REQUIRES(!lock_);
  // Returns the current footprint.
  size_t Footprint() REQUIRES(!lock_);
  // Returns the current capacity, i.e. the maximum footprint.
  size_t FootprintLimit() REQUIRES(!lock_);
  // Update the current capacity.
  void SetFootprintLimit(size_t bytes) REQUIRES(!lock_);

  // Releases the thread-local runs assigned to the given thread back to the common set of runs.
  // Returns the total bytes of free slots in the revoked thread-local runs. This is to be
  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
  size_t RevokeThreadLocalRuns(Thread* thread) REQUIRES(!lock_, !bulk_free_lock_);
  // Releases the thread-local runs assigned to all the threads back to the common set of runs.
  // Returns the total bytes of free slots in the revoked thread-local runs. This is to be
  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
  size_t RevokeAllThreadLocalRuns() REQUIRES(!Locks::thread_list_lock_, !lock_, !bulk_free_lock_);
  // Asserts that the thread-local runs of a thread are revoked.
  void AssertThreadLocalRunsAreRevoked(Thread* thread) REQUIRES(!bulk_free_lock_);
  // Asserts that all the thread-local runs are revoked.
  void AssertAllThreadLocalRunsAreRevoked() REQUIRES(!Locks::thread_list_lock_, !bulk_free_lock_);

  static Run* GetDedicatedFullRun() {
    return dedicated_full_run_;
  }
  bool IsFreePage(size_t idx) const {
    DCHECK_LT(idx, DivideByPageSize(capacity_));
    uint8_t pm_type = page_map_[idx];
    return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
  }

  // Callbacks for InspectAll that will count the number of bytes
  // allocated and objects allocated, respectively.
  static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
  static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);

  bool DoesReleaseAllPages() const {
    return page_release_mode_ == kPageReleaseModeAll;
  }

  // Verify for debugging.
  void Verify() REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !bulk_free_lock_,
                         !lock_);

  bool LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes)
      REQUIRES(!bulk_free_lock_, !lock_);

  void DumpStats(std::ostream& os)
      REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_) REQUIRES(!bulk_free_lock_);

 private:
  friend std::ostream& operator<<(std::ostream& os, RosAlloc::PageMapKind rhs);

  DISALLOW_COPY_AND_ASSIGN(RosAlloc);
};
std::ostream& operator<<(std::ostream& os, RosAlloc::PageMapKind rhs);

// Callback from rosalloc when it needs to increase the footprint. Must be implemented somewhere
// else (currently rosalloc_space.cc).
void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment);

}  // namespace allocator
}  // namespace gc
}  // namespace art

#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_