/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_

#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <memory>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

#include <android-base/logging.h>

#include "base/allocator.h"
#include "base/bit_utils.h"
#include "base/macros.h"
#include "base/mem_map.h"
#include "base/mutex.h"
#include "runtime_globals.h"
#include "thread.h"

namespace art HIDDEN {

namespace gc {
namespace allocator {

// A runs-of-slots memory allocator.
class RosAlloc {
 private:
  // Represents a run of free pages.
  class FreePageRun {
   public:
    uint8_t magic_num_;  // The magic number used for debugging only.

    bool IsFree() const {
      return !kIsDebugBuild || magic_num_ == kMagicNumFree;
    }
    size_t ByteSize(RosAlloc* rosalloc) const REQUIRES(rosalloc->lock_) {
      const uint8_t* fpr_base = reinterpret_cast<const uint8_t*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
      DCHECK_GE(byte_size, static_cast<size_t>(0));
      DCHECK_ALIGNED_PARAM(byte_size, gPageSize);
      return byte_size;
    }
    void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
        REQUIRES(rosalloc->lock_) {
      DCHECK_EQ(ModuloPageSize(byte_size), static_cast<size_t>(0));
      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
    }
    void* Begin() {
      return reinterpret_cast<void*>(this);
    }
    void* End(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
      uint8_t* end = fpr_base + ByteSize(rosalloc);
      return end;
    }
    bool IsLargerThanPageReleaseThreshold(RosAlloc* rosalloc)
        REQUIRES(rosalloc->lock_) {
      return ByteSize(rosalloc) >= rosalloc->page_release_size_threshold_;
    }
    bool IsAtEndOfSpace(RosAlloc* rosalloc)
        REQUIRES(rosalloc->lock_) {
      return reinterpret_cast<uint8_t*>(this) + ByteSize(rosalloc) == rosalloc->base_ + rosalloc->footprint_;
    }
    bool ShouldReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      switch (rosalloc->page_release_mode_) {
        case kPageReleaseModeNone:
          return false;
        case kPageReleaseModeEnd:
          return IsAtEndOfSpace(rosalloc);
        case kPageReleaseModeSize:
          return IsLargerThanPageReleaseThreshold(rosalloc);
        case kPageReleaseModeSizeAndEnd:
          return IsLargerThanPageReleaseThreshold(rosalloc) && IsAtEndOfSpace(rosalloc);
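        // E.g. (illustrative): with the default 4 MB page_release_size_threshold_, a 6 MB free
        // page run satisfies the size check above while a 2 MB one does not.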
        case kPageReleaseModeAll:
          return true;
      }
    }
    void ReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      uint8_t* start = reinterpret_cast<uint8_t*>(this);
      size_t byte_size = ByteSize(rosalloc);
      DCHECK_EQ(ModuloPageSize(byte_size), static_cast<size_t>(0));
      if (ShouldReleasePages(rosalloc)) {
        rosalloc->ReleasePageRange(start, start + byte_size);
      }
    }

   private:
    DISALLOW_COPY_AND_ASSIGN(FreePageRun);
  };

  // The slot header.
  class Slot {
   public:
    Slot* Next() const {
      return next_;
    }
    void SetNext(Slot* next) {
      next_ = next;
    }
    // The slot right before this slot in terms of the address.
    Slot* Left(size_t bracket_size) {
      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) - bracket_size);
    }
    void Clear() {
      next_ = nullptr;
    }

   private:
    Slot* next_;  // Next slot in the list.
    friend class RosAlloc;
  };

  // We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need to
  // traverse the list from the head to the tail when merging free lists.
  // We don't use the tail (kUseTail == false) for the free list to avoid the need to manage the
  // tail in the allocation fast path for a performance reason.
  template<bool kUseTail = true>
  class SlotFreeList {
   public:
    SlotFreeList() : head_(0U), tail_(0), size_(0), padding_(0) {}
    Slot* Head() const {
      return reinterpret_cast<Slot*>(head_);
    }
    Slot* Tail() const {
      CHECK(kUseTail);
      return reinterpret_cast<Slot*>(tail_);
    }
    size_t Size() const {
      return size_;
    }
    // Removes from the head of the free list.
    Slot* Remove() {
      Slot* slot;
      if (kIsDebugBuild) {
        Verify();
      }
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        if (kUseTail) {
          DCHECK(*tailp == nullptr);
        }
        return nullptr;
      } else {
        // List wasn't empty.
        if (kUseTail) {
          DCHECK(*tailp != nullptr);
        }
        Slot* old_head_next = old_head->Next();
        slot = old_head;
        *headp = old_head_next;
        if (kUseTail && old_head_next == nullptr) {
          // List becomes empty.
          *tailp = nullptr;
        }
      }
      slot->Clear();
      --size_;
      if (kIsDebugBuild) {
        Verify();
      }
      return slot;
    }
    void Add(Slot* slot) {
      if (kIsDebugBuild) {
        Verify();
      }
      DCHECK(slot != nullptr);
      DCHECK(slot->Next() == nullptr);
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        if (kUseTail) {
          DCHECK(*tailp == nullptr);
        }
        *headp = slot;
        if (kUseTail) {
          *tailp = slot;
        }
      } else {
        // List wasn't empty.
        if (kUseTail) {
          DCHECK(*tailp != nullptr);
        }
        *headp = slot;
        slot->SetNext(old_head);
      }
      ++size_;
      if (kIsDebugBuild) {
        Verify();
      }
    }
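    // Usage sketch (illustrative only, not part of the allocator): the list is LIFO at the head,
    // so after
    //   SlotFreeList<false> list;
    //   list.Add(a); list.Add(b); list.Add(c);
    // the head is c and successive Remove() calls return c, b, a. The tailed variant
    // (kUseTail == true) additionally tracks the last slot so that Merge() below can splice a
    // whole list in O(1) instead of walking to its end.
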
    // Merge the given list into this list. Empty the given list.
    // Deliberately support only a kUseTail == true SlotFreeList parameter because 1) we don't
    // currently have a situation where we need a kUseTail == false SlotFreeList parameter, and 2)
    // supporting the kUseTail == false parameter would require an O(n) linked list traversal to do
    // the merge if 'this' SlotFreeList has kUseTail == false, which we'd like to avoid.
    void Merge(SlotFreeList<true>* list) {
      if (kIsDebugBuild) {
        Verify();
        CHECK(list != nullptr);
        list->Verify();
      }
      if (list->Size() == 0) {
        return;
      }
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        *headp = list->Head();
        if (kUseTail) {
          *tailp = list->Tail();
        }
        size_ = list->Size();
      } else {
        // List wasn't empty.
        DCHECK(list->Head() != nullptr);
        *headp = list->Head();
        DCHECK(list->Tail() != nullptr);
        list->Tail()->SetNext(old_head);
        // if kUseTail, no change to tailp.
        size_ += list->Size();
      }
      list->Reset();
      if (kIsDebugBuild) {
        Verify();
      }
    }

    void Reset() {
      head_ = 0;
      if (kUseTail) {
        tail_ = 0;
      }
      size_ = 0;
    }

    void Verify() {
      Slot* head = reinterpret_cast<Slot*>(head_);
      Slot* tail = kUseTail ? reinterpret_cast<Slot*>(tail_) : nullptr;
      if (size_ == 0) {
        CHECK(head == nullptr);
        if (kUseTail) {
          CHECK(tail == nullptr);
        }
      } else {
        CHECK(head != nullptr);
        if (kUseTail) {
          CHECK(tail != nullptr);
        }
        size_t count = 0;
        for (Slot* slot = head; slot != nullptr; slot = slot->Next()) {
          ++count;
          if (kUseTail && slot->Next() == nullptr) {
            CHECK_EQ(slot, tail);
          }
        }
        CHECK_EQ(size_, count);
      }
    }

   private:
    // A pointer (Slot*) to the head of the list. Always 8 bytes so that we will have the same
    // layout between 32 bit and 64 bit, which is not strictly necessary, but we do so for 1)
    // uniformity, 2) we won't need to change this code if we move to a non-low 4G heap in the
    // future, and 3) the space savings by using 32 bit fields in 32 bit would be lost in noise
    // (won't open up enough space to cause an extra slot to be available).
    uint64_t head_;
    // A pointer (Slot*) to the tail of the list. Always 8 bytes so that we will have the same
    // layout between 32 bit and 64 bit. The tail is stored to speed up merging of lists.
    // Unused if kUseTail is false.
    uint64_t tail_;
    // The number of slots in the list. This is used to make it fast to check if a free list is all
    // free without traversing the whole free list.
    uint32_t size_;
    [[maybe_unused]] uint32_t padding_;
    friend class RosAlloc;
  };

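  // Note (illustrative sketch): walking any SlotFreeList only needs Head()/Next(), e.g.
  //   for (Slot* slot = list->Head(); slot != nullptr; slot = slot->Next()) { ... }
  // which is exactly how SlotFreeList::Verify() recounts the slots against size_.
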
  // Represents a run of memory slots of the same size.
  //
  // A run's memory layout:
  //
  // +-------------------+
  // | magic_num         |
  // +-------------------+
  // | size_bracket_idx  |
  // +-------------------+
  // | is_thread_local   |
  // +-------------------+
  // | to_be_bulk_freed  |
  // +-------------------+
  // |                   |
  // | free list         |
  // |                   |
  // +-------------------+
  // |                   |
  // | bulk free list    |
  // |                   |
  // +-------------------+
  // |                   |
  // | thread-local free |
  // | list              |
  // |                   |
  // +-------------------+
  // | padding due to    |
  // | alignment         |
  // +-------------------+
  // | slot 0            |
  // +-------------------+
  // | slot 1            |
  // +-------------------+
  // | slot 2            |
  // +-------------------+
  // ...
  // +-------------------+
  // | last slot         |
  // +-------------------+
  //
  class Run {
   public:
    uint8_t magic_num_;         // The magic number used for debugging.
    uint8_t size_bracket_idx_;  // The index of the size bracket of this run.
    uint8_t is_thread_local_;   // True if this run is used as a thread-local run.
    bool to_be_bulk_freed_;     // Used within BulkFree() to flag a run that's involved with
                                // a bulk free.
    [[maybe_unused]] uint32_t padding_;
    // Use a tailless free list for free_list_ so that the alloc fast path does not manage the tail.
    SlotFreeList<false> free_list_;
    SlotFreeList<true> bulk_free_list_;
    SlotFreeList<true> thread_local_free_list_;
    // Padding due to alignment
    // Slot 0
    // Slot 1
    // ...

    // Returns the byte size of the header.
    static size_t fixed_header_size() {
      return sizeof(Run);
    }
    Slot* FirstSlot() const {
      const uint8_t idx = size_bracket_idx_;
      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]);
    }
    Slot* LastSlot() {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      uintptr_t end = reinterpret_cast<uintptr_t>(End());
      Slot* last_slot = reinterpret_cast<Slot*>(end - bracket_size);
      DCHECK_LE(FirstSlot(), last_slot);
      return last_slot;
    }
    SlotFreeList<false>* FreeList() {
      return &free_list_;
    }
    SlotFreeList<true>* BulkFreeList() {
      return &bulk_free_list_;
    }
    SlotFreeList<true>* ThreadLocalFreeList() {
      return &thread_local_free_list_;
    }
    void* End() {
      return reinterpret_cast<uint8_t*>(this) + gPageSize * numOfPages[size_bracket_idx_];
    }
    void SetIsThreadLocal(bool is_thread_local) {
      is_thread_local_ = is_thread_local ? 1 : 0;
    }
    bool IsThreadLocal() const {
      return is_thread_local_ != 0;
    }
    // Set up the free list for a new/empty run.
    void InitFreeList() {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      Slot* first_slot = FirstSlot();
      // Add backwards so the first slot is at the head of the list.
      for (Slot* slot = LastSlot(); slot >= first_slot; slot = slot->Left(bracket_size)) {
        free_list_.Add(slot);
      }
    }
    // Merge the thread local free list to the free list. Used when a thread-local run becomes
    // full.
    bool MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out);
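    // Rough life cycle of a run's lists (illustrative sketch): AllocSlot() pops from free_list_,
    // frees to a thread-local run are recorded on thread_local_free_list_ via
    // AddToThreadLocalFreeList(), and MergeThreadLocalFreeListToFreeList() above splices them
    // back into free_list_ so the slots become allocatable again.
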
    // Merge the bulk free list to the free list. Used in a bulk free.
    void MergeBulkFreeListToFreeList();
    // Merge the bulk free list to the thread local free list. In a bulk free, as a two-step
    // process, GC will first record all the slots to free in a run in the bulk free list where it
    // can write without a lock, and later acquire a lock once per run to merge the bulk free list
    // to the thread-local free list.
    void MergeBulkFreeListToThreadLocalFreeList();
    // Allocates a slot in a run.
    ALWAYS_INLINE void* AllocSlot();
    // Frees a slot in a run. This is used in a non-bulk free.
    void FreeSlot(void* ptr);
    // Add the given slot to the bulk free list. Returns the bracket size.
    size_t AddToBulkFreeList(void* ptr);
    // Add the given slot to the thread-local free list.
    void AddToThreadLocalFreeList(void* ptr);
    // Returns true if all the slots in the run are not in use.
    bool IsAllFree() const {
      return free_list_.Size() == numOfSlots[size_bracket_idx_];
    }
    // Returns the number of free slots.
    size_t NumberOfFreeSlots() {
      return free_list_.Size();
    }
    // Returns true if all the slots in the run are in use.
    ALWAYS_INLINE bool IsFull();
    // Returns true if the bulk free list is empty.
    bool IsBulkFreeListEmpty() const {
      return bulk_free_list_.Size() == 0;
    }
    // Returns true if the thread local free list is empty.
    bool IsThreadLocalFreeListEmpty() const {
      return thread_local_free_list_.Size() == 0;
    }
    // Zero the run's data.
    void ZeroData();
    // Zero the run's header and the slot headers.
    void ZeroHeaderAndSlotHeaders();
    // Iterate over all the slots and apply the given function.
    void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
    // Dump the run metadata for debugging.
    std::string Dump();
    // Verify for debugging.
    void Verify(Thread* self, RosAlloc* rosalloc, bool running_on_memory_tool)
        REQUIRES(Locks::mutator_lock_)
        REQUIRES(Locks::thread_list_lock_);

   private:
    // The common part of AddToBulkFreeList() and AddToThreadLocalFreeList(). Returns the bracket
    // size.
    size_t AddToFreeListShared(void* ptr, SlotFreeList<true>* free_list, const char* caller_name);
    // Turns a FreeList into a string for debugging.
    template<bool kUseTail>
    std::string FreeListToStr(SlotFreeList<kUseTail>* free_list);
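    // Slot/index arithmetic used below (illustrative): a slot's address is
    //   FirstSlot() + slot_idx * bracketSizes[size_bracket_idx_]
    // so, e.g., for a hypothetical 16-byte bracket the slot at index 2 starts 32 bytes past
    // FirstSlot().
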
    // Checks that a given pointer is a valid slot address and returns it as a Slot*.
    Slot* ToSlot(void* ptr) {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
          - reinterpret_cast<uint8_t*>(FirstSlot());
      DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
      size_t slot_idx = offset_from_slot_base / bracket_size;
      DCHECK_LT(slot_idx, numOfSlots[idx]);
      return reinterpret_cast<Slot*>(ptr);
    }
    size_t SlotIndex(Slot* slot) const {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
          - reinterpret_cast<uint8_t*>(FirstSlot());
      DCHECK_EQ(offset_from_slot_base % bracket_size, 0U);
      size_t slot_idx = offset_from_slot_base / bracket_size;
      DCHECK_LT(slot_idx, numOfSlots[idx]);
      return slot_idx;
    }

    // TODO: DISALLOW_COPY_AND_ASSIGN(Run);
  };

  // The magic number for a run.
  static constexpr uint8_t kMagicNum = 42;
  // The magic number for free pages.
  static constexpr uint8_t kMagicNumFree = 43;
  // The number of size brackets.
  static constexpr size_t kNumOfSizeBrackets = 42;
  // The sizes (the slot sizes, in bytes) of the size brackets.
  static size_t bracketSizes[kNumOfSizeBrackets];
  // The numbers of pages that are used for runs for each size bracket.
  static size_t numOfPages[kNumOfSizeBrackets];
  // The numbers of slots of the runs for each size bracket.
  EXPORT static size_t numOfSlots[kNumOfSizeBrackets];
  // The header sizes in bytes of the runs for each size bracket.
  static size_t headerSizes[kNumOfSizeBrackets];

  // Initialize the run specs (the above arrays).
  static void Initialize();
  static bool initialized_;

  // Returns the byte size of the bracket size from the index.
  static size_t IndexToBracketSize(size_t idx) {
    DCHECK_LT(idx, kNumOfSizeBrackets);
    return bracketSizes[idx];
  }
  // Returns the index of the size bracket from the bracket size.
  static size_t BracketSizeToIndex(size_t size) {
    DCHECK(8 <= size &&
           ((size <= kMaxThreadLocalBracketSize && size % kThreadLocalBracketQuantumSize == 0) ||
            (size <= kMaxRegularBracketSize && size % kBracketQuantumSize == 0) ||
            size == 1 * KB || size == 2 * KB));
    size_t idx;
    if (UNLIKELY(size == 1 * KB)) {
      idx = kNumOfSizeBrackets - 2;
    } else if (UNLIKELY(size == 2 * KB)) {
      idx = kNumOfSizeBrackets - 1;
    } else if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      DCHECK_EQ(size % kThreadLocalBracketQuantumSize, 0U);
      idx = size / kThreadLocalBracketQuantumSize - 1;
    } else {
      DCHECK(size <= kMaxRegularBracketSize);
      DCHECK_EQ((size - kMaxThreadLocalBracketSize) % kBracketQuantumSize, 0U);
      idx = ((size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
          + kNumThreadLocalSizeBrackets;
    }
    DCHECK(bracketSizes[idx] == size);
    return idx;
  }
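  // Worked example (illustrative) of the bracket mapping implied by the constants declared
  // further below: a 100-byte request rounds up to the 104-byte thread-local bracket at index
  // 104 / 8 - 1 = 12, and a 300-byte request rounds up to the 304-byte regular bracket at index
  // (304 - 128) / 16 - 1 + 16 = 26.
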
  // Returns true if the given allocation size is for a thread local allocation.
  static bool IsSizeForThreadLocal(size_t size) {
    bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize;
    DCHECK(size > kLargeSizeThreshold ||
           (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
    return is_size_for_thread_local;
  }
  // Rounds up the size to the nearest bracket size.
  static size_t RoundToBracketSize(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      return RoundUp(size, kThreadLocalBracketQuantumSize);
    } else if (size <= kMaxRegularBracketSize) {
      return RoundUp(size, kBracketQuantumSize);
    } else if (UNLIKELY(size <= 1 * KB)) {
      return 1 * KB;
    } else {
      DCHECK_LE(size, 2 * KB);
      return 2 * KB;
    }
  }
  // Returns the size bracket index from the byte size with rounding.
  static size_t SizeToIndex(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      return RoundUp(size, kThreadLocalBracketQuantumSize) / kThreadLocalBracketQuantumSize - 1;
    } else if (size <= kMaxRegularBracketSize) {
      return (RoundUp(size, kBracketQuantumSize) - kMaxThreadLocalBracketSize) / kBracketQuantumSize
          - 1 + kNumThreadLocalSizeBrackets;
    } else if (size <= 1 * KB) {
      return kNumOfSizeBrackets - 2;
    } else {
      DCHECK_LE(size, 2 * KB);
      return kNumOfSizeBrackets - 1;
    }
  }
  // A combination of SizeToIndex() and RoundToBracketSize().
  static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
    DCHECK(size <= kLargeSizeThreshold);
    size_t idx;
    size_t bracket_size;
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      bracket_size = RoundUp(size, kThreadLocalBracketQuantumSize);
      idx = bracket_size / kThreadLocalBracketQuantumSize - 1;
    } else if (size <= kMaxRegularBracketSize) {
      bracket_size = RoundUp(size, kBracketQuantumSize);
      idx = ((bracket_size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
          + kNumThreadLocalSizeBrackets;
    } else if (size <= 1 * KB) {
      bracket_size = 1 * KB;
      idx = kNumOfSizeBrackets - 2;
    } else {
      DCHECK(size <= 2 * KB);
      bracket_size = 2 * KB;
      idx = kNumOfSizeBrackets - 1;
    }
    DCHECK_EQ(idx, SizeToIndex(size)) << idx;
    DCHECK_EQ(bracket_size, IndexToBracketSize(idx)) << idx;
    DCHECK_EQ(bracket_size, bracketSizes[idx]) << idx;
    DCHECK_LE(size, bracket_size) << idx;
    DCHECK(size > kMaxRegularBracketSize ||
           (size <= kMaxThreadLocalBracketSize &&
            bracket_size - size < kThreadLocalBracketQuantumSize) ||
           (size <= kMaxRegularBracketSize && bracket_size - size < kBracketQuantumSize)) << idx;
    *bracket_size_out = bracket_size;
    return idx;
  }

  // Returns the page map index from an address. Requires that the
  // address is page size aligned.
  size_t ToPageMapIndex(const void* addr) const {
    DCHECK_LE(base_, addr);
    DCHECK_LT(addr, base_ + capacity_);
    size_t byte_offset = reinterpret_cast<const uint8_t*>(addr) - base_;
    DCHECK_EQ(ModuloPageSize(byte_offset), static_cast<size_t>(0));
    return DivideByPageSize(byte_offset);
  }
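  // E.g. (illustrative, assuming 4 KiB pages): base_ + 3 * 4096 maps to page map index 3;
  // RoundDownToPageMapIndex() below also accepts unaligned addresses, so base_ + 3 * 4096 + 100
  // maps to index 3 as well.
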
  // Returns the page map index from an address with rounding.
  size_t RoundDownToPageMapIndex(const void* addr) const {
    DCHECK(base_ <= addr && addr < reinterpret_cast<uint8_t*>(base_) + capacity_);
    return DivideByPageSize(reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_));
  }

  // A memory allocation request larger than this size is treated as a large object and allocated
  // at page granularity.
  static constexpr size_t kLargeSizeThreshold = 2048;

  // If true, check that the returned memory is actually zero.
  static constexpr bool kCheckZeroMemory = kIsDebugBuild;
  // Do not check memory when running under a memory tool. In a normal
  // build with kCheckZeroMemory the whole test should be optimized away.
  // TODO: Unprotect before checks.
  ALWAYS_INLINE bool ShouldCheckZeroMemory();

  // If true, log verbose details of operations.
  static constexpr bool kTraceRosAlloc = false;

  struct hash_run {
    size_t operator()(const RosAlloc::Run* r) const {
      return reinterpret_cast<size_t>(r);
    }
  };

  struct eq_run {
    bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
      return r1 == r2;
    }
  };

 public:
  // Different page release modes.
  enum PageReleaseMode {
    kPageReleaseModeNone,        // Release no empty pages.
    kPageReleaseModeEnd,         // Release empty pages at the end of the space.
    kPageReleaseModeSize,        // Release empty pages that are larger than the threshold.
    kPageReleaseModeSizeAndEnd,  // Release empty pages that are larger than the threshold or
                                 // at the end of the space.
    kPageReleaseModeAll,         // Release all empty pages.
  };

  // The default value for page_release_size_threshold_.
  static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;

  // We use thread-local runs for the size brackets whose indexes
  // are less than this index. We use shared (current) runs for the rest.
  // Sync this with the length of Thread::rosalloc_runs_.
  static constexpr size_t kNumThreadLocalSizeBrackets = 16;
  static_assert(kNumThreadLocalSizeBrackets == kNumRosAllocThreadLocalSizeBracketsInThread,
                "Mismatch between kNumThreadLocalSizeBrackets and "
                "kNumRosAllocThreadLocalSizeBracketsInThread");

  // The size of the largest bracket we use thread-local runs for.
  // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
  static constexpr size_t kMaxThreadLocalBracketSize = 128;

  // We use regular (8 or 16-byte increment) runs for the size brackets whose indexes are less than
  // this index.
  static const size_t kNumRegularSizeBrackets = 40;

  // The size of the largest regular (8 or 16-byte increment) bracket. Non-regular brackets are the
  // 1 KB and the 2 KB brackets. This should be equal to bracketSizes[kNumRegularSizeBrackets - 1].
  static constexpr size_t kMaxRegularBracketSize = 512;

  // The bracket size increment for the thread-local brackets (<= kMaxThreadLocalBracketSize bytes).
  static constexpr size_t kThreadLocalBracketQuantumSize = 8;

  // Equal to Log2(kThreadLocalBracketQuantumSize).
  static constexpr size_t kThreadLocalBracketQuantumSizeShift = 3;
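  // Bracket table implied by these constants (illustrative): indexes 0..15 cover 8, 16, ..., 128
  // bytes in 8-byte steps, indexes 16..39 cover 144, 160, ..., 512 bytes in 16-byte steps, and
  // indexes 40 and 41 are the 1 KB and 2 KB brackets.
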
  // The bracket size increment for the non-thread-local, regular brackets (of size <=
  // kMaxRegularBracketSize bytes and > kMaxThreadLocalBracketSize bytes).
  static constexpr size_t kBracketQuantumSize = 16;

  // Equal to Log2(kBracketQuantumSize).
  static constexpr size_t kBracketQuantumSizeShift = 4;

 private:
  // The base address of the memory region that's managed by this allocator.
  uint8_t* base_;

  // The footprint in bytes of the currently allocated portion of the
  // memory region.
  size_t footprint_;

  // The maximum footprint. The address, base_ + capacity_, indicates
  // the end of the memory region that's currently managed by this allocator.
  size_t capacity_;

  // The maximum capacity. The address, base_ + max_capacity_, indicates
  // the end of the memory region that's ever managed by this allocator.
  size_t max_capacity_;

  template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
  using AllocationTrackingSet = std::set<Key, Compare, TrackingAllocator<Key, kTag>>;

  // The run sets that hold the runs whose slots are not all
  // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
  AllocationTrackingSet<Run*, kAllocatorTagRosAlloc> non_full_runs_[kNumOfSizeBrackets];
  // The run sets that hold the runs whose slots are all full. This is
  // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
  std::unordered_set<Run*, hash_run, eq_run, TrackingAllocator<Run*, kAllocatorTagRosAlloc>>
      full_runs_[kNumOfSizeBrackets];
  // The set of free pages.
  AllocationTrackingSet<FreePageRun*, kAllocatorTagRosAlloc> free_page_runs_ GUARDED_BY(lock_);
  // The dedicated full run, it is always full and shared by all threads when revoking happens.
  // This is an optimization since it enables us to avoid a null check for revoked runs.
  static Run* dedicated_full_run_;
  // Using size_t to ensure that it is at least word aligned.
  static size_t dedicated_full_run_storage_[];
  // The current runs where the allocations are first attempted for
  // the size brackets that do not use thread-local
  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
  Run* current_runs_[kNumOfSizeBrackets];
  // The mutexes, one per size bracket.
  Mutex* size_bracket_locks_[kNumOfSizeBrackets];
  // Bracket lock names (since locks only have char* names).
  std::string size_bracket_lock_names_[kNumOfSizeBrackets];
  // The types of page map entries.
  enum PageMapKind {
    kPageMapReleased = 0,     // Zero and released back to the OS.
    kPageMapEmpty,            // Zero but probably dirty.
    kPageMapRun,              // The beginning of a run.
    kPageMapRunPart,          // The non-beginning part of a run.
    kPageMapLargeObject,      // The beginning of a large object.
    kPageMapLargeObjectPart,  // The non-beginning part of a large object.
  };
  // The table that indicates what pages are currently used for.
  volatile uint8_t* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
  size_t page_map_size_;
  size_t max_page_map_size_;
  MemMap page_map_mem_map_;

  // The table that indicates the size of free page runs. These sizes
  // are stored here to avoid storing in the free page header and
  // release backing pages.
  std::vector<size_t, TrackingAllocator<size_t, kAllocatorTagRosAlloc>> free_page_run_size_map_
      GUARDED_BY(lock_);
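  // E.g. (illustrative): a three-page free run whose first page has page map index 7 is recorded
  // as free_page_run_size_map_[7] = 3 * gPageSize; see FreePageRun::ByteSize()/SetByteSize().
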
  // The global lock. Used to guard the page map, the free page set,
  // and the footprint.
  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
  // The reader-writer lock to allow one bulk free at a time while
  // allowing multiple individual frees at the same time. Also, this
  // is used to avoid race conditions between BulkFree() and
  // RevokeThreadLocalRuns() on the bulk free list.
  ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;

  // The page release mode.
  const PageReleaseMode page_release_mode_;
  // Under kPageReleaseModeSize(AndEnd), if the free page run size is
  // greater than or equal to this value, release pages.
  const size_t page_release_size_threshold_;

  // Whether this allocator is running on a memory tool.
  bool is_running_on_memory_tool_;

  // The base address of the memory region that's managed by this allocator.
  uint8_t* Begin() { return base_; }
  // The end address of the memory region that's managed by this allocator.
  uint8_t* End() { return base_ + capacity_; }

  // Page-granularity alloc/free
  void* AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type)
      REQUIRES(lock_);
  // Returns how many bytes were freed.
  size_t FreePages(Thread* self, void* ptr, bool already_zero) REQUIRES(lock_);

  // Allocate/free a run slot.
  EXPORT void* AllocFromRun(Thread* self,
                            size_t size,
                            size_t* bytes_allocated,
                            size_t* usable_size,
                            size_t* bytes_tl_bulk_allocated) REQUIRES(!lock_);
  // Allocate/free a run slot without acquiring locks.
  // TODO: REQUIRES(Locks::mutator_lock_)
  void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
                                 size_t* usable_size, size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx) REQUIRES(!lock_);

  // Returns the bracket size.
  size_t FreeFromRun(Thread* self, void* ptr, Run* run)
      REQUIRES(!lock_);

  // Used to allocate a new thread local run for a size bracket.
  Run* AllocRun(Thread* self, size_t idx) REQUIRES(!lock_);

  // Used to acquire a new/reused run for a size bracket. Used when a
  // thread-local or current run gets full.
  Run* RefillRun(Thread* self, size_t idx) REQUIRES(!lock_);

  // The internals of the non-bulk Free().
  size_t FreeInternal(Thread* self, void* ptr) REQUIRES(!lock_);

  // Allocates large objects.
  EXPORT void* AllocLargeObject(Thread* self,
                                size_t size,
                                size_t* bytes_allocated,
                                size_t* usable_size,
                                size_t* bytes_tl_bulk_allocated) REQUIRES(!lock_);

  // Revoke a run by adding it to non_full_runs_ or freeing the pages.
  void RevokeRun(Thread* self, size_t idx, Run* run) REQUIRES(!lock_);

  // Revoke the current runs which share an index with the thread local runs.
  void RevokeThreadUnsafeCurrentRuns() REQUIRES(!lock_);

  // Release a range of pages.
  size_t ReleasePageRange(uint8_t* start, uint8_t* end) REQUIRES(lock_);

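  // Rough allocation flow (sketch; the definitions live in the corresponding .cc file): requests
  // larger than kLargeSizeThreshold go to AllocLargeObject(), which takes whole pages via
  // AllocPages(); smaller requests go to AllocFromRun(), which serves thread-local brackets from
  // the caller's thread-local run and the remaining brackets from current_runs_[idx], calling
  // RefillRun() when a run fills up.
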
  // Dumps the page map for debugging.
  std::string DumpPageMap() REQUIRES(lock_);

 public:
  RosAlloc(void* base, size_t capacity, size_t max_capacity,
           PageReleaseMode page_release_mode,
           bool running_on_memory_tool,
           size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
  ~RosAlloc();

  static constexpr size_t RunFreeListOffset() {
    return OFFSETOF_MEMBER(Run, free_list_);
  }
  static constexpr size_t RunFreeListHeadOffset() {
    return OFFSETOF_MEMBER(SlotFreeList<false>, head_);
  }
  static constexpr size_t RunFreeListSizeOffset() {
    return OFFSETOF_MEMBER(SlotFreeList<false>, size_);
  }
  static constexpr size_t RunSlotNextOffset() {
    return OFFSETOF_MEMBER(Slot, next_);
  }

  // If kThreadSafe is false then the allocator may avoid acquiring some locks as an optimization.
  // If used, this may cause race conditions if multiple threads are allocating at the same time.
  template<bool kThreadSafe = true>
  void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
              size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  size_t Free(Thread* self, void* ptr)
      REQUIRES(!bulk_free_lock_, !lock_);
  size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
      REQUIRES(!bulk_free_lock_, !lock_);

  // Returns true if the given allocation request can be allocated in
  // an existing thread local run without allocating a new run.
  ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size);
  // Allocate the given allocation request in an existing thread local
  // run without allocating a new run.
  ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated);

  // Returns the maximum bytes that could be allocated for the given
  // size in bulk, that is the maximum value for the
  // bytes_tl_bulk_allocated out param returned by RosAlloc::Alloc().
  ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size);

  // Returns the size of the allocated slot for a given allocated memory chunk.
  size_t UsableSize(const void* ptr) REQUIRES(!lock_);
  // Returns the size of the allocated slot for a given size.
  size_t UsableSize(size_t bytes) {
    if (UNLIKELY(bytes > kLargeSizeThreshold)) {
      return RoundUp(bytes, gPageSize);
    } else {
      return RoundToBracketSize(bytes);
    }
  }
  // Try to reduce the current footprint by releasing the free page
  // run at the end of the memory region, if any.
  bool Trim() REQUIRES(!lock_);
  // Iterates over all the memory slots and applies the given function.
  void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
                  void* arg)
      REQUIRES(!lock_);

  // Release empty pages.
  size_t ReleasePages() REQUIRES(!lock_);
  // Returns the current footprint.
  size_t Footprint() REQUIRES(!lock_);
  // Returns the current capacity, maximum footprint.
  size_t FootprintLimit() REQUIRES(!lock_);
  // Update the current capacity.
  void SetFootprintLimit(size_t bytes) REQUIRES(!lock_);

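  // Typical call sequence from a caller's perspective (illustrative sketch only):
  //   size_t bytes_allocated, usable_size, bytes_tl_bulk_allocated;
  //   void* p = rosalloc->Alloc<true>(self, num_bytes, &bytes_allocated, &usable_size,
  //                                   &bytes_tl_bulk_allocated);
  //   ...
  //   rosalloc->Free(self, p);
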
  // Releases the thread-local runs assigned to the given thread back to the common set of runs.
  // Returns the total bytes of free slots in the revoked thread local runs. This is to be
  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
  size_t RevokeThreadLocalRuns(Thread* thread) REQUIRES(!lock_, !bulk_free_lock_);
  // Releases the thread-local runs assigned to all the threads back to the common set of runs.
  // Returns the total bytes of free slots in the revoked thread local runs. This is to be
  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
  size_t RevokeAllThreadLocalRuns() REQUIRES(!Locks::thread_list_lock_, !lock_, !bulk_free_lock_);
  // Assert the thread local runs of a thread are revoked.
  void AssertThreadLocalRunsAreRevoked(Thread* thread) REQUIRES(!bulk_free_lock_);
  // Assert all the thread local runs are revoked.
  void AssertAllThreadLocalRunsAreRevoked() REQUIRES(!Locks::thread_list_lock_, !bulk_free_lock_);

  static Run* GetDedicatedFullRun() {
    return dedicated_full_run_;
  }
  bool IsFreePage(size_t idx) const {
    DCHECK_LT(idx, DivideByPageSize(capacity_));
    uint8_t pm_type = page_map_[idx];
    return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
  }

  // Callbacks for InspectAll that will count the number of bytes
  // allocated and objects allocated, respectively.
  static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
  static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);

  bool DoesReleaseAllPages() const {
    return page_release_mode_ == kPageReleaseModeAll;
  }

  // Verify for debugging.
  void Verify() REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !bulk_free_lock_,
                         !lock_);

  bool LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes)
      REQUIRES(!bulk_free_lock_, !lock_);

  void DumpStats(std::ostream& os)
      REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_) REQUIRES(!bulk_free_lock_);

 private:
  friend std::ostream& operator<<(std::ostream& os, RosAlloc::PageMapKind rhs);

  DISALLOW_COPY_AND_ASSIGN(RosAlloc);
};
std::ostream& operator<<(std::ostream& os, RosAlloc::PageMapKind rhs);

// Callback from rosalloc when it needs to increase the footprint. Must be implemented somewhere
// else (currently rosalloc_space.cc).
void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment);

}  // namespace allocator
}  // namespace gc
}  // namespace art

#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_