/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_

#include <stdint.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <memory>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

#include <android-base/logging.h>

#include "base/allocator.h"
#include "base/bit_utils.h"
#include "base/mem_map.h"
#include "base/mutex.h"
#include "runtime_globals.h"
#include "thread.h"

namespace art {

namespace gc {
namespace allocator {

// A runs-of-slots memory allocator.
class RosAlloc {
 private:
  // Represents a run of free pages.
  class FreePageRun {
   public:
    uint8_t magic_num_;  // The magic number used for debugging only.

    bool IsFree() const {
      return !kIsDebugBuild || magic_num_ == kMagicNumFree;
    }
    size_t ByteSize(RosAlloc* rosalloc) const REQUIRES(rosalloc->lock_) {
      const uint8_t* fpr_base = reinterpret_cast<const uint8_t*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
      DCHECK_GE(byte_size, static_cast<size_t>(0));
      DCHECK_ALIGNED(byte_size, kPageSize);
      return byte_size;
    }
    void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
        REQUIRES(rosalloc->lock_) {
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
      size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
      rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
    }
    void* Begin() {
      return reinterpret_cast<void*>(this);
    }
    void* End(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
      uint8_t* end = fpr_base + ByteSize(rosalloc);
      return end;
    }
    bool IsLargerThanPageReleaseThreshold(RosAlloc* rosalloc)
        REQUIRES(rosalloc->lock_) {
      return ByteSize(rosalloc) >= rosalloc->page_release_size_threshold_;
    }
    bool IsAtEndOfSpace(RosAlloc* rosalloc)
        REQUIRES(rosalloc->lock_) {
      return reinterpret_cast<uint8_t*>(this) + ByteSize(rosalloc) == rosalloc->base_ + rosalloc->footprint_;
    }
    bool ShouldReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      switch (rosalloc->page_release_mode_) {
        case kPageReleaseModeNone:
          return false;
        case kPageReleaseModeEnd:
          return IsAtEndOfSpace(rosalloc);
        case kPageReleaseModeSize:
          return IsLargerThanPageReleaseThreshold(rosalloc);
        case kPageReleaseModeSizeAndEnd:
          return IsLargerThanPageReleaseThreshold(rosalloc) && IsAtEndOfSpace(rosalloc);
        case kPageReleaseModeAll:
          return true;
        default:
          LOG(FATAL) << "Unexpected page release mode ";
          return false;
      }
    }
    void ReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
      uint8_t* start = reinterpret_cast<uint8_t*>(this);
      size_t byte_size = ByteSize(rosalloc);
      DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
      if (ShouldReleasePages(rosalloc)) {
        rosalloc->ReleasePageRange(start, start + byte_size);
      }
    }

   private:
    DISALLOW_COPY_AND_ASSIGN(FreePageRun);
  };

  // The slot header.
  class Slot {
   public:
    Slot* Next() const {
      return next_;
    }
    void SetNext(Slot* next) {
      next_ = next;
    }
    // The slot right before this slot in terms of the address.
    Slot* Left(size_t bracket_size) {
      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) - bracket_size);
    }
    void Clear() {
      next_ = nullptr;
    }

   private:
    Slot* next_;  // Next slot in the list.
    friend class RosAlloc;
  };

  // We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need
  // to traverse the list from the head to the tail when merging free lists.
  // We don't use the tail (kUseTail == false) for the free list to avoid the need to manage the
  // tail in the allocation fast path for a performance reason.
  template<bool kUseTail = true>
  class SlotFreeList {
   public:
    SlotFreeList() : head_(0U), tail_(0), size_(0), padding_(0) {}
    Slot* Head() const {
      return reinterpret_cast<Slot*>(head_);
    }
    Slot* Tail() const {
      CHECK(kUseTail);
      return reinterpret_cast<Slot*>(tail_);
    }
    size_t Size() const {
      return size_;
    }
    // Removes from the head of the free list.
    Slot* Remove() {
      Slot* slot;
      if (kIsDebugBuild) {
        Verify();
      }
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        if (kUseTail) {
          DCHECK(*tailp == nullptr);
        }
        return nullptr;
      } else {
        // List wasn't empty.
        if (kUseTail) {
          DCHECK(*tailp != nullptr);
        }
        Slot* old_head_next = old_head->Next();
        slot = old_head;
        *headp = old_head_next;
        if (kUseTail && old_head_next == nullptr) {
          // List becomes empty.
          *tailp = nullptr;
        }
      }
      slot->Clear();
      --size_;
      if (kIsDebugBuild) {
        Verify();
      }
      return slot;
    }
    void Add(Slot* slot) {
      if (kIsDebugBuild) {
        Verify();
      }
      DCHECK(slot != nullptr);
      DCHECK(slot->Next() == nullptr);
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        if (kUseTail) {
          DCHECK(*tailp == nullptr);
        }
        *headp = slot;
        if (kUseTail) {
          *tailp = slot;
        }
      } else {
        // List wasn't empty.
        if (kUseTail) {
          DCHECK(*tailp != nullptr);
        }
        *headp = slot;
        slot->SetNext(old_head);
      }
      ++size_;
      if (kIsDebugBuild) {
        Verify();
      }
    }
    // Merge the given list into this list. Empty the given list.
    // Deliberately support only a kUseTail == true SlotFreeList parameter because 1) we don't
    // currently have a situation where we need a kUseTail == false SlotFreeList parameter, and 2)
    // supporting the kUseTail == false parameter would require an O(n) linked list traversal to do
    // the merge if 'this' SlotFreeList has kUseTail == false, which we'd like to avoid.
    void Merge(SlotFreeList<true>* list) {
      if (kIsDebugBuild) {
        Verify();
        CHECK(list != nullptr);
        list->Verify();
      }
      if (list->Size() == 0) {
        return;
      }
      Slot** headp = reinterpret_cast<Slot**>(&head_);
      Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
      Slot* old_head = *headp;
      if (old_head == nullptr) {
        // List was empty.
        *headp = list->Head();
        if (kUseTail) {
          *tailp = list->Tail();
        }
        size_ = list->Size();
      } else {
        // List wasn't empty.
        DCHECK(list->Head() != nullptr);
        *headp = list->Head();
        DCHECK(list->Tail() != nullptr);
        list->Tail()->SetNext(old_head);
        // If kUseTail, no change to tailp.
        size_ += list->Size();
      }
      list->Reset();
      if (kIsDebugBuild) {
        Verify();
      }
    }

    void Reset() {
      head_ = 0;
      if (kUseTail) {
        tail_ = 0;
      }
      size_ = 0;
    }

    void Verify() {
      Slot* head = reinterpret_cast<Slot*>(head_);
      Slot* tail = kUseTail ? reinterpret_cast<Slot*>(tail_) : nullptr;
      if (size_ == 0) {
        CHECK(head == nullptr);
        if (kUseTail) {
          CHECK(tail == nullptr);
        }
      } else {
        CHECK(head != nullptr);
        if (kUseTail) {
          CHECK(tail != nullptr);
        }
        size_t count = 0;
        for (Slot* slot = head; slot != nullptr; slot = slot->Next()) {
          ++count;
          if (kUseTail && slot->Next() == nullptr) {
            CHECK_EQ(slot, tail);
          }
        }
        CHECK_EQ(size_, count);
      }
    }

   private:
    // A pointer (Slot*) to the head of the list. Always 8 bytes so that we will have the same
    // layout between 32 bit and 64 bit, which is not strictly necessary, but we do so for 1)
    // uniformity, 2) we won't need to change this code if we move to a non-low 4G heap in the
    // future, and 3) the space savings by using 32 bit fields in 32 bit would be lost in noise
    // (won't open up enough space to cause an extra slot to be available).
    uint64_t head_;
    // A pointer (Slot*) to the tail of the list. Always 8 bytes so that we will have the same
    // layout between 32 bit and 64 bit. The tail is stored to speed up merging of lists.
    // Unused if kUseTail is false.
    uint64_t tail_;
    // The number of slots in the list. This is used to make it fast to check if a free list is
    // all free without traversing the whole free list.
    uint32_t size_;
    uint32_t padding_ ATTRIBUTE_UNUSED;
    friend class RosAlloc;
  };
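
  // Illustrative sketch (not part of the allocator) of how the two flavors of SlotFreeList are
  // typically combined; `fast_path_list` and `deferred_frees` are hypothetical names:
  //
  //   SlotFreeList<false> fast_path_list;     // tailless: cheap Add()/Remove() on the alloc path
  //   SlotFreeList<true> deferred_frees;      // keeps a tail so Merge() splices in O(1)
  //   Slot* s = fast_path_list.Remove();      // pop from the head; nullptr if the list is empty
  //   deferred_frees.Add(s);                  // push a freed slot onto the head
  //   fast_path_list.Merge(&deferred_frees);  // prepend the whole list and empty the argument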

  // Represents a run of memory slots of the same size.
  //
  // A run's memory layout:
  //
  // +-------------------+
  // | magic_num         |
  // +-------------------+
  // | size_bracket_idx  |
  // +-------------------+
  // | is_thread_local   |
  // +-------------------+
  // | to_be_bulk_freed  |
  // +-------------------+
  // |                   |
  // | free list         |
  // |                   |
  // +-------------------+
  // |                   |
  // | bulk free list    |
  // |                   |
  // +-------------------+
  // |                   |
  // | thread-local free |
  // | list              |
  // |                   |
  // +-------------------+
  // | padding due to    |
  // | alignment         |
  // +-------------------+
  // | slot 0            |
  // +-------------------+
  // | slot 1            |
  // +-------------------+
  // | slot 2            |
  // +-------------------+
  // ...
  // +-------------------+
  // | last slot         |
  // +-------------------+
  //
  class Run {
   public:
    uint8_t magic_num_;         // The magic number used for debugging.
    uint8_t size_bracket_idx_;  // The index of the size bracket of this run.
    uint8_t is_thread_local_;   // True if this run is used as a thread-local run.
    uint8_t to_be_bulk_freed_;  // Used within BulkFree() to flag a run that's involved with a
                                // bulk free.
    uint32_t padding_ ATTRIBUTE_UNUSED;
    // Use a tailless free list for free_list_ so that the alloc fast path does not manage the
    // tail.
    SlotFreeList<false> free_list_;
    SlotFreeList<true> bulk_free_list_;
    SlotFreeList<true> thread_local_free_list_;
    // Padding due to alignment
    // Slot 0
    // Slot 1
    // ...

    // Returns the byte size of the header.
    static size_t fixed_header_size() {
      return sizeof(Run);
    }
    Slot* FirstSlot() const {
      const uint8_t idx = size_bracket_idx_;
      return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]);
    }
    Slot* LastSlot() {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      uintptr_t end = reinterpret_cast<uintptr_t>(End());
      Slot* last_slot = reinterpret_cast<Slot*>(end - bracket_size);
      DCHECK_LE(FirstSlot(), last_slot);
      return last_slot;
    }
    SlotFreeList<false>* FreeList() {
      return &free_list_;
    }
    SlotFreeList<true>* BulkFreeList() {
      return &bulk_free_list_;
    }
    SlotFreeList<true>* ThreadLocalFreeList() {
      return &thread_local_free_list_;
    }
    void* End() {
      return reinterpret_cast<uint8_t*>(this) + kPageSize * numOfPages[size_bracket_idx_];
    }
    void SetIsThreadLocal(bool is_thread_local) {
      is_thread_local_ = is_thread_local ? 1 : 0;
    }
    bool IsThreadLocal() const {
      return is_thread_local_ != 0;
    }
    // Set up the free list for a new/empty run.
    void InitFreeList() {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      Slot* first_slot = FirstSlot();
      // Add backwards so the first slot is at the head of the list.
      for (Slot* slot = LastSlot(); slot >= first_slot; slot = slot->Left(bracket_size)) {
        free_list_.Add(slot);
      }
    }
    // Merge the thread local free list to the free list. Used when a thread-local run becomes
    // full.
    bool MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out);
    // Merge the bulk free list to the free list. Used in a bulk free.
    void MergeBulkFreeListToFreeList();
    // Merge the bulk free list to the thread local free list. In a bulk free, as a two-step
    // process, GC will first record all the slots to free in a run in the bulk free list where it
    // can write without a lock, and later acquire a lock once per run to merge the bulk free list
    // to the thread-local free list.
    void MergeBulkFreeListToThreadLocalFreeList();
    // Allocates a slot in a run.
    ALWAYS_INLINE void* AllocSlot();
    // Frees a slot in a run. This is used in a non-bulk free.
    void FreeSlot(void* ptr);
    // Add the given slot to the bulk free list. Returns the bracket size.
    size_t AddToBulkFreeList(void* ptr);
    // Add the given slot to the thread-local free list.
    void AddToThreadLocalFreeList(void* ptr);
    // Returns true if all the slots in the run are not in use.
    bool IsAllFree() const {
      return free_list_.Size() == numOfSlots[size_bracket_idx_];
    }
    // Returns the number of free slots.
    size_t NumberOfFreeSlots() {
      return free_list_.Size();
    }
    // Returns true if all the slots in the run are in use.
    ALWAYS_INLINE bool IsFull();
    // Returns true if the bulk free list is empty.
    bool IsBulkFreeListEmpty() const {
      return bulk_free_list_.Size() == 0;
    }
    // Returns true if the thread local free list is empty.
    bool IsThreadLocalFreeListEmpty() const {
      return thread_local_free_list_.Size() == 0;
    }
    // Zero the run's data.
    void ZeroData();
    // Zero the run's header and the slot headers.
    void ZeroHeaderAndSlotHeaders();
    // Iterate over all the slots and apply the given function.
    void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
                         void* arg);
    // Dump the run metadata for debugging.
    std::string Dump();
    // Verify for debugging.
    void Verify(Thread* self, RosAlloc* rosalloc, bool running_on_memory_tool)
        REQUIRES(Locks::mutator_lock_)
        REQUIRES(Locks::thread_list_lock_);

   private:
    // The common part of AddToBulkFreeList() and AddToThreadLocalFreeList(). Returns the bracket
    // size.
    size_t AddToFreeListShared(void* ptr, SlotFreeList<true>* free_list, const char* caller_name);
    // Turns a FreeList into a string for debugging.
    template<bool kUseTail>
    std::string FreeListToStr(SlotFreeList<kUseTail>* free_list);
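    // Slot address arithmetic used by ToSlot() and SlotIndex() below (this restates the run
    // layout above rather than adding anything new): slot i of a run with bracket index idx
    // starts at
    //   reinterpret_cast<uint8_t*>(run) + headerSizes[idx] + i * bracketSizes[idx],
    // so a pointer is mapped back to its slot index by subtracting FirstSlot() and dividing by
    // the bracket size.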
    // Check that a given pointer is a valid slot address and return it as a Slot*.
    Slot* ToSlot(void* ptr) {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
          - reinterpret_cast<uint8_t*>(FirstSlot());
      DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
      size_t slot_idx = offset_from_slot_base / bracket_size;
      DCHECK_LT(slot_idx, numOfSlots[idx]);
      return reinterpret_cast<Slot*>(ptr);
    }
    size_t SlotIndex(Slot* slot) const {
      const uint8_t idx = size_bracket_idx_;
      const size_t bracket_size = bracketSizes[idx];
      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
          - reinterpret_cast<uint8_t*>(FirstSlot());
      DCHECK_EQ(offset_from_slot_base % bracket_size, 0U);
      size_t slot_idx = offset_from_slot_base / bracket_size;
      DCHECK_LT(slot_idx, numOfSlots[idx]);
      return slot_idx;
    }

    // TODO: DISALLOW_COPY_AND_ASSIGN(Run);
  };

  // The magic number for a run.
  static constexpr uint8_t kMagicNum = 42;
  // The magic number for free pages.
  static constexpr uint8_t kMagicNumFree = 43;
  // The number of size brackets.
  static constexpr size_t kNumOfSizeBrackets = 42;
  // The sizes (the slot sizes, in bytes) of the size brackets.
  static size_t bracketSizes[kNumOfSizeBrackets];
  // The numbers of pages that are used for runs for each size bracket.
  static size_t numOfPages[kNumOfSizeBrackets];
  // The numbers of slots of the runs for each size bracket.
  static size_t numOfSlots[kNumOfSizeBrackets];
  // The header sizes in bytes of the runs for each size bracket.
  static size_t headerSizes[kNumOfSizeBrackets];

  // Initialize the run specs (the above arrays).
  static void Initialize();
  static bool initialized_;

  // Returns the byte size of the bracket size from the index.
  static size_t IndexToBracketSize(size_t idx) {
    DCHECK_LT(idx, kNumOfSizeBrackets);
    return bracketSizes[idx];
  }
  // Returns the index of the size bracket from the bracket size.
  static size_t BracketSizeToIndex(size_t size) {
    DCHECK(8 <= size &&
           ((size <= kMaxThreadLocalBracketSize && size % kThreadLocalBracketQuantumSize == 0) ||
            (size <= kMaxRegularBracketSize && size % kBracketQuantumSize == 0) ||
            size == 1 * KB || size == 2 * KB));
    size_t idx;
    if (UNLIKELY(size == 1 * KB)) {
      idx = kNumOfSizeBrackets - 2;
    } else if (UNLIKELY(size == 2 * KB)) {
      idx = kNumOfSizeBrackets - 1;
    } else if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      DCHECK_EQ(size % kThreadLocalBracketQuantumSize, 0U);
      idx = size / kThreadLocalBracketQuantumSize - 1;
    } else {
      DCHECK(size <= kMaxRegularBracketSize);
      DCHECK_EQ((size - kMaxThreadLocalBracketSize) % kBracketQuantumSize, 0U);
      idx = ((size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
          + kNumThreadLocalSizeBrackets;
    }
    DCHECK(bracketSizes[idx] == size);
    return idx;
  }
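  // Worked examples of the bracket mapping, derived from the constants defined further below
  // (8-byte quantum up to 128 bytes, 16-byte quantum up to 512 bytes, then 1 KB and 2 KB):
  //   BracketSizeToIndex(8)    == 0   (8 / 8 - 1)
  //   BracketSizeToIndex(128)  == 15  (128 / 8 - 1)
  //   BracketSizeToIndex(144)  == 16  ((144 - 128) / 16 - 1 + 16)
  //   BracketSizeToIndex(512)  == 39  ((512 - 128) / 16 - 1 + 16)
  //   BracketSizeToIndex(1024) == 40, BracketSizeToIndex(2048) == 41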
  // Returns true if the given allocation size is for a thread local allocation.
  static bool IsSizeForThreadLocal(size_t size) {
    bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize;
    DCHECK(size > kLargeSizeThreshold ||
           (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
    return is_size_for_thread_local;
  }
  // Rounds the size up to the nearest bracket size.
  static size_t RoundToBracketSize(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      return RoundUp(size, kThreadLocalBracketQuantumSize);
    } else if (size <= kMaxRegularBracketSize) {
      return RoundUp(size, kBracketQuantumSize);
    } else if (UNLIKELY(size <= 1 * KB)) {
      return 1 * KB;
    } else {
      DCHECK_LE(size, 2 * KB);
      return 2 * KB;
    }
  }
  // Returns the size bracket index from the byte size with rounding.
  static size_t SizeToIndex(size_t size) {
    DCHECK(size <= kLargeSizeThreshold);
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      return RoundUp(size, kThreadLocalBracketQuantumSize) / kThreadLocalBracketQuantumSize - 1;
    } else if (size <= kMaxRegularBracketSize) {
      return (RoundUp(size, kBracketQuantumSize) - kMaxThreadLocalBracketSize) / kBracketQuantumSize
          - 1 + kNumThreadLocalSizeBrackets;
    } else if (size <= 1 * KB) {
      return kNumOfSizeBrackets - 2;
    } else {
      DCHECK_LE(size, 2 * KB);
      return kNumOfSizeBrackets - 1;
    }
  }
  // A combination of SizeToIndex() and RoundToBracketSize().
  static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
    DCHECK(size <= kLargeSizeThreshold);
    size_t idx;
    size_t bracket_size;
    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
      bracket_size = RoundUp(size, kThreadLocalBracketQuantumSize);
      idx = bracket_size / kThreadLocalBracketQuantumSize - 1;
    } else if (size <= kMaxRegularBracketSize) {
      bracket_size = RoundUp(size, kBracketQuantumSize);
      idx = ((bracket_size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
          + kNumThreadLocalSizeBrackets;
    } else if (size <= 1 * KB) {
      bracket_size = 1 * KB;
      idx = kNumOfSizeBrackets - 2;
    } else {
      DCHECK(size <= 2 * KB);
      bracket_size = 2 * KB;
      idx = kNumOfSizeBrackets - 1;
    }
    DCHECK_EQ(idx, SizeToIndex(size)) << idx;
    DCHECK_EQ(bracket_size, IndexToBracketSize(idx)) << idx;
    DCHECK_EQ(bracket_size, bracketSizes[idx]) << idx;
    DCHECK_LE(size, bracket_size) << idx;
    DCHECK(size > kMaxRegularBracketSize ||
           (size <= kMaxThreadLocalBracketSize &&
            bracket_size - size < kThreadLocalBracketQuantumSize) ||
           (size <= kMaxRegularBracketSize && bracket_size - size < kBracketQuantumSize)) << idx;
    *bracket_size_out = bracket_size;
    return idx;
  }

  // Returns the page map index from an address. Requires that the
  // address is page size aligned.
  size_t ToPageMapIndex(const void* addr) const {
    DCHECK_LE(base_, addr);
    DCHECK_LT(addr, base_ + capacity_);
    size_t byte_offset = reinterpret_cast<const uint8_t*>(addr) - base_;
    DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
    return byte_offset / kPageSize;
  }
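  // Illustrative sketch (hypothetical values, assuming a 4 KiB kPageSize): an address at
  // base_ + 3 * kPageSize maps to page map index 3, and RoundDownToPageMapIndex() below maps any
  // address inside that page, e.g. base_ + 3 * kPageSize + 100, to the same index.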
  // Returns the page map index from an address with rounding.
  size_t RoundDownToPageMapIndex(const void* addr) const {
    DCHECK(base_ <= addr && addr < reinterpret_cast<uint8_t*>(base_) + capacity_);
    return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
  }

  // A memory allocation request larger than this size is treated as a large object and allocated
  // at a page granularity.
  static const size_t kLargeSizeThreshold = 2048;

  // If true, check that the returned memory is actually zero.
  static constexpr bool kCheckZeroMemory = kIsDebugBuild;
  // Do not check memory when running under a memory tool. In a normal
  // build with kCheckZeroMemory the whole test should be optimized away.
  // TODO: Unprotect before checks.
  ALWAYS_INLINE bool ShouldCheckZeroMemory();

  // If true, log verbose details of operations.
  static constexpr bool kTraceRosAlloc = false;

  struct hash_run {
    size_t operator()(const RosAlloc::Run* r) const {
      return reinterpret_cast<size_t>(r);
    }
  };

  struct eq_run {
    bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
      return r1 == r2;
    }
  };

 public:
  // Different page release modes.
  enum PageReleaseMode {
    kPageReleaseModeNone,        // Release no empty pages.
    kPageReleaseModeEnd,         // Release empty pages at the end of the space.
    kPageReleaseModeSize,        // Release empty pages that are larger than the threshold.
    kPageReleaseModeSizeAndEnd,  // Release empty pages that are larger than the threshold or
                                 // at the end of the space.
    kPageReleaseModeAll,         // Release all empty pages.
  };

  // The default value for page_release_size_threshold_.
  static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;

  // We use thread-local runs for the size brackets whose indexes
  // are less than this index. We use shared (current) runs for the rest.
  // Sync this with the length of Thread::rosalloc_runs_.
  static const size_t kNumThreadLocalSizeBrackets = 16;
  static_assert(kNumThreadLocalSizeBrackets == kNumRosAllocThreadLocalSizeBracketsInThread,
                "Mismatch between kNumThreadLocalSizeBrackets and "
                "kNumRosAllocThreadLocalSizeBracketsInThread");

  // The size of the largest bracket we use thread-local runs for.
  // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
  static const size_t kMaxThreadLocalBracketSize = 128;

  // We use regular (8- or 16-byte increment) runs for the size brackets whose indexes are less
  // than this index.
  static const size_t kNumRegularSizeBrackets = 40;

  // The size of the largest regular (8- or 16-byte increment) bracket. Non-regular brackets are
  // the 1 KB and the 2 KB brackets. This should be equal to
  // bracketSizes[kNumRegularSizeBrackets - 1].
  static const size_t kMaxRegularBracketSize = 512;

  // The bracket size increment for the thread-local brackets (<= kMaxThreadLocalBracketSize bytes).
  static constexpr size_t kThreadLocalBracketQuantumSize = 8;

  // Equal to Log2(kThreadLocalBracketQuantumSize).
  static constexpr size_t kThreadLocalBracketQuantumSizeShift = 3;

  // The bracket size increment for the non-thread-local, regular brackets (of size <=
  // kMaxRegularBracketSize bytes and > kMaxThreadLocalBracketSize bytes).
  static constexpr size_t kBracketQuantumSize = 16;

  // Equal to Log2(kBracketQuantumSize).
  static constexpr size_t kBracketQuantumSizeShift = 4;
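
  // Summary of the resulting bracket geometry (restating the constants above):
  //   brackets  0-15: 8, 16, ..., 128 bytes (8-byte quantum, served from thread-local runs)
  //   brackets 16-39: 144, 160, ..., 512 bytes (16-byte quantum, served from shared current runs)
  //   bracket  40: 1 KB;  bracket 41: 2 KB
  // Requests above kLargeSizeThreshold (2048 bytes) bypass runs and are allocated as whole pages.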

 private:
  // The base address of the memory region that's managed by this allocator.
  uint8_t* base_;

  // The footprint in bytes of the currently allocated portion of the
  // memory region.
  size_t footprint_;

  // The maximum footprint. The address, base_ + capacity_, indicates
  // the end of the memory region that's currently managed by this allocator.
  size_t capacity_;

  // The maximum capacity. The address, base_ + max_capacity_, indicates
  // the end of the memory region that's ever managed by this allocator.
  size_t max_capacity_;

  template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
  using AllocationTrackingSet = std::set<Key, Compare, TrackingAllocator<Key, kTag>>;

  // The run sets that hold the runs whose slots are not all
  // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
  AllocationTrackingSet<Run*, kAllocatorTagRosAlloc> non_full_runs_[kNumOfSizeBrackets];
  // The run sets that hold the runs whose slots are all full. This is
  // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
  std::unordered_set<Run*, hash_run, eq_run, TrackingAllocator<Run*, kAllocatorTagRosAlloc>>
      full_runs_[kNumOfSizeBrackets];
  // The set of free pages.
  AllocationTrackingSet<FreePageRun*, kAllocatorTagRosAlloc> free_page_runs_ GUARDED_BY(lock_);
  // The dedicated full run; it is always full and shared by all threads when revoking happens.
  // This is an optimization since it enables us to avoid a null check for revoked runs.
  static Run* dedicated_full_run_;
  // Using size_t to ensure that it is at least word aligned.
  static size_t dedicated_full_run_storage_[];
  // The current runs where the allocations are first attempted for
  // the size brackets that do not use thread-local
  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
  Run* current_runs_[kNumOfSizeBrackets];
  // The mutexes, one per size bracket.
  Mutex* size_bracket_locks_[kNumOfSizeBrackets];
  // Bracket lock names (since locks only have char* names).
  std::string size_bracket_lock_names_[kNumOfSizeBrackets];
  // The types of page map entries.
  enum PageMapKind {
    kPageMapReleased = 0,     // Zero and released back to the OS.
    kPageMapEmpty,            // Zero but probably dirty.
    kPageMapRun,              // The beginning of a run.
    kPageMapRunPart,          // The non-beginning part of a run.
    kPageMapLargeObject,      // The beginning of a large object.
    kPageMapLargeObjectPart,  // The non-beginning part of a large object.
  };
  // The table that indicates what pages are currently used for.
  volatile uint8_t* page_map_;  // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
  size_t page_map_size_;
  size_t max_page_map_size_;
  MemMap page_map_mem_map_;
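  // Illustrative page_map_ contents (hypothetical): a two-page large object followed by a
  // one-page run and a released page would appear as
  //   { kPageMapLargeObject, kPageMapLargeObjectPart, kPageMapRun, kPageMapReleased }.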

  // The table that indicates the size of free page runs. These sizes
  // are stored here to avoid storing in the free page header and
  // release backing pages.
  std::vector<size_t, TrackingAllocator<size_t, kAllocatorTagRosAlloc>> free_page_run_size_map_
      GUARDED_BY(lock_);
  // The global lock. Used to guard the page map, the free page set,
  // and the footprint.
  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
  // The reader-writer lock to allow one bulk free at a time while
  // allowing multiple individual frees at the same time. Also, this
  // is used to avoid race conditions between BulkFree() and
  // RevokeThreadLocalRuns() on the bulk free list.
  ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;

  // The page release mode.
  const PageReleaseMode page_release_mode_;
  // Under kPageReleaseModeSize(AndEnd), if the free page run size is
  // greater than or equal to this value, release pages.
  const size_t page_release_size_threshold_;

  // Whether this allocator is running on a memory tool.
  bool is_running_on_memory_tool_;

  // The base address of the memory region that's managed by this allocator.
  uint8_t* Begin() { return base_; }
  // The end address of the memory region that's managed by this allocator.
  uint8_t* End() { return base_ + capacity_; }

  // Page-granularity alloc/free.
  void* AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type)
      REQUIRES(lock_);
  // Returns how many bytes were freed.
  size_t FreePages(Thread* self, void* ptr, bool already_zero) REQUIRES(lock_);

  // Allocate/free a run slot.
  void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
                     size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  // Allocate/free a run slot without acquiring locks.
  // TODO: REQUIRES(Locks::mutator_lock_)
  void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
                                 size_t* usable_size, size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx) REQUIRES(!lock_);

  // Returns the bracket size.
  size_t FreeFromRun(Thread* self, void* ptr, Run* run)
      REQUIRES(!lock_);

  // Used to allocate a new thread-local run for a size bracket.
  Run* AllocRun(Thread* self, size_t idx) REQUIRES(!lock_);

  // Used to acquire a new/reused run for a size bracket. Used when a
  // thread-local or current run gets full.
  Run* RefillRun(Thread* self, size_t idx) REQUIRES(!lock_);

  // The internal implementation of the non-bulk Free().
  size_t FreeInternal(Thread* self, void* ptr) REQUIRES(!lock_);

  // Allocates large objects.
  void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated,
                         size_t* usable_size, size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);

  // Revoke a run by adding it to non_full_runs_ or freeing the pages.
  void RevokeRun(Thread* self, size_t idx, Run* run) REQUIRES(!lock_);

  // Revoke the current runs which share an index with the thread local runs.
  void RevokeThreadUnsafeCurrentRuns() REQUIRES(!lock_);

  // Release a range of pages.
  size_t ReleasePageRange(uint8_t* start, uint8_t* end) REQUIRES(lock_);

  // Dumps the page map for debugging.
  std::string DumpPageMap() REQUIRES(lock_);

 public:
  RosAlloc(void* base, size_t capacity, size_t max_capacity,
           PageReleaseMode page_release_mode,
           bool running_on_memory_tool,
           size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
  ~RosAlloc();

  static constexpr size_t RunFreeListOffset() {
    return OFFSETOF_MEMBER(Run, free_list_);
  }
  static constexpr size_t RunFreeListHeadOffset() {
    return OFFSETOF_MEMBER(SlotFreeList<false>, head_);
  }
  static constexpr size_t RunFreeListSizeOffset() {
    return OFFSETOF_MEMBER(SlotFreeList<false>, size_);
  }
  static constexpr size_t RunSlotNextOffset() {
    return OFFSETOF_MEMBER(Slot, next_);
  }

  // If kThreadSafe is false then the allocator may avoid acquiring some locks as an optimization.
  // If used, this may cause race conditions if multiple threads are allocating at the same time.
  template<bool kThreadSafe = true>
  void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
              size_t* bytes_tl_bulk_allocated)
      REQUIRES(!lock_);
  size_t Free(Thread* self, void* ptr)
      REQUIRES(!bulk_free_lock_, !lock_);
  size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
      REQUIRES(!bulk_free_lock_, !lock_);

  // Returns true if the given allocation request can be allocated in
  // an existing thread local run without allocating a new run.
  ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size);
  // Allocate the given allocation request in an existing thread local
  // run without allocating a new run.
  ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated);

  // Returns the maximum bytes that could be allocated for the given
  // size in bulk, that is the maximum value for the
  // bytes_allocated_bulk out param returned by RosAlloc::Alloc().
  ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size);

  // Returns the size of the allocated slot for a given allocated memory chunk.
  size_t UsableSize(const void* ptr) REQUIRES(!lock_);
  // Returns the size of the allocated slot for a given size.
  size_t UsableSize(size_t bytes) {
    if (UNLIKELY(bytes > kLargeSizeThreshold)) {
      return RoundUp(bytes, kPageSize);
    } else {
      return RoundToBracketSize(bytes);
    }
  }
  // Try to reduce the current footprint by releasing the free page
  // run at the end of the memory region, if any.
  bool Trim() REQUIRES(!lock_);
  // Iterates over all the memory slots and applies the given function.
  void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
                  void* arg)
      REQUIRES(!lock_);

  // Release empty pages.
  size_t ReleasePages() REQUIRES(!lock_);
  // Returns the current footprint.
  size_t Footprint() REQUIRES(!lock_);
  // Returns the current capacity, maximum footprint.
  size_t FootprintLimit() REQUIRES(!lock_);
  // Update the current capacity.
  void SetFootprintLimit(size_t bytes) REQUIRES(!lock_);
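
  // Illustrative usage sketch of the public interface above (hypothetical caller; inside ART the
  // real caller is gc::space::RosAllocSpace). The out-parameter values shown are what the bracket
  // rounding above implies for a non-large request, not a guarantee made by this header:
  //
  //   size_t bytes_allocated, usable_size, bytes_tl_bulk_allocated;
  //   void* obj = rosalloc->Alloc<true>(self, 24, &bytes_allocated, &usable_size,
  //                                     &bytes_tl_bulk_allocated);
  //   // A 24-byte request falls in the 24-byte bracket, so usable_size == 24.
  //   size_t freed = rosalloc->Free(self, obj);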

  // Releases the thread-local runs assigned to the given thread back to the common set of runs.
  // Returns the total bytes of free slots in the revoked thread local runs. This is to be
  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
  size_t RevokeThreadLocalRuns(Thread* thread) REQUIRES(!lock_, !bulk_free_lock_);
  // Releases the thread-local runs assigned to all the threads back to the common set of runs.
  // Returns the total bytes of free slots in the revoked thread local runs. This is to be
  // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
  size_t RevokeAllThreadLocalRuns() REQUIRES(!Locks::thread_list_lock_, !lock_, !bulk_free_lock_);
  // Assert the thread local runs of a thread are revoked.
  void AssertThreadLocalRunsAreRevoked(Thread* thread) REQUIRES(!bulk_free_lock_);
  // Assert all the thread local runs are revoked.
  void AssertAllThreadLocalRunsAreRevoked() REQUIRES(!Locks::thread_list_lock_, !bulk_free_lock_);

  static Run* GetDedicatedFullRun() {
    return dedicated_full_run_;
  }
  bool IsFreePage(size_t idx) const {
    DCHECK_LT(idx, capacity_ / kPageSize);
    uint8_t pm_type = page_map_[idx];
    return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
  }

  // Callbacks for InspectAll that will count the number of bytes
  // allocated and objects allocated, respectively.
  static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
  static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);

  bool DoesReleaseAllPages() const {
    return page_release_mode_ == kPageReleaseModeAll;
  }

  // Verify for debugging.
  void Verify() REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !bulk_free_lock_,
                         !lock_);

  void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes)
      REQUIRES(!bulk_free_lock_, !lock_);

  void DumpStats(std::ostream& os)
      REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_) REQUIRES(!bulk_free_lock_);

 private:
  friend std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs);

  DISALLOW_COPY_AND_ASSIGN(RosAlloc);
};
std::ostream& operator<<(std::ostream& os, const RosAlloc::PageMapKind& rhs);

// Callback from rosalloc when it needs to increase the footprint. Must be implemented somewhere
// else (currently rosalloc_space.cc).
void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment);

}  // namespace allocator
}  // namespace gc
}  // namespace art

#endif  // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_