/*
 * Copyright 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_
#define ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_

#include <signal.h>

#include <map>
#include <memory>
#include <unordered_map>
#include <unordered_set>

#include "barrier.h"
#include "base/atomic.h"
#include "base/gc_visited_arena_pool.h"
#include "base/macros.h"
#include "base/mutex.h"
#include "garbage_collector.h"
#include "gc/accounting/atomic_stack.h"
#include "gc/accounting/bitmap-inl.h"
#include "gc/accounting/heap_bitmap.h"
#include "gc_root.h"
#include "immune_spaces.h"
#include "offsets.h"

namespace art {

bool KernelSupportsUffd();

namespace mirror {
class DexCache;
}  // namespace mirror

namespace gc {

class Heap;

namespace space {
class BumpPointerSpace;
}  // namespace space

namespace collector {
class MarkCompact final : public GarbageCollector {
 public:
  using SigbusCounterType = uint32_t;

  static constexpr size_t kAlignment = kObjectAlignment;
  static constexpr int kCopyMode = -1;
  static constexpr int kMinorFaultMode = -2;
  // Fake file descriptor for fallback mode (when uffd isn't available).
  static constexpr int kFallbackMode = -3;
  static constexpr int kFdSharedAnon = -1;
  static constexpr int kFdUnused = -2;

  // Bitmask for the compaction-done bit in sigbus_in_progress_count_.
  static constexpr SigbusCounterType kSigbusCounterCompactionDoneMask =
      1u << (BitSizeOf<SigbusCounterType>() - 1);

  explicit MarkCompact(Heap* heap);

  ~MarkCompact() {}

  void RunPhases() override REQUIRES(!Locks::mutator_lock_, !lock_);

  // Updated before (or in) the pre-compaction pause and accessed only in the
  // pause or during concurrent compaction. The flag is reset in the next GC
  // cycle's InitializePhase(). Therefore, it's safe to update without any
  // memory ordering.
  bool IsCompacting() const { return compacting_; }

  bool IsUsingSigbusFeature() const { return use_uffd_sigbus_; }
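
  // A minimal sketch of the mutator-side protocol implied by
  // kSigbusCounterCompactionDoneMask and sigbus_in_progress_count_ (declared
  // further below). Illustrative only, not the actual handler code:
  //
  //   SigbusCounterType prev =
  //       sigbus_in_progress_count_.fetch_add(1, std::memory_order_acquire);
  //   if ((prev & kSigbusCounterCompactionDoneMask) != 0) {
  //     // The GC thread has already published the compaction-done bit, so
  //     // there is nothing to compact for this fault; undo the increment.
  //     sigbus_in_progress_count_.fetch_sub(1, std::memory_order_release);
  //   } else {
  //     // Compaction still in progress: process the faulted page, then
  //     // decrement the counter on the way out of the handler.
  //   }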

  // Called by the SIGBUS handler. NO_THREAD_SAFETY_ANALYSIS for the mutator
  // lock, which is asserted in the function.
  bool SigbusHandler(siginfo_t* info) REQUIRES(!lock_) NO_THREAD_SAFETY_ANALYSIS;

  GcType GetGcType() const override {
    return kGcTypeFull;
  }

  CollectorType GetCollectorType() const override {
    return kCollectorTypeCMC;
  }

  Barrier& GetBarrier() {
    return gc_barrier_;
  }

  mirror::Object* MarkObject(mirror::Object* obj) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
                         bool do_atomic_update) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  void VisitRoots(mirror::Object*** roots,
                  size_t count,
                  const RootInfo& info) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
                  size_t count,
                  const RootInfo& info) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  bool IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
                                   bool do_atomic_update) override
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  void RevokeAllThreadLocalBuffers() override;

  void DelayReferenceReferent(ObjPtr<mirror::Class> klass,
                              ObjPtr<mirror::Reference> reference) override
      REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);

  mirror::Object* IsMarked(mirror::Object* obj) override
      REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);

  mirror::Object* GetFromSpaceAddrFromBarrier(mirror::Object* old_ref) {
    CHECK(compacting_);
    if (live_words_bitmap_->HasAddress(old_ref)) {
      return GetFromSpaceAddr(old_ref);
    }
    return old_ref;
  }

  // Called from Heap::PostForkChildAction() for non-zygote processes and from
  // PrepareForCompaction() for zygote processes. Returns true if uffd was
  // created or was already done.
  bool CreateUserfaultfd(bool post_fork);

  // Returns a pair indicating whether userfaultfd itself is available (first)
  // and, if so, whether its minor-fault feature is available (second).
  static std::pair<bool, bool> GetUffdAndMinorFault();

  // Add linear-alloc space data when a new space is added to
  // GcVisitedArenaPool, which mostly happens only once.
  void AddLinearAllocSpaceData(uint8_t* begin, size_t len);

  // In the copy-mode of userfaultfd we don't need to reach a 'processed'
  // state, as it's given that the processing thread also copies the page,
  // thereby mapping it. The order is important as we may treat them as
  // integers.
  enum class PageState : uint8_t {
    kUnprocessed = 0,           // Not processed yet
    kProcessing = 1,            // Being processed by GC thread and will not be mapped
    kProcessed = 2,             // Processed but not mapped
    kProcessingAndMapping = 3,  // Being processed by GC or mutator and will be mapped
    kMutatorProcessing = 4,     // Being processed by mutator thread
    kProcessedAndMapping = 5,   // Processed and will be mapped
    kProcessedAndMapped = 6     // Processed and mapped. For SIGBUS.
  };
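
  // The states above are tracked in Atomic<PageState> arrays with one entry
  // per page (see moving_pages_status_ and LinearAllocSpaceData below). A
  // minimal sketch, using std::atomic for illustration rather than the
  // collector's actual code, of how a thread claims a page so that it is
  // processed exactly once:
  //
  //   std::atomic<PageState>& state = page_status[page_idx];
  //   PageState expected = PageState::kUnprocessed;
  //   if (state.compare_exchange_strong(
  //           expected, PageState::kProcessing, std::memory_order_acquire)) {
  //     // This thread owns the page: compact/update it, then publish the
  //     // outcome by storing one of the kProcessed* states.
  //   } else {
  //     // Another thread (GC or mutator) claimed the page first; 'expected'
  //     // now holds the state that thread installed.
  //   }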

 private:
  using ObjReference = mirror::CompressedReference<mirror::Object>;
  // Number of bits (live-words) covered by a single chunk-info (below)
  // entry/word.
  // TODO: Since popcount is performed using SIMD instructions, we should
  // consider using 128-bit words in order to halve the chunk-info size.
  static constexpr uint32_t kBitsPerVectorWord = kBitsPerIntPtrT;
  static constexpr uint32_t kOffsetChunkSize = kBitsPerVectorWord * kAlignment;
  static_assert(kOffsetChunkSize < kPageSize);
  // Bitmap in which all the bits corresponding to every live word are set. For
  // example, an object which is 4 words in size will have the corresponding 4
  // bits set. This is required for efficient computation of the new address
  // (post-compaction) from a given old address (pre-compaction).
  template <size_t kAlignment>
  class LiveWordsBitmap : private accounting::MemoryRangeBitmap<kAlignment> {
    using Bitmap = accounting::Bitmap;
    using MemRangeBitmap = accounting::MemoryRangeBitmap<kAlignment>;

   public:
    static_assert(IsPowerOfTwo(kBitsPerVectorWord));
    static_assert(IsPowerOfTwo(Bitmap::kBitsPerBitmapWord));
    static_assert(kBitsPerVectorWord >= Bitmap::kBitsPerBitmapWord);
    static constexpr uint32_t kBitmapWordsPerVectorWord =
        kBitsPerVectorWord / Bitmap::kBitsPerBitmapWord;
    static_assert(IsPowerOfTwo(kBitmapWordsPerVectorWord));
    static LiveWordsBitmap* Create(uintptr_t begin, uintptr_t end);

    // Return the offset (within the indexed chunk-info) of the nth live word.
    uint32_t FindNthLiveWordOffset(size_t chunk_idx, uint32_t n) const;
    // Sets all bits in the bitmap corresponding to the given range. Also
    // returns the bit-index of the first word.
    ALWAYS_INLINE uintptr_t SetLiveWords(uintptr_t begin, size_t size);
    // Count the number of live words up to the given bit-index. This is to be
    // used to compute the post-compact address of an old reference.
    ALWAYS_INLINE size_t CountLiveWordsUpto(size_t bit_idx) const;
    // Call 'visitor' for every stride of contiguous marked bits in the
    // live-words bitmap, starting from begin_bit_idx. Only visit 'bytes' live
    // bytes, or until 'end', whichever comes first.
    // The visitor is called with the index of the first marked bit in the
    // stride, the stride size, and whether it's the last stride in the given
    // range or not.
    template <typename Visitor>
    ALWAYS_INLINE void VisitLiveStrides(uintptr_t begin_bit_idx,
                                        uint8_t* end,
                                        const size_t bytes,
                                        Visitor&& visitor) const
        REQUIRES_SHARED(Locks::mutator_lock_);
    // Count the number of live bytes in the given vector entry.
    size_t LiveBytesInBitmapWord(size_t chunk_idx) const;
    void ClearBitmap() { Bitmap::Clear(); }
    ALWAYS_INLINE uintptr_t Begin() const { return MemRangeBitmap::CoverBegin(); }
    ALWAYS_INLINE bool HasAddress(mirror::Object* obj) const {
      return MemRangeBitmap::HasAddress(reinterpret_cast<uintptr_t>(obj));
    }
    ALWAYS_INLINE bool Test(uintptr_t bit_index) const {
      return Bitmap::TestBit(bit_index);
    }
    ALWAYS_INLINE bool Test(mirror::Object* obj) const {
      return MemRangeBitmap::Test(reinterpret_cast<uintptr_t>(obj));
    }
    ALWAYS_INLINE uintptr_t GetWord(size_t index) const {
      static_assert(kBitmapWordsPerVectorWord == 1);
      return Bitmap::Begin()[index * kBitmapWordsPerVectorWord];
    }
  };
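
  // Conceptually, the live-words bitmap alone suffices to compute a
  // post-compact address: an object's new offset is the number of live bytes
  // preceding it. A sketch, where 'moving_space_begin' and 'bit_idx' are
  // stand-ins rather than members of this class:
  //
  //   size_t live_bytes_before =
  //       live_words_bitmap_->CountLiveWordsUpto(bit_idx) * kAlignment;
  //   auto* new_ref = reinterpret_cast<mirror::Object*>(
  //       moving_space_begin + live_bytes_before);
  //
  // Counting bits from the start of the bitmap for every reference would be
  // too slow, which is why chunk_info_vec_ (declared further below) caches
  // per-chunk live-byte counts during marking and converts them into running
  // offsets with an exclusive prefix sum, roughly:
  //
  //   uint32_t total = 0;
  //   for (size_t i = 0; i < vec_len; i++) {
  //     uint32_t chunk_bytes = chunk_info_vec_[i];
  //     chunk_info_vec_[i] = total;  // live bytes preceding chunk i
  //     total += chunk_bytes;
  //   }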

  // For a given object address in the pre-compact space, return the
  // corresponding address in the from-space, where heap pages are relocated in
  // the compaction pause.
  mirror::Object* GetFromSpaceAddr(mirror::Object* obj) const {
    DCHECK(live_words_bitmap_->HasAddress(obj)) << " obj=" << obj;
    return reinterpret_cast<mirror::Object*>(reinterpret_cast<uintptr_t>(obj)
                                             + from_space_slide_diff_);
  }

  // Verifies that the given object reference refers to a valid object.
  // Otherwise fatally dumps logs, including those from the callback.
  template <typename Callback>
  void VerifyObject(mirror::Object* ref, Callback& callback) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Check if the obj is within the heap and has a klass which is likely to be
  // a valid mirror::Class.
  bool IsValidObject(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_);
  void InitializePhase();
  void FinishPhase() REQUIRES(!Locks::mutator_lock_, !Locks::heap_bitmap_lock_, !lock_);
  void MarkingPhase() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_);
  void CompactionPhase() REQUIRES_SHARED(Locks::mutator_lock_);

  void SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(!Locks::heap_bitmap_lock_);
  // Update the reference at the given offset in the given object with the
  // post-compact address.
  ALWAYS_INLINE void UpdateRef(mirror::Object* obj, MemberOffset offset)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Verify that the gc-root is updated only once. Returns false if the update
  // shouldn't be done.
  ALWAYS_INLINE bool VerifyRootSingleUpdate(void* root,
                                            mirror::Object* old_ref,
                                            const RootInfo& info)
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Update the given root with the post-compact address.
  ALWAYS_INLINE void UpdateRoot(mirror::CompressedReference<mirror::Object>* root,
                                const RootInfo& info = RootInfo(RootType::kRootUnknown))
      REQUIRES_SHARED(Locks::mutator_lock_);
  ALWAYS_INLINE void UpdateRoot(mirror::Object** root,
                                const RootInfo& info = RootInfo(RootType::kRootUnknown))
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Given the pre-compact address, return the post-compact address of the
  // given object.
  ALWAYS_INLINE mirror::Object* PostCompactAddress(mirror::Object* old_ref) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Compute the post-compact address of an object in the moving space. This
  // function assumes that old_ref is in the moving space.
  ALWAYS_INLINE mirror::Object* PostCompactAddressUnchecked(mirror::Object* old_ref) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Compute the new address for an object which was allocated prior to
  // starting this GC cycle.
  ALWAYS_INLINE mirror::Object* PostCompactOldObjAddr(mirror::Object* old_ref) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Compute the new address for an object which was black-allocated during
  // this GC cycle.
  ALWAYS_INLINE mirror::Object* PostCompactBlackObjAddr(mirror::Object* old_ref) const
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Identify immune spaces and reset the card-table, mod-union-table, and mark
  // bitmaps.
  void BindAndResetBitmaps() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
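
  // A sketch of the dispatch implied by PostCompactOldObjAddr() and
  // PostCompactBlackObjAddr() above (not necessarily the exact code of
  // PostCompactAddressUnchecked()): objects allocated before the marking pause
  // are relocated via the live-words bitmap, whereas black allocations are
  // simply slid down by a constant distance (see black_objs_slide_diff_
  // below).
  //
  //   if (reinterpret_cast<uint8_t*>(old_ref) < black_allocations_begin_) {
  //     return PostCompactOldObjAddr(old_ref);
  //   } else {
  //     return PostCompactBlackObjAddr(old_ref);
  //   }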

  // Perform one last round of marking, identifying roots from dirty cards
  // during a stop-the-world (STW) pause.
  void MarkingPause() REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_);
  // Perform a stop-the-world pause prior to concurrent compaction. Updates
  // GC-roots and protects the heap so that during the concurrent compaction
  // phase we can receive faults and compact the corresponding pages on the
  // fly.
  void CompactionPause() REQUIRES(Locks::mutator_lock_);
  // Compute offsets (in chunk_info_vec_) and other data structures required
  // during concurrent compaction.
  void PrepareForCompaction() REQUIRES_SHARED(Locks::mutator_lock_);

  // Copy kPageSize live bytes starting from 'offset' (within the moving
  // space), which must be within 'obj', into the kPageSize-sized memory
  // pointed to by 'addr'. Then update the references within the copied
  // objects. The boundary objects are partially updated such that only the
  // references that lie in the page are updated. This is necessary to avoid
  // cascading userfaults.
  void CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* addr, bool needs_memset_zero)
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Compact the bump-pointer space. Pass the page that should be used as the
  // buffer for userfaultfd.
  template <int kMode>
  void CompactMovingSpace(uint8_t* page) REQUIRES_SHARED(Locks::mutator_lock_);

  // Compact the given page as per 'func' and change its state. Also map/copy
  // the page, if required.
  template <int kMode, typename CompactionFn>
  ALWAYS_INLINE void DoPageCompactionWithStateChange(size_t page_idx,
                                                     size_t status_arr_len,
                                                     uint8_t* to_space_page,
                                                     uint8_t* page,
                                                     CompactionFn func)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Update all the objects in the given non-moving space page. The 'first'
  // object could have started in some preceding page.
  void UpdateNonMovingPage(mirror::Object* first, uint8_t* page)
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Update all the references in the non-moving space.
  void UpdateNonMovingSpace() REQUIRES_SHARED(Locks::mutator_lock_);

  // For all the pages in the non-moving space, find the first object that
  // overlaps with the page's start address, and store it in the
  // first_objs_non_moving_space_ array.
  void InitNonMovingSpaceFirstObjects() REQUIRES_SHARED(Locks::mutator_lock_);
  // In addition to the first-objects for every post-compact moving space page,
  // also find offsets within those objects from where the contents should be
  // copied to the page. The offsets are relative to the moving-space's
  // beginning. Store the computed first-object and offset in
  // first_objs_moving_space_ and pre_compact_offset_moving_space_
  // respectively.
  void InitMovingSpaceFirstObjects(const size_t vec_len) REQUIRES_SHARED(Locks::mutator_lock_);

  // Gather the info related to black allocations from the bump-pointer space
  // to enable concurrent sliding of these pages.
  void UpdateMovingSpaceBlackAllocations() REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
  // Update first-object info from the allocation-stack for non-moving space
  // black allocations.
  void UpdateNonMovingSpaceBlackAllocations() REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);

  // Slides (retaining the empty holes, which are usually part of some in-use
  // TLAB) a black page in the moving space. 'first_obj' is the object that
  // overlaps with the first byte of the page being slid. 'pre_compact_page' is
  // the pre-compact address of the page being slid.
  // 'page_idx' is used to fetch the first allocated chunk's size and the next
  // page's first_obj. 'dest' is the kPageSize-sized memory where the contents
  // are copied.
  void SlideBlackPage(mirror::Object* first_obj,
                      const size_t page_idx,
                      uint8_t* const pre_compact_page,
                      uint8_t* dest,
                      bool needs_memset_zero) REQUIRES_SHARED(Locks::mutator_lock_);

  // Perform reference-processing and the likes before sweeping the non-movable
  // spaces.
  void ReclaimPhase() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_);

  // Mark GC-roots (except from immune spaces and thread-stacks) during a STW
  // pause.
  void ReMarkRoots(Runtime* runtime) REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
  // Concurrently mark GC-roots, except from immune spaces.
  void MarkRoots(VisitRootFlags flags) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Collect thread stack roots via a checkpoint.
  void MarkRootsCheckpoint(Thread* self, Runtime* runtime) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Second round of concurrent marking. Mark all gray objects that got dirtied
  // since the first round.
  void PreCleanCards() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);

  void MarkNonThreadRoots(Runtime* runtime) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  void MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime)
      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);

  // Traverse through the reachable objects and mark them.
  void MarkReachableObjects() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Scan (only) immune spaces looking for references into the
  // garbage-collected spaces.
  void UpdateAndMarkModUnion() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Scan mod-union and card tables, covering all the spaces, to identify dirty
  // objects. These are in 'minimum age' cards, which is 'kCardAged' in the
  // case of concurrent (second round) marking and kCardDirty during the STW
  // pause.
  void ScanDirtyObjects(bool paused, uint8_t minimum_age) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Recursively mark dirty objects. Invoked both concurrently as well as in a
  // STW pause in PausePhase().
  void RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Go through all the objects in the mark-stack until it's empty.
  void ProcessMarkStack() override REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  void ExpandMarkStack() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  // Scan object for references. If kUpdateLiveWords is true then set bits in
  // the live-words bitmap and add the object's size to the chunk-info.
  template <bool kUpdateLiveWords>
  void ScanObject(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  // Push objects to the mark-stack right after successfully marking them.
  void PushOnMarkStack(mirror::Object* obj)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  // Update the live-words bitmap as well as add the object size to the
  // chunk-info vector.
  // Both are required for computation of post-compact addresses.
  // Also updates freed_objects_ counter.
  void UpdateLivenessInfo(mirror::Object* obj, size_t obj_size)
      REQUIRES_SHARED(Locks::mutator_lock_);

  void ProcessReferences(Thread* self)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(!Locks::heap_bitmap_lock_);

  void MarkObjectNonNull(mirror::Object* obj,
                         mirror::Object* holder = nullptr,
                         MemberOffset offset = MemberOffset(0))
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  void MarkObject(mirror::Object* obj, mirror::Object* holder, MemberOffset offset)
      REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  template <bool kParallel>
  bool MarkObjectNonNullNoPush(mirror::Object* obj,
                               mirror::Object* holder = nullptr,
                               MemberOffset offset = MemberOffset(0))
      REQUIRES(Locks::heap_bitmap_lock_)
      REQUIRES_SHARED(Locks::mutator_lock_);

  void Sweep(bool swap_bitmaps) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);
  void SweepLargeObjects(bool swap_bitmaps) REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  // Perform all kernel operations required for concurrent compaction. Includes
  // mremap to move pre-compact pages to from-space, followed by userfaultfd
  // registration on the moving space and linear-alloc.
  void KernelPreparation();
  // Called by KernelPreparation() for every memory range being prepared for
  // userfaultfd registration.
  void KernelPrepareRangeForUffd(uint8_t* to_addr,
                                 uint8_t* from_addr,
                                 size_t map_size,
                                 int fd,
                                 uint8_t* shadow_addr = nullptr);

  void RegisterUffd(void* addr, size_t size, int mode);
  void UnregisterUffd(uint8_t* start, size_t len);

  // Called by thread-pool workers to read uffd_ and process fault events.
  template <int kMode>
  void ConcurrentCompaction(uint8_t* buf) REQUIRES_SHARED(Locks::mutator_lock_);
  // Called by thread-pool workers to compact and copy/map the fault page in
  // moving space.
  template <int kMode>
  void ConcurrentlyProcessMovingPage(uint8_t* fault_page,
                                     uint8_t* buf,
                                     size_t nr_moving_space_used_pages)
      REQUIRES_SHARED(Locks::mutator_lock_);
  // Called by thread-pool workers to process and copy/map the fault page in
  // linear-alloc.
  template <int kMode>
  void ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool is_minor_fault)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Process concurrently all the pages in linear-alloc. Called by gc-thread.
  void ProcessLinearAlloc() REQUIRES_SHARED(Locks::mutator_lock_);

  // Returns true if the moving space can be compacted using uffd's minor-fault
  // feature.
  bool CanCompactMovingSpaceWithMinorFault();

  void FreeFromSpacePages(size_t cur_page_idx, int mode) REQUIRES_SHARED(Locks::mutator_lock_);
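
  // A sketch of the userfaultfd read loop that ConcurrentCompaction() (above)
  // is described to run in uffd-worker threads. Illustrative only, with error
  // handling omitted; uffd_msg and UFFD_EVENT_PAGEFAULT come from
  // <linux/userfaultfd.h>:
  //
  //   struct uffd_msg msg;
  //   while (read(uffd_, &msg, sizeof(msg)) == static_cast<ssize_t>(sizeof(msg))) {
  //     if (msg.event != UFFD_EVENT_PAGEFAULT) {
  //       continue;
  //     }
  //     uint8_t* fault_page =
  //         AlignDown(reinterpret_cast<uint8_t*>(msg.arg.pagefault.address), kPageSize);
  //     // Compact (moving space) or update (linear-alloc) fault_page, then
  //     // wake the faulting thread by mapping the page with a UFFDIO_COPY,
  //     // UFFDIO_ZEROPAGE, or UFFDIO_CONTINUE ioctl on uffd_.
  //   }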

  // Maps processed pages (from the moving space and linear-alloc) for uffd's
  // minor-fault feature. We try to 'claim' all processed (and unmapped) pages
  // contiguous to 'to_space_start'.
  // kFirstPageMapping indicates whether the first page is already claimed or
  // not. It also indicates that the ioctl must succeed in mapping the first
  // page.
  template <bool kFirstPageMapping>
  void MapProcessedPages(uint8_t* to_space_start,
                         Atomic<PageState>* state_arr,
                         size_t arr_idx,
                         size_t arr_len) REQUIRES_SHARED(Locks::mutator_lock_);

  bool IsValidFd(int fd) const { return fd >= 0; }
  // Add/update the <class, obj> pair if class > obj and obj is the
  // lowest-address object of the class.
  ALWAYS_INLINE void UpdateClassAfterObjectMap(mirror::Object* obj)
      REQUIRES_SHARED(Locks::mutator_lock_);

  // Updates the 'class_after_obj_map_' map by replacing each key (class) with
  // its highest-address super-class (obtained from
  // 'super_class_after_class_map_'), if there is any. This is to ensure we
  // don't free from-space pages before the lowest-address obj is compacted.
  void UpdateClassAfterObjMap();

  void MarkZygoteLargeObjects() REQUIRES_SHARED(Locks::mutator_lock_)
      REQUIRES(Locks::heap_bitmap_lock_);

  void ZeropageIoctl(void* addr, bool tolerate_eexist, bool tolerate_enoent);
  void CopyIoctl(void* dst, void* buffer);
  // Called after updating a linear-alloc page to either map a zero-page, if
  // the page wasn't touched during the update, or map the page via copy-ioctl.
  // Then updates the page's state to indicate that the page is mapped.
  void MapUpdatedLinearAllocPage(uint8_t* page,
                                 uint8_t* shadow_page,
                                 Atomic<PageState>& state,
                                 bool page_touched);

  // For checkpoints
  Barrier gc_barrier_;
  // Every object inside the immune spaces is assumed to be marked.
  ImmuneSpaces immune_spaces_;
  // Required only when the mark-stack is accessed in shared mode, which
  // happens when collecting thread-stack roots using a checkpoint. Otherwise,
  // we use it to synchronize on updated_roots_ in debug builds.
  Mutex lock_;
  accounting::ObjectStack* mark_stack_;
  // Special bitmap wherein all the bits corresponding to an object are set.
  // TODO: make LiveWordsBitmap encapsulated in this class rather than a
  // pointer. We tend to access its members in performance-sensitive
  // code-paths. Also, use a single MemMap for all the GC's data structures,
  // which we will clear in the end. This would help in limiting the number of
  // VMAs that get created in the kernel.
  std::unique_ptr<LiveWordsBitmap<kAlignment>> live_words_bitmap_;
  // Track GC-roots updated so far in a GC cycle. This is to confirm that no
  // GC-root is updated twice.
  // TODO: Must be replaced with an efficient mechanism eventually. Or ensure
  // that double updates don't happen in the first place.
  std::unique_ptr<std::unordered_set<void*>> updated_roots_ GUARDED_BY(lock_);
  MemMap from_space_map_;
  MemMap shadow_to_space_map_;
  // An array of live-bytes in logical chunks of kOffsetChunkSize size in the
  // 'to-be-compacted' space.
  MemMap info_map_;
  // Set of page-sized buffers used for compaction. The first page is used by
  // the GC thread. Subsequent pages are used by mutator threads in the case of
  // the SIGBUS feature, and by uffd-worker threads otherwise. In the latter
  // case the first page is also used for termination of concurrent compaction
  // by making worker threads terminate the userfaultfd read loop.
  MemMap compaction_buffers_map_;

  class LessByArenaAddr {
   public:
    bool operator()(const TrackedArena* a, const TrackedArena* b) const {
      return std::less<uint8_t*>{}(a->Begin(), b->Begin());
    }
  };

  // Map of arenas allocated in the LinearAlloc arena-pool and their last
  // non-zero page, captured during the compaction pause for concurrent
  // updates.
  std::map<const TrackedArena*, uint8_t*, LessByArenaAddr> linear_alloc_arenas_;
  // Set of PageStatus arrays, one per arena-pool space. It's extremely rare to
  // have more than one, but this is to be ready for the worst case.
  class LinearAllocSpaceData {
   public:
    LinearAllocSpaceData(MemMap&& shadow,
                         MemMap&& page_status_map,
                         uint8_t* begin,
                         uint8_t* end,
                         bool already_shared)
        : shadow_(std::move(shadow)),
          page_status_map_(std::move(page_status_map)),
          begin_(begin),
          end_(end),
          already_shared_(already_shared) {}

    MemMap shadow_;
    MemMap page_status_map_;
    uint8_t* begin_;
    uint8_t* end_;
    // Indicates if the linear-alloc is already MAP_SHARED.
    bool already_shared_;
  };

  std::vector<LinearAllocSpaceData> linear_alloc_spaces_data_;

  class ObjReferenceHash {
   public:
    uint32_t operator()(const ObjReference& ref) const {
      return ref.AsVRegValue() >> kObjectAlignmentShift;
    }
  };

  class ObjReferenceEqualFn {
   public:
    bool operator()(const ObjReference& a, const ObjReference& b) const {
      return a.AsMirrorPtr() == b.AsMirrorPtr();
    }
  };

  class LessByObjReference {
   public:
    bool operator()(const ObjReference& a, const ObjReference& b) const {
      return std::less<mirror::Object*>{}(a.AsMirrorPtr(), b.AsMirrorPtr());
    }
  };

  // Data structures used to track objects whose layout information is stored
  // in later allocated classes (at higher addresses). We must be careful not
  // to free the corresponding from-space pages prematurely.
  using ObjObjOrderedMap = std::map<ObjReference, ObjReference, LessByObjReference>;
  using ObjObjUnorderedMap =
      std::unordered_map<ObjReference, ObjReference, ObjReferenceHash, ObjReferenceEqualFn>;
  // Unordered map of <K, S> such that the class K (in moving space) has
  // kClassWalkSuper in its reference bitmap and S is its highest-address super
  // class.
  ObjObjUnorderedMap super_class_after_class_hash_map_;
  // Unordered map of <K, V> such that the class K (in moving space) is after
  // its objects, or would require iterating the super-class hierarchy when
  // visiting references. And V is its lowest-address object (in moving space).
  ObjObjUnorderedMap class_after_obj_hash_map_;
  // Ordered map constructed before starting compaction using the above two
  // maps. The key is a class (or super-class) which is higher in address order
  // than some of its object(s) and the value is the corresponding object with
  // the lowest address.
  ObjObjOrderedMap class_after_obj_ordered_map_;
  // Since the compaction is done in reverse, we use a reverse iterator. It is
  // maintained either at the pair whose class is lower than the first page to
  // be freed, or at the pair whose object is not yet compacted.
  ObjObjOrderedMap::const_reverse_iterator class_after_obj_iter_;
  // Cached reference to the last class which has kClassWalkSuper in its
  // reference bitmap but has all its super classes lower in address order than
  // itself.
  mirror::Class* walk_super_class_cache_;
  // Used by FreeFromSpacePages() for maintaining markers in the moving space
  // for how far the pages have been reclaimed/checked.
  size_t last_checked_reclaim_page_idx_;
  uint8_t* last_reclaimed_page_;

  space::ContinuousSpace* non_moving_space_;
  space::BumpPointerSpace* const bump_pointer_space_;
  // The main space bitmap
  accounting::ContinuousSpaceBitmap* const moving_space_bitmap_;
  accounting::ContinuousSpaceBitmap* non_moving_space_bitmap_;
  Thread* thread_running_gc_;
  // Array of moving-space's pages' compaction status.
  Atomic<PageState>* moving_pages_status_;
  size_t vector_length_;
  size_t live_stack_freeze_size_;

  uint64_t bytes_scanned_;

  // For every page in the to-space (post-compact heap) we need to know the
  // first object from which we must compact and/or update references. This is
  // for both non-moving and moving space. Additionally, for the moving-space,
  // we also need the offset within the object from where we need to start
  // copying.
  // chunk_info_vec_ holds live bytes for chunks during the marking phase.
  // After marking we perform an exclusive scan to compute the offset for every
  // chunk.
  uint32_t* chunk_info_vec_;
  // For pages before black allocations, pre_compact_offset_moving_space_[i]
  // holds the offset within the space from where the objects need to be copied
  // into the ith post-compact page.
  // Otherwise, black_alloc_pages_first_chunk_size_[i] holds the size of the
  // first non-empty chunk in the ith black-allocations page.
  union {
    uint32_t* pre_compact_offset_moving_space_;
    uint32_t* black_alloc_pages_first_chunk_size_;
  };
  // first_objs_moving_space_[i] is the pre-compact address of the object which
  // would overlap with the starting boundary of the ith post-compact page.
  ObjReference* first_objs_moving_space_;
  // First object for every page. It could be greater than the page's start
  // address, or null if the page is empty.
  ObjReference* first_objs_non_moving_space_;
  size_t non_moving_first_objs_count_;
  // Length of the first_objs_moving_space_ and pre_compact_offset_moving_space_
  // arrays. Also the number of pages which are to be compacted.
  size_t moving_first_objs_count_;
  // Number of pages containing black-allocated objects, indicating the number
  // of pages to be slid.
  size_t black_page_count_;

  uint8_t* from_space_begin_;
  // Moving-space's end pointer at the marking pause. All allocations beyond
  // this will be considered black in the current GC cycle. Aligned up to page
  // size.
  uint8_t* black_allocations_begin_;
  // End of the compacted space. Used for computing the post-compact address of
  // black-allocated objects. Aligned up to page size.
  uint8_t* post_compact_end_;
  // Cache (black_allocations_begin_ - post_compact_end_) for post-compact
  // address computations.
  ptrdiff_t black_objs_slide_diff_;
  // Cache (from_space_begin_ - bump_pointer_space_->Begin()) so that we can
  // compute the from-space address of a given pre-compact address efficiently.
  ptrdiff_t from_space_slide_diff_;
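
  // The two cached differences above turn address translation into a single
  // addition or subtraction. A sketch mirroring GetFromSpaceAddr() above (the
  // black-object case is illustrative, not necessarily the exact code of
  // PostCompactBlackObjAddr()):
  //
  //   // Pre-compact address -> from-space address, where the original page
  //   // contents live during compaction:
  //   auto* from_ref = reinterpret_cast<mirror::Object*>(
  //       reinterpret_cast<uintptr_t>(old_ref) + from_space_slide_diff_);
  //
  //   // Pre-compact address of a black-allocated object -> post-compact
  //   // address (black pages are slid down as a block):
  //   auto* new_ref = reinterpret_cast<mirror::Object*>(
  //       reinterpret_cast<uint8_t*>(old_ref) - black_objs_slide_diff_);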

  // TODO: Remove once an efficient mechanism to deal with double root updates
  // is incorporated.
  void* stack_high_addr_;
  void* stack_low_addr_;

  uint8_t* conc_compaction_termination_page_;

  PointerSize pointer_size_;
  // Number of objects freed during this GC in the moving space. It is
  // decremented every time an object is discovered. The total object count is
  // added to it in MarkingPause(). It reaches the correct count only once the
  // marking phase is completed.
  int32_t freed_objects_;
  // memfds for the moving space, for using userfaultfd's minor-fault feature.
  // Initialized to kFdUnused to indicate that mmap should be MAP_PRIVATE in
  // KernelPrepareRange().
  int moving_to_space_fd_;
  int moving_from_space_fd_;
  // Userfault file descriptor, accessed only by the GC itself.
  // kFallbackMode value indicates that we are in the fallback mode.
  int uffd_;
  // Number of mutator threads currently executing the SIGBUS handler. When the
  // GC thread is done with compaction, it sets the most significant bit to
  // indicate that. Mutator threads check for the flag when incrementing in the
  // handler.
  std::atomic<SigbusCounterType> sigbus_in_progress_count_;
  // Number of mutator-threads/uffd-workers working on a moving-space page. It
  // must be 0 before the GC thread can unregister the space after it's done
  // sequentially compacting all pages of the space.
  std::atomic<uint16_t> compaction_in_progress_count_;
  // When using the SIGBUS feature, this counter is used by mutators to claim a
  // page out of the compaction buffers, to be used for the entire compaction
  // cycle.
  std::atomic<uint16_t> compaction_buffer_counter_;
  // Used to exit from the compaction loop at the end of concurrent compaction.
  uint8_t thread_pool_counter_;
  // True while compacting.
  bool compacting_;
  // Flag indicating whether one-time uffd initialization has been done. It
  // will be false on the first GC for non-zygote processes, and always for
  // zygote. Its purpose is to minimize the userfaultfd overhead in
  // Heap::PostForkChildAction(), as it's invoked in the app startup path. With
  // this, we register the compaction-termination page on the first GC.
  bool uffd_initialized_;
  // Flag indicating if userfaultfd supports minor-faults. Set appropriately in
  // CreateUserfaultfd(), where we get this information from the kernel.
  const bool uffd_minor_fault_supported_;
  // Flag indicating if we should use SIGBUS signals instead of threads to
  // handle userfaults.
  const bool use_uffd_sigbus_;
  // For non-zygote processes this flag indicates if the spaces are ready to
  // start using userfaultfd's minor-fault feature. This initialization
  // involves starting to use shmem (memfd_create) for the userfaultfd
  // protected spaces.
  bool minor_fault_initialized_;
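
  // A sketch of how a mutator in SIGBUS mode might claim its compaction buffer
  // using compaction_buffer_counter_ and compaction_buffers_map_ (declared
  // above). Illustrative only; the exact indexing (e.g. skipping the GC
  // thread's first page) is an assumption:
  //
  //   uint16_t idx =
  //       compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed);
  //   uint8_t* buffer = compaction_buffers_map_.Begin() + idx * kPageSize;
  //   // The thread keeps this buffer for the rest of the compaction cycle.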

  // Set to true when linear-alloc can start mapping with MAP_SHARED. Set on
  // non-zygote processes during the first GC, which sets up everything for
  // using minor-fault from the next GC.
  bool map_linear_alloc_shared_;

  class FlipCallback;
  class ThreadFlipVisitor;
  class VerifyRootMarkedVisitor;
  class ScanObjectVisitor;
  class CheckpointMarkThreadRoots;
  template<size_t kBufferSize> class ThreadRootsVisitor;
  class CardModifiedVisitor;
  class RefFieldsVisitor;
  template <bool kCheckBegin, bool kCheckEnd> class RefsUpdateVisitor;
  class ArenaPoolPageUpdater;
  class ClassLoaderRootsUpdater;
  class LinearAllocPageUpdater;
  class ImmuneSpaceUpdateObjVisitor;
  class ConcurrentCompactionGcTask;

  DISALLOW_IMPLICIT_CONSTRUCTORS(MarkCompact);
};

std::ostream& operator<<(std::ostream& os, MarkCompact::PageState value);

}  // namespace collector
}  // namespace gc
}  // namespace art

#endif  // ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_