1 /*
2  * Copyright 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 // Glibc v2.19 doesn't include these in fcntl.h, so host builds will fail without them.
19 #if !defined(FALLOC_FL_PUNCH_HOLE) || !defined(FALLOC_FL_KEEP_SIZE)
20 #include <linux/falloc.h>
21 #endif
22 #include <linux/userfaultfd.h>
23 #include <poll.h>
24 #include <sys/ioctl.h>
25 #include <sys/mman.h>
26 #include <sys/resource.h>
27 #include <sys/stat.h>
28 #include <sys/syscall.h>
29 #include <unistd.h>
30 
31 #include <fstream>
32 #include <numeric>
33 #include <string>
34 #include <string_view>
35 #include <vector>
36 
37 #include "android-base/file.h"
38 #include "android-base/parsebool.h"
39 #include "android-base/parseint.h"
40 #include "android-base/properties.h"
41 #include "android-base/strings.h"
42 #include "base/file_utils.h"
43 #include "base/memfd.h"
44 #include "base/quasi_atomic.h"
45 #include "base/systrace.h"
46 #include "base/utils.h"
47 #include "gc/accounting/mod_union_table-inl.h"
48 #include "gc/collector_type.h"
49 #include "gc/reference_processor.h"
50 #include "gc/space/bump_pointer_space.h"
51 #include "gc/space/space-inl.h"
52 #include "gc/task_processor.h"
53 #include "gc/verification-inl.h"
54 #include "jit/jit_code_cache.h"
55 #include "mark_compact-inl.h"
56 #include "mirror/object-refvisitor-inl.h"
57 #include "read_barrier_config.h"
58 #include "scoped_thread_state_change-inl.h"
59 #include "sigchain.h"
60 #include "thread_list.h"
61 
62 #ifdef ART_TARGET_ANDROID
63 #include "android-modules-utils/sdk_level.h"
64 #include "com_android_art.h"
65 #include "com_android_art_flags.h"
66 #endif
67 
68 // See aosp/2996596 for where these values came from.
69 #ifndef UFFDIO_COPY_MODE_MMAP_TRYLOCK
70 #define UFFDIO_COPY_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
71 #endif
72 #ifndef UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK
73 #define UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
74 #endif
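// [Illustrative sketch, not part of this file] A hedged example of how a trylock
// mode bit like the ones defined above is meant to be used: OR it into the
// request's mode field and fall back to the regular, lock-waiting ioctl if the
// trylock attempt doesn't succeed. Field names follow <linux/userfaultfd.h>;
// the fallback policy here is an assumption, not ART's actual handling.
//
//   struct uffdio_copy copy = {.dst = dst, .src = src, .len = len,
//                              .mode = UFFDIO_COPY_MODE_MMAP_TRYLOCK};
//   if (ioctl(uffd, UFFDIO_COPY, &copy) != 0) {
//     copy.mode = 0;  // Retry without the trylock hint.
//     ioctl(uffd, UFFDIO_COPY, &copy);
//   }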
75 #ifdef ART_TARGET_ANDROID
76 namespace {
77 
78 using ::android::base::GetBoolProperty;
79 using ::android::base::ParseBool;
80 using ::android::base::ParseBoolResult;
81 using ::android::modules::sdklevel::IsAtLeastV;
82 
83 }  // namespace
84 #endif
85 
86 namespace art HIDDEN {
87 
88 static bool HaveMremapDontunmap() {
89   const size_t page_size = GetPageSizeSlow();
90   void* old = mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
91   CHECK_NE(old, MAP_FAILED);
92   void* addr = mremap(old, page_size, page_size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, nullptr);
93   CHECK_EQ(munmap(old, page_size), 0);
94   if (addr != MAP_FAILED) {
95     CHECK_EQ(munmap(addr, page_size), 0);
96     return true;
97   } else {
98     return false;
99   }
100 }
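// [Illustrative sketch, not part of this file] For context, what MREMAP_DONTUNMAP
// provides: after the move, the source range stays mapped (now empty), so
// subsequent accesses to it raise missing-page faults that userfaultfd (or its
// SIGBUS mode) can intercept while the collector fills in compacted contents.
//
//   // Move `len` bytes away from `from`; `from` remains mapped but empty.
//   void* to = mremap(from, len, len, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, nullptr);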
101 
102 static bool gUffdSupportsMmapTrylock = false;
103 // We require the MREMAP_DONTUNMAP functionality of the mremap syscall, which was
104 // introduced in kernel version 5.13 but has been backported to GKI kernels.
105 static bool gHaveMremapDontunmap = IsKernelVersionAtLeast(5, 13) || HaveMremapDontunmap();
106 // Bitmap of features supported by userfaultfd. This is obtained via uffd API ioctl.
107 static uint64_t gUffdFeatures = 0;
108 // Both missing and minor faults on shmem are needed only for minor-fault mode.
109 static constexpr uint64_t kUffdFeaturesForMinorFault =
110     UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM;
111 static constexpr uint64_t kUffdFeaturesForSigbus = UFFD_FEATURE_SIGBUS;
112 // A region which is more than kBlackDenseRegionThreshold percent live doesn't
113 // need to be compacted as it is too densely packed.
114 static constexpr uint kBlackDenseRegionThreshold = 95U;
115 // We consider the SIGBUS feature necessary to enable this GC, as it's superior to
116 // the threading-based implementation with respect to jank. We may want minor-fault
117 // in the future to make jit-code-cache updates (which use shmem) concurrent.
118 bool KernelSupportsUffd() {
119 #ifdef __linux__
120   if (gHaveMremapDontunmap) {
121     int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
122     // On non-android devices we may not have the kernel patches that restrict
123     // userfaultfd to user mode. But that is not a security concern as we are
124     // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
125     if (!kIsTargetAndroid && fd == -1 && errno == EINVAL) {
126       fd = syscall(__NR_userfaultfd, O_CLOEXEC);
127     }
128     if (fd >= 0) {
129       // We are only fetching the available features, which are returned by the
130       // ioctl.
131       struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
132       CHECK_EQ(ioctl(fd, UFFDIO_API, &api), 0) << "ioctl_userfaultfd : API:" << strerror(errno);
133       gUffdFeatures = api.features;
134       // MMAP_TRYLOCK is available only in 5.10 and 5.15 GKI kernels. The higher
135       // versions will have per-vma locks. The lower ones don't support
136       // userfaultfd.
137       if (kIsTargetAndroid && !IsKernelVersionAtLeast(5, 16)) {
138         // Check if MMAP_TRYLOCK feature is supported
139         const size_t page_size = GetPageSizeSlow();
140         void* mem =
141             mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
142         CHECK_NE(mem, MAP_FAILED) << " errno: " << errno;
143 
144         struct uffdio_zeropage uffd_zeropage;
145         uffd_zeropage.mode = UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK;
146         uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(mem);
147         uffd_zeropage.range.len = page_size;
148         uffd_zeropage.zeropage = 0;
149         // The ioctl will definitely fail as mem is not registered with uffd.
150         CHECK_EQ(ioctl(fd, UFFDIO_ZEROPAGE, &uffd_zeropage), -1);
151         // uffd ioctls return EINVAL for several reasons. By ensuring proper
152         // alignment of 'mem' and 'len', we make sure that, before
153         // uffd_zeropage.zeropage is updated (with an error), the ioctl fails
154         // with EINVAL only if `trylock` isn't available.
155         if (uffd_zeropage.zeropage == 0 && errno == EINVAL) {
156           LOG(INFO) << "MMAP_TRYLOCK is not supported in uffd addr:" << mem
157                     << " page-size:" << page_size;
158         } else {
159           gUffdSupportsMmapTrylock = true;
160           LOG(INFO) << "MMAP_TRYLOCK is supported in uffd errno:" << errno << " addr:" << mem
161                     << " size:" << page_size;
162         }
163         munmap(mem, page_size);
164       }
165       close(fd);
166       // The minimum we need is the SIGBUS feature for using userfaultfd.
167       return (api.features & kUffdFeaturesForSigbus) == kUffdFeaturesForSigbus;
168     }
169   }
170 #endif
171   return false;
172 }
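// [Illustrative sketch, not part of this file] A hedged example of the SIGBUS
// feature probed above: with UFFD_FEATURE_SIGBUS enabled, a missing-page fault
// on a uffd-registered range delivers SIGBUS to the faulting thread instead of
// queueing a read()able event, so the fault can be resolved synchronously in a
// signal handler rather than by a dedicated handler thread.
//
//   struct uffdio_api api = {.api = UFFD_API, .features = UFFD_FEATURE_SIGBUS};
//   ioctl(uffd, UFFDIO_API, &api);
//   struct uffdio_register reg;
//   reg.range.start = reinterpret_cast<uintptr_t>(addr);
//   reg.range.len = len;
//   reg.mode = UFFDIO_REGISTER_MODE_MISSING;
//   ioctl(uffd, UFFDIO_REGISTER, &reg);
//   // The process's SIGBUS handler then maps the faulting page, e.g. with
//   // UFFDIO_COPY or UFFDIO_ZEROPAGE, before returning.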
173 
174 // The other cases are defined as constexpr in runtime/read_barrier_config.h
175 #if !defined(ART_FORCE_USE_READ_BARRIER) && defined(ART_USE_READ_BARRIER)
176 // Returns collector type asked to be used on the cmdline.
177 static gc::CollectorType FetchCmdlineGcType() {
178   std::string argv;
179   gc::CollectorType gc_type = gc::CollectorType::kCollectorTypeNone;
180   if (android::base::ReadFileToString("/proc/self/cmdline", &argv)) {
181     auto pos = argv.rfind("-Xgc:");
182     if (pos != std::string::npos && argv.substr(pos + 5, 3) == "CMC") {
183       gc_type = gc::CollectorType::kCollectorTypeCMC;
184     } else if (pos != std::string::npos && argv.substr(pos + 5, 2) == "CC") {
185       gc_type = gc::CollectorType::kCollectorTypeCC;
186     }
187   }
188   return gc_type;
189 }
190 
191 #ifdef ART_TARGET_ANDROID
192 static int GetOverrideCacheInfoFd() {
193   std::string args_str;
194   if (!android::base::ReadFileToString("/proc/self/cmdline", &args_str)) {
195     LOG(WARNING) << "Failed to load /proc/self/cmdline";
196     return -1;
197   }
198   std::vector<std::string_view> args;
199   Split(std::string_view(args_str), /*separator=*/'\0', &args);
200   for (std::string_view arg : args) {
201     if (android::base::ConsumePrefix(&arg, "--cache-info-fd=")) {  // This is a dex2oat flag.
202       int fd;
203       if (!android::base::ParseInt(std::string(arg), &fd)) {
204         LOG(ERROR) << "Failed to parse --cache-info-fd (value: '" << arg << "')";
205         return -1;
206       }
207       return fd;
208     }
209   }
210   return -1;
211 }
212 
213 static std::unordered_map<std::string, std::string> GetCachedProperties() {
214   // For simplicity, we don't handle multiple calls because otherwise we would have to reset the fd.
215   static bool called = false;
216   CHECK(!called) << "GetCachedBoolProperty can be called only once";
217   called = true;
218 
219   std::string cache_info_contents;
220   int fd = GetOverrideCacheInfoFd();
221   if (fd >= 0) {
222     if (!android::base::ReadFdToString(fd, &cache_info_contents)) {
223       PLOG(ERROR) << "Failed to read cache-info from fd " << fd;
224       return {};
225     }
226   } else {
227     std::string path = GetApexDataDalvikCacheDirectory(InstructionSet::kNone) + "/cache-info.xml";
228     if (!android::base::ReadFileToString(path, &cache_info_contents)) {
229       // If the file is not found, then we are in chroot or in a standalone runtime process (e.g.,
230       // IncidentHelper), or odsign/odrefresh failed to generate and sign the cache info. There's
231       // nothing we can do.
232       if (errno != ENOENT) {
233         PLOG(ERROR) << "Failed to read cache-info from the default path";
234       }
235       return {};
236     }
237   }
238 
239   std::optional<com::android::art::CacheInfo> cache_info =
240       com::android::art::parse(cache_info_contents.c_str());
241   if (!cache_info.has_value()) {
242     // This should never happen.
243     LOG(ERROR) << "Failed to parse cache-info";
244     return {};
245   }
246   const com::android::art::KeyValuePairList* list = cache_info->getFirstSystemProperties();
247   if (list == nullptr) {
248     // This should never happen.
249     LOG(ERROR) << "Missing system properties from cache-info";
250     return {};
251   }
252   const std::vector<com::android::art::KeyValuePair>& properties = list->getItem();
253   std::unordered_map<std::string, std::string> result;
254   for (const com::android::art::KeyValuePair& pair : properties) {
255     result[pair.getK()] = pair.getV();
256   }
257   return result;
258 }
259 
260 static bool GetCachedBoolProperty(
261     const std::unordered_map<std::string, std::string>& cached_properties,
262     const std::string& key,
263     bool default_value) {
264   auto it = cached_properties.find(key);
265   if (it == cached_properties.end()) {
266     return default_value;
267   }
268   ParseBoolResult result = ParseBool(it->second);
269   switch (result) {
270     case ParseBoolResult::kTrue:
271       return true;
272     case ParseBoolResult::kFalse:
273       return false;
274     case ParseBoolResult::kError:
275       return default_value;
276   }
277 }
278 
279 static bool SysPropSaysUffdGc() {
280   // The phenotype flag can change at any time after boot, but it shouldn't take effect until a
281   // reboot. Therefore, we read the phenotype flag from the cache info, which is generated on boot.
282   std::unordered_map<std::string, std::string> cached_properties = GetCachedProperties();
283   bool phenotype_enable = GetCachedBoolProperty(
284       cached_properties, "persist.device_config.runtime_native_boot.enable_uffd_gc_2", false);
285   bool phenotype_force_disable = GetCachedBoolProperty(
286       cached_properties, "persist.device_config.runtime_native_boot.force_disable_uffd_gc", false);
287   bool build_enable = GetBoolProperty("ro.dalvik.vm.enable_uffd_gc", false);
288   bool is_at_most_u = !IsAtLeastV();
289   return (phenotype_enable || build_enable || is_at_most_u) && !phenotype_force_disable;
290 }
291 #else
292 // Never called.
293 static bool SysPropSaysUffdGc() { return false; }
294 #endif
295 
296 static bool ShouldUseUserfaultfd() {
297   static_assert(kUseBakerReadBarrier || kUseTableLookupReadBarrier);
298 #ifdef __linux__
299   // Use CMC/CC if that is explicitly asked for on the cmdline. Otherwise,
300   // always use CC on host. On target, use CMC only if the system properties say so
301   // and the kernel supports it.
302   gc::CollectorType gc_type = FetchCmdlineGcType();
303   return gc_type == gc::CollectorType::kCollectorTypeCMC ||
304          (gc_type == gc::CollectorType::kCollectorTypeNone &&
305           kIsTargetAndroid &&
306           SysPropSaysUffdGc() &&
307           KernelSupportsUffd());
308 #else
309   return false;
310 #endif
311 }
312 
313 const bool gUseUserfaultfd = ShouldUseUserfaultfd();
314 const bool gUseReadBarrier = !gUseUserfaultfd;
315 #endif
316 #ifdef ART_TARGET_ANDROID
317 bool ShouldUseGenerationalGC() {
318   if (gUseUserfaultfd && !com::android::art::flags::use_generational_cmc()) {
319     return false;
320   }
321   // The generational GC feature doesn't need a reboot. Any process (like dex2oat)
322   // can pick a different value than the zygote and will still be able to execute.
323   return GetBoolProperty("persist.device_config.runtime_native_boot.use_generational_gc", true);
324 }
325 #else
326 bool ShouldUseGenerationalGC() { return true; }
327 #endif
328 
329 namespace gc {
330 namespace collector {
331 
332 // Turn off kCheckLocks when profiling the GC as it slows down the GC
333 // significantly.
334 static constexpr bool kCheckLocks = kDebugLocking;
335 static constexpr bool kVerifyRootsMarked = kIsDebugBuild;
336 // Verify that there are no missing card marks.
337 static constexpr bool kVerifyNoMissingCardMarks = kIsDebugBuild;
338 // Verify that all references in post-GC objects are valid.
339 static constexpr bool kVerifyPostGCObjects = kIsDebugBuild;
340 // Number of compaction buffers reserved for mutator threads in SIGBUS feature
341 // case. It's extremely unlikely that we will ever have more than this number
342 // of mutator threads trying to access the moving-space during one compaction
343 // phase.
344 static constexpr size_t kMutatorCompactionBufferCount = 2048;
345 // Minimum from-space chunk to be madvised (during concurrent compaction) in one go.
346 // Choose a reasonable size to avoid making too many batched ioctl and madvise calls.
347 static constexpr ssize_t kMinFromSpaceMadviseSize = 8 * MB;
348 // Concurrent compaction termination logic is different (and slightly more efficient) if the
349 // kernel has the fault-retry feature (allowing repeated faults on the same page), which was
350 // introduced in 5.7 (https://android-review.git.corp.google.com/c/kernel/common/+/1540088).
351 // This allows a single page fault to be handled, in turn, by each worker thread, only waking
352 // up the GC thread at the end.
353 static const bool gKernelHasFaultRetry = IsKernelVersionAtLeast(5, 7);
354 
355 std::pair<bool, bool> MarkCompact::GetUffdAndMinorFault() {
356   bool uffd_available;
357   // In most cases the gUffdFeatures will already be initialized at boot time
358   // when libart is loaded. On very old kernels we may get '0' from the kernel,
359   // in which case we would be doing the syscalls each time this function is
360   // called. But that's a very unlikely case. There are no correctness issues as
361   // the response from the kernel never changes after boot.
362   if (UNLIKELY(gUffdFeatures == 0)) {
363     uffd_available = KernelSupportsUffd();
364   } else {
365     // We can have any uffd features only if uffd exists.
366     uffd_available = true;
367   }
368   bool minor_fault_available =
369       (gUffdFeatures & kUffdFeaturesForMinorFault) == kUffdFeaturesForMinorFault;
370   return std::pair<bool, bool>(uffd_available, minor_fault_available);
371 }
372 
373 bool MarkCompact::CreateUserfaultfd(bool post_fork) {
374   if (post_fork || uffd_ == kFdUnused) {
375     // Check if we have MREMAP_DONTUNMAP here for cases where
376     // 'ART_USE_READ_BARRIER=false' is used. Additionally, this check ensures
377     // that userfaultfd isn't used on old kernels, which cause random ioctl
378     // failures.
379     if (gHaveMremapDontunmap) {
380       // Don't use O_NONBLOCK as we rely on read waiting on uffd_ if there isn't
381       // any read event available. We don't use poll.
382       uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
383       // On non-android devices we may not have the kernel patches that restrict
384       // userfaultfd to user mode. But that is not a security concern as we are
385       // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
386       if (!kIsTargetAndroid && UNLIKELY(uffd_ == -1 && errno == EINVAL)) {
387         uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC);
388       }
389       if (UNLIKELY(uffd_ == -1)) {
390         uffd_ = kFallbackMode;
391         LOG(WARNING) << "Userfaultfd isn't supported (reason: " << strerror(errno)
392                      << ") and therefore falling back to stop-the-world compaction.";
393       } else {
394         DCHECK(IsValidFd(uffd_));
395         // Initialize uffd with the features which are required and available.
396         // Using private anonymous mapping in threading mode is the default,
397         // for which we don't need to ask for any features. Note: this mode
398         // is not used in production.
399         struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
400         // We should add SIGBUS feature only if we plan on using it as
401         // requesting it here will mean threading mode will not work.
402         CHECK_EQ(gUffdFeatures & kUffdFeaturesForSigbus, kUffdFeaturesForSigbus);
403         api.features |= kUffdFeaturesForSigbus;
404         CHECK_EQ(ioctl(uffd_, UFFDIO_API, &api), 0)
405             << "ioctl_userfaultfd: API: " << strerror(errno);
406       }
407     } else {
408       uffd_ = kFallbackMode;
409     }
410   }
411   uffd_initialized_ = !post_fork || uffd_ == kFallbackMode;
412   return IsValidFd(uffd_);
413 }
414 
415 template <size_t kAlignment>
416 MarkCompact::LiveWordsBitmap<kAlignment>* MarkCompact::LiveWordsBitmap<kAlignment>::Create(
417     uintptr_t begin, uintptr_t end) {
418   return static_cast<LiveWordsBitmap<kAlignment>*>(
419           MemRangeBitmap::Create("Concurrent Mark Compact live words bitmap", begin, end));
420 }
421 
422 size_t MarkCompact::ComputeInfoMapSize() {
423   size_t moving_space_size = bump_pointer_space_->Capacity();
424   size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize;
425   size_t nr_moving_pages = DivideByPageSize(moving_space_size);
426   size_t nr_non_moving_pages = DivideByPageSize(heap_->GetNonMovingSpace()->Capacity());
427   return chunk_info_vec_size * sizeof(uint32_t) + nr_non_moving_pages * sizeof(ObjReference) +
428          nr_moving_pages * (sizeof(ObjReference) + sizeof(uint32_t) + sizeof(Atomic<uint32_t>));
429 }
430 
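// [Illustrative sketch, not part of this file] Layout of the single info map
// whose size is computed above; InitializeInfoMap() below is the authoritative
// partitioning, reproduced here for readability:
//   [ chunk_info_vec_                  : (moving_space_size / kOffsetChunkSize) * sizeof(uint32_t) ]
//   [ first_objs_moving_space_         : nr_moving_pages * sizeof(ObjReference)                    ]
//   [ pre_compact_offset_moving_space_ : nr_moving_pages * sizeof(uint32_t)                        ]
//   [ moving_pages_status_             : nr_moving_pages * sizeof(Atomic<uint32_t>)                ]
//   [ first_objs_non_moving_space_     : nr_non_moving_pages * sizeof(ObjReference)                ]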
431 size_t MarkCompact::InitializeInfoMap(uint8_t* p, size_t moving_space_sz) {
432   size_t nr_moving_pages = DivideByPageSize(moving_space_sz);
433 
434   chunk_info_vec_ = reinterpret_cast<uint32_t*>(p);
435   vector_length_ = moving_space_sz / kOffsetChunkSize;
436   size_t total = vector_length_ * sizeof(uint32_t);
437 
438   first_objs_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
439   total += nr_moving_pages * sizeof(ObjReference);
440 
441   pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p + total);
442   total += nr_moving_pages * sizeof(uint32_t);
443 
444   moving_pages_status_ = reinterpret_cast<Atomic<uint32_t>*>(p + total);
445   total += nr_moving_pages * sizeof(Atomic<uint32_t>);
446 
447   first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
448   total += DivideByPageSize(heap_->GetNonMovingSpace()->Capacity()) * sizeof(ObjReference);
449   DCHECK_EQ(total, ComputeInfoMapSize());
450   return total;
451 }
452 
453 YoungMarkCompact::YoungMarkCompact(Heap* heap, MarkCompact* main)
454     : GarbageCollector(heap, "young concurrent mark compact"), main_collector_(main) {
455   // Initialize GC metrics.
456   metrics::ArtMetrics* metrics = GetMetrics();
457   gc_time_histogram_ = metrics->YoungGcCollectionTime();
458   metrics_gc_count_ = metrics->YoungGcCount();
459   metrics_gc_count_delta_ = metrics->YoungGcCountDelta();
460   gc_throughput_histogram_ = metrics->YoungGcThroughput();
461   gc_tracing_throughput_hist_ = metrics->YoungGcTracingThroughput();
462   gc_throughput_avg_ = metrics->YoungGcThroughputAvg();
463   gc_tracing_throughput_avg_ = metrics->YoungGcTracingThroughputAvg();
464   gc_scanned_bytes_ = metrics->YoungGcScannedBytes();
465   gc_scanned_bytes_delta_ = metrics->YoungGcScannedBytesDelta();
466   gc_freed_bytes_ = metrics->YoungGcFreedBytes();
467   gc_freed_bytes_delta_ = metrics->YoungGcFreedBytesDelta();
468   gc_duration_ = metrics->YoungGcDuration();
469   gc_duration_delta_ = metrics->YoungGcDurationDelta();
470   gc_app_slow_path_during_gc_duration_delta_ = metrics->AppSlowPathDuringYoungGcDurationDelta();
471   are_metrics_initialized_ = true;
472 }
473 
474 void YoungMarkCompact::RunPhases() {
475   DCHECK(!main_collector_->young_gen_);
476   main_collector_->young_gen_ = true;
477   main_collector_->RunPhases();
478   main_collector_->young_gen_ = false;
479 }
480 
481 MarkCompact::MarkCompact(Heap* heap)
482     : GarbageCollector(heap, "concurrent mark compact"),
483       gc_barrier_(0),
484       lock_("mark compact lock", kGenericBottomLock),
485       sigbus_in_progress_count_{kSigbusCounterCompactionDoneMask, kSigbusCounterCompactionDoneMask},
486       mid_to_old_promo_bit_vec_(nullptr),
487       bump_pointer_space_(heap->GetBumpPointerSpace()),
488       post_compact_end_(nullptr),
489       young_gen_(false),
490       use_generational_(heap->GetUseGenerational()),
491       compacting_(false),
492       moving_space_bitmap_(bump_pointer_space_->GetMarkBitmap()),
493       moving_space_begin_(bump_pointer_space_->Begin()),
494       moving_space_end_(bump_pointer_space_->Limit()),
495       black_dense_end_(moving_space_begin_),
496       mid_gen_end_(moving_space_begin_),
497       uffd_(kFdUnused),
498       marking_done_(false),
499       uffd_initialized_(false),
500       clamp_info_map_status_(ClampInfoStatus::kClampInfoNotDone) {
501   if (kIsDebugBuild) {
502     updated_roots_.reset(new std::unordered_set<void*>());
503   }
504   if (gUffdFeatures == 0) {
505     GetUffdAndMinorFault();
506   }
507   uint8_t* moving_space_begin = bump_pointer_space_->Begin();
508   // TODO: This depends on how the bump-pointer space move is implemented. If we
509   // switch between two virtual memory mappings each time, then we will have to
510   // initialize live_words_bitmap_ accordingly.
511   live_words_bitmap_.reset(LiveWordsBitmap<kAlignment>::Create(
512       reinterpret_cast<uintptr_t>(moving_space_begin),
513       reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit())));
514 
515   std::string err_msg;
516   size_t moving_space_size = bump_pointer_space_->Capacity();
517   {
518     // Create one MemMap for all the data structures
519     info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector",
520                                      ComputeInfoMapSize(),
521                                      PROT_READ | PROT_WRITE,
522                                      /*low_4gb=*/false,
523                                      &err_msg);
524     if (UNLIKELY(!info_map_.IsValid())) {
525       LOG(FATAL) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg;
526     } else {
527       size_t total = InitializeInfoMap(info_map_.Begin(), moving_space_size);
528       DCHECK_EQ(total, info_map_.Size());
529     }
530   }
531 
532   size_t moving_space_alignment = Heap::BestPageTableAlignment(moving_space_size);
533   // The moving space is created at a fixed address, which is expected to be
534   // PMD-size aligned.
535   if (!IsAlignedParam(moving_space_begin, moving_space_alignment)) {
536     LOG(WARNING) << "Bump pointer space is not aligned to " << PrettySize(moving_space_alignment)
537                  << ". This can lead to longer stop-the-world pauses for compaction";
538   }
539   // NOTE: PROT_NONE is used here as these mappings are for address space reservation
540   // only and will be used only after appropriately remapping them.
541   from_space_map_ = MemMap::MapAnonymousAligned("Concurrent mark-compact from-space",
542                                                 moving_space_size,
543                                                 PROT_NONE,
544                                                 /*low_4gb=*/kObjPtrPoisoning,
545                                                 moving_space_alignment,
546                                                 &err_msg);
547   if (UNLIKELY(!from_space_map_.IsValid())) {
548     LOG(FATAL) << "Failed to allocate concurrent mark-compact from-space" << err_msg;
549   } else {
550     from_space_begin_ = from_space_map_.Begin();
551   }
552 
553   compaction_buffers_map_ = MemMap::MapAnonymous("Concurrent mark-compact compaction buffers",
554                                                  (1 + kMutatorCompactionBufferCount) * gPageSize,
555                                                  PROT_READ | PROT_WRITE,
556                                                  /*low_4gb=*/kObjPtrPoisoning,
557                                                  &err_msg);
558   if (UNLIKELY(!compaction_buffers_map_.IsValid())) {
559     LOG(FATAL) << "Failed to allocate concurrent mark-compact compaction buffers" << err_msg;
560   }
561   // We also use the first page-sized buffer for the purpose of terminating concurrent compaction.
562   conc_compaction_termination_page_ = compaction_buffers_map_.Begin();
563   // Touch the page deliberately to avoid userfaults on it. We madvise it in
564   // CompactionPhase() before using it to terminate concurrent compaction.
565   ForceRead(conc_compaction_termination_page_);
566 
567   // In most of the cases, we don't expect more than one LinearAlloc space.
568   linear_alloc_spaces_data_.reserve(1);
569 
570   // Initialize GC metrics.
571   metrics::ArtMetrics* metrics = GetMetrics();
572   gc_time_histogram_ = metrics->FullGcCollectionTime();
573   metrics_gc_count_ = metrics->FullGcCount();
574   metrics_gc_count_delta_ = metrics->FullGcCountDelta();
575   gc_throughput_histogram_ = metrics->FullGcThroughput();
576   gc_tracing_throughput_hist_ = metrics->FullGcTracingThroughput();
577   gc_throughput_avg_ = metrics->FullGcThroughputAvg();
578   gc_tracing_throughput_avg_ = metrics->FullGcTracingThroughputAvg();
579   gc_scanned_bytes_ = metrics->FullGcScannedBytes();
580   gc_scanned_bytes_delta_ = metrics->FullGcScannedBytesDelta();
581   gc_freed_bytes_ = metrics->FullGcFreedBytes();
582   gc_freed_bytes_delta_ = metrics->FullGcFreedBytesDelta();
583   gc_duration_ = metrics->FullGcDuration();
584   gc_duration_delta_ = metrics->FullGcDurationDelta();
585   gc_app_slow_path_during_gc_duration_delta_ = metrics->AppSlowPathDuringFullGcDurationDelta();
586   are_metrics_initialized_ = true;
587 }
588 
589 void MarkCompact::ResetGenerationalState() {
590   black_dense_end_ = mid_gen_end_ = moving_space_begin_;
591   post_compact_end_ = nullptr;
592   class_after_obj_map_.clear();
593 }
594 
595 void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) {
596   DCHECK_ALIGNED_PARAM(begin, gPageSize);
597   DCHECK_ALIGNED_PARAM(len, gPageSize);
598   DCHECK_GE(len, Heap::GetPMDSize());
599   size_t alignment = Heap::BestPageTableAlignment(len);
600   std::string err_msg;
601   MemMap shadow(MemMap::MapAnonymousAligned("linear-alloc shadow map",
602                                             len,
603                                             PROT_NONE,
604                                             /*low_4gb=*/false,
605                                             alignment,
606                                             &err_msg));
607   if (!shadow.IsValid()) {
608     LOG(FATAL) << "Failed to allocate linear-alloc shadow map: " << err_msg;
609     UNREACHABLE();
610   }
611 
612   MemMap page_status_map(MemMap::MapAnonymous("linear-alloc page-status map",
613                                               DivideByPageSize(len),
614                                               PROT_READ | PROT_WRITE,
615                                               /*low_4gb=*/false,
616                                               &err_msg));
617   if (!page_status_map.IsValid()) {
618     LOG(FATAL) << "Failed to allocate linear-alloc page-status shadow map: " << err_msg;
619     UNREACHABLE();
620   }
621   linear_alloc_spaces_data_.emplace_back(
622       std::forward<MemMap>(shadow), std::forward<MemMap>(page_status_map), begin, begin + len);
623 }
624 
625 void MarkCompact::ClampGrowthLimit(size_t new_capacity) {
626   // From-space is the same size as moving-space in virtual memory.
627   // However, if it's in >4GB address space then we don't need to do it
628   // synchronously.
629 #if defined(__LP64__)
630   constexpr bool kClampFromSpace = kObjPtrPoisoning;
631 #else
632   constexpr bool kClampFromSpace = true;
633 #endif
634   size_t old_capacity = bump_pointer_space_->Capacity();
635   new_capacity = bump_pointer_space_->ClampGrowthLimit(new_capacity);
636   if (new_capacity < old_capacity) {
637     CHECK(from_space_map_.IsValid());
638     if (kClampFromSpace) {
639       from_space_map_.SetSize(new_capacity);
640     }
641     clamp_info_map_status_ = ClampInfoStatus::kClampInfoPending;
642   }
643   CHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
644 }
645 
646 void MarkCompact::MaybeClampGcStructures() {
647   size_t moving_space_size = bump_pointer_space_->Capacity();
648   DCHECK(thread_running_gc_ != nullptr);
649   if (UNLIKELY(clamp_info_map_status_ == ClampInfoStatus::kClampInfoPending)) {
650     CHECK(from_space_map_.IsValid());
651     if (from_space_map_.Size() > moving_space_size) {
652       from_space_map_.SetSize(moving_space_size);
653     }
654     // Bitmaps and other data structures
655     live_words_bitmap_->SetBitmapSize(moving_space_size);
656     size_t set_size = InitializeInfoMap(info_map_.Begin(), moving_space_size);
657     CHECK_LT(set_size, info_map_.Size());
658     info_map_.SetSize(set_size);
659 
660     clamp_info_map_status_ = ClampInfoStatus::kClampInfoFinished;
661   }
662 }
663 
664 void MarkCompact::PrepareForMarking(bool pre_marking) {
665   static_assert(gc::accounting::CardTable::kCardDirty - 1 == gc::accounting::CardTable::kCardAged);
666   static_assert(gc::accounting::CardTable::kCardAged - 1 == gc::accounting::CardTable::kCardAged2);
667   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
668   accounting::CardTable* const card_table = heap_->GetCardTable();
669   // immune_spaces_ is emptied in InitializePhase() before marking starts. This
670   // function is invoked twice during marking. We only need to populate immune_spaces_
671   // once per GC cycle. And when it's done (below), all the immune spaces are
672   // added to it. We can never have partially filled immune_spaces_.
673   bool update_immune_spaces = immune_spaces_.IsEmpty();
674   // Mark all of the spaces we never collect as immune.
675   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
676     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
677         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
678       CHECK(space->IsZygoteSpace() || space->IsImageSpace());
679       if (update_immune_spaces) {
680         immune_spaces_.AddSpace(space);
681       }
682       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
683       if (table != nullptr) {
684         table->ProcessCards();
685       } else {
686         // Keep cards aged if we don't have a mod-union table since we need
687         // to scan them in future GCs. This case is for app images.
688         card_table->ModifyCardsAtomic(
689             space->Begin(),
690             space->End(),
691             [](uint8_t card) {
692               return (card == gc::accounting::CardTable::kCardClean)
693                   ? card
694                   : gc::accounting::CardTable::kCardAged;
695             },
696             /* card modified visitor */ VoidFunctor());
697       }
698     } else if (pre_marking) {
699       CHECK(!space->IsZygoteSpace());
700       CHECK(!space->IsImageSpace());
701       if (young_gen_) {
702         uint8_t* space_age_end = space->Limit();
703         // Age cards in old-gen as they contain old-to-young references.
704         if (space == bump_pointer_space_) {
705           DCHECK_ALIGNED_PARAM(old_gen_end_, gPageSize);
706           moving_space_bitmap_->ClearRange(reinterpret_cast<mirror::Object*>(old_gen_end_),
707                                            reinterpret_cast<mirror::Object*>(moving_space_end_));
708           // Clear cards in [old_gen_end_, moving_space_end_) as they are not needed.
709           card_table->ClearCardRange(old_gen_end_, space->Limit());
710           space_age_end = old_gen_end_;
711         }
712         card_table->ModifyCardsAtomic(space->Begin(),
713                                       space_age_end,
714                                       AgeCardVisitor(),
715                                       /*card modified visitor=*/VoidFunctor());
716       } else {
717         // The card-table corresponding to bump-pointer and non-moving space can
718         // be cleared, because we are going to traverse all the reachable objects
719         // in these spaces. This card-table will eventually be used to track
720         // mutations while concurrent marking is going on.
721         card_table->ClearCardRange(space->Begin(), space->Limit());
722         if (space == bump_pointer_space_) {
723           moving_space_bitmap_->Clear();
724         }
725       }
726       if (space != bump_pointer_space_) {
727         CHECK_EQ(space, heap_->GetNonMovingSpace());
728         if (young_gen_) {
729           space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
730         }
731         non_moving_space_ = space;
732         non_moving_space_bitmap_ = space->GetMarkBitmap();
733       }
734     } else {
735       if (young_gen_) {
736         // It would be correct to retain existing aged cards and add dirty cards
737         // to that set. However, that would unnecessarily require us to re-scan
738         // cards which haven't been dirtied since the first pass of marking.
739         auto card_visitor = [](uint8_t card) {
740           return (card > gc::accounting::CardTable::kCardAged2)
741                      ? card - 1
742                      : gc::accounting::CardTable::kCardClean;
743         };
744         card_table->ModifyCardsAtomic(
745             space->Begin(), space->End(), card_visitor, /*card modified visitor=*/VoidFunctor());
746       } else {
747         card_table->ModifyCardsAtomic(space->Begin(),
748                                       space->End(),
749                                       AgeCardVisitor(),
750                                       /*card modified visitor=*/VoidFunctor());
751       }
752     }
753   }
754   if (pre_marking && young_gen_) {
755     for (const auto& space : GetHeap()->GetDiscontinuousSpaces()) {
756       CHECK(space->IsLargeObjectSpace());
757       space->AsLargeObjectSpace()->CopyLiveToMarked();
758     }
759   }
760 }
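// [Illustrative note, not part of this file] Roughly, the card-aging ladder
// relied on in PrepareForMarking() above, per the static_asserts at its top and
// the card_visitor used on the second invocation:
//   kCardDirty -> kCardAged -> kCardAged2 -> kCardClean
// i.e. a card dirtied by a mutator survives two aging passes before being
// cleaned, which keeps it scannable across both marking passes of a young GC.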
761 
762 void MarkCompact::MarkZygoteLargeObjects() {
763   Thread* self = thread_running_gc_;
764   DCHECK_EQ(self, Thread::Current());
765   space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace();
766   if (los != nullptr) {
767     // Pick the current live bitmap (mark bitmap if swapped).
768     accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
769     accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
770     // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
771     std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic();
772     live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first),
773                                   reinterpret_cast<uintptr_t>(range.second),
774                                   [mark_bitmap, los, self](mirror::Object* obj)
775                                       REQUIRES(Locks::heap_bitmap_lock_)
776                                           REQUIRES_SHARED(Locks::mutator_lock_) {
777                                             if (los->IsZygoteLargeObject(self, obj)) {
778                                               mark_bitmap->Set(obj);
779                                             }
780                                           });
781   }
782 }
783 
784 void MarkCompact::InitializePhase() {
785   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
786   mark_stack_ = heap_->GetMarkStack();
787   CHECK(mark_stack_->IsEmpty());
788   immune_spaces_.Reset();
789   moving_first_objs_count_ = 0;
790   non_moving_first_objs_count_ = 0;
791   black_page_count_ = 0;
792   bytes_scanned_ = 0;
793   freed_objects_ = 0;
794   // The first buffer is used by gc-thread.
795   compaction_buffer_counter_.store(1, std::memory_order_relaxed);
796   black_allocations_begin_ = bump_pointer_space_->Limit();
797   DCHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
798   from_space_slide_diff_ = from_space_begin_ - moving_space_begin_;
799   moving_space_end_ = bump_pointer_space_->Limit();
800   if (use_generational_ && !young_gen_) {
801     class_after_obj_map_.clear();
802   }
803   // TODO: Would it suffice to read it once in the constructor, which is called
804   // in zygote process?
805   pointer_size_ = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
806   for (size_t i = 0; i < vector_length_; i++) {
807     DCHECK_EQ(chunk_info_vec_[i], 0u);
808   }
809   app_slow_path_start_time_ = 0;
810 }
811 
812 class MarkCompact::ThreadFlipVisitor : public Closure {
813  public:
814   explicit ThreadFlipVisitor(MarkCompact* collector) : collector_(collector) {}
815 
816   void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) {
817     // Note: self is not necessarily equal to thread since thread may be suspended.
818     Thread* self = Thread::Current();
819     CHECK(thread == self || thread->GetState() != ThreadState::kRunnable)
820         << thread->GetState() << " thread " << thread << " self " << self;
821     thread->VisitRoots(collector_, kVisitRootFlagAllRoots);
822     // Interpreter cache is thread-local so it needs to be swept either in a
823     // flip, or a stop-the-world pause.
824     CHECK(collector_->compacting_);
825     thread->GetInterpreterCache()->Clear(thread);
826     thread->AdjustTlab(collector_->black_objs_slide_diff_);
827   }
828 
829  private:
830   MarkCompact* const collector_;
831 };
832 
833 class MarkCompact::FlipCallback : public Closure {
834  public:
835   explicit FlipCallback(MarkCompact* collector) : collector_(collector) {}
836 
837   void Run([[maybe_unused]] Thread* thread) override REQUIRES(Locks::mutator_lock_) {
838     collector_->CompactionPause();
839   }
840 
841  private:
842   MarkCompact* const collector_;
843 };
844 
845 void MarkCompact::RunPhases() {
846   Thread* self = Thread::Current();
847   thread_running_gc_ = self;
848   Runtime* runtime = Runtime::Current();
849   GetHeap()->PreGcVerification(this);
850   InitializePhase();
851   {
852     ReaderMutexLock mu(self, *Locks::mutator_lock_);
853     MarkingPhase();
854   }
855   {
856     // Marking pause
857     ScopedPause pause(this);
858     MarkingPause();
859     if (kIsDebugBuild) {
860       bump_pointer_space_->AssertAllThreadLocalBuffersAreRevoked();
861     }
862   }
863   bool perform_compaction;
864   {
865     ReaderMutexLock mu(self, *Locks::mutator_lock_);
866     ReclaimPhase();
867     perform_compaction = PrepareForCompaction();
868   }
869   if (perform_compaction) {
870     // Compaction pause
871     ThreadFlipVisitor visitor(this);
872     FlipCallback callback(this);
873     runtime->GetThreadList()->FlipThreadRoots(
874         &visitor, &callback, this, GetHeap()->GetGcPauseListener());
875 
876     if (IsValidFd(uffd_)) {
877       ReaderMutexLock mu(self, *Locks::mutator_lock_);
878       CompactionPhase();
879     }
880   } else {
881     if (use_generational_) {
882       DCHECK_IMPLIES(post_compact_end_ != nullptr, post_compact_end_ == black_allocations_begin_);
883     }
884     post_compact_end_ = black_allocations_begin_;
885   }
886   FinishPhase(perform_compaction);
887   GetHeap()->PostGcVerification(this);
888   thread_running_gc_ = nullptr;
889 }
890 
891 void MarkCompact::InitMovingSpaceFirstObjects(size_t vec_len, size_t to_space_page_idx) {
892   uint32_t offset_in_chunk_word;
893   uint32_t offset;
894   mirror::Object* obj;
895   const uintptr_t heap_begin = moving_space_bitmap_->HeapBegin();
896 
897   // Find the first live word.
898   size_t chunk_idx = to_space_page_idx * (gPageSize / kOffsetChunkSize);
899   DCHECK_LT(chunk_idx, vec_len);
900   // Find the first live word in the space
901   for (; chunk_info_vec_[chunk_idx] == 0; chunk_idx++) {
902     if (chunk_idx >= vec_len) {
903       // We don't have any live data on the moving-space.
904       moving_first_objs_count_ = to_space_page_idx;
905       return;
906     }
907   }
908   DCHECK_LT(chunk_idx, vec_len);
909   // Use live-words bitmap to find the first live word
910   offset_in_chunk_word = live_words_bitmap_->FindNthLiveWordOffset(chunk_idx, /*n*/ 0);
911   offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
912   DCHECK(live_words_bitmap_->Test(offset)) << "offset=" << offset
913                                            << " chunk_idx=" << chunk_idx
914                                            << " N=0"
915                                            << " offset_in_word=" << offset_in_chunk_word
916                                            << " word=" << std::hex
917                                            << live_words_bitmap_->GetWord(chunk_idx);
918   obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
919   // TODO: add a check to validate the object.
920 
921   pre_compact_offset_moving_space_[to_space_page_idx] = offset;
922   first_objs_moving_space_[to_space_page_idx].Assign(obj);
923   to_space_page_idx++;
924 
925   uint32_t page_live_bytes = 0;
926   while (true) {
927     for (; page_live_bytes <= gPageSize; chunk_idx++) {
928       if (chunk_idx >= vec_len) {
929         moving_first_objs_count_ = to_space_page_idx;
930         return;
931       }
932       page_live_bytes += chunk_info_vec_[chunk_idx];
933     }
934     chunk_idx--;
935     page_live_bytes -= gPageSize;
936     DCHECK_LE(page_live_bytes, kOffsetChunkSize);
937     DCHECK_LE(page_live_bytes, chunk_info_vec_[chunk_idx])
938         << " chunk_idx=" << chunk_idx
939         << " to_space_page_idx=" << to_space_page_idx
940         << " vec_len=" << vec_len;
941     DCHECK(IsAligned<kAlignment>(chunk_info_vec_[chunk_idx] - page_live_bytes));
942     offset_in_chunk_word =
943             live_words_bitmap_->FindNthLiveWordOffset(
944                 chunk_idx, (chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment);
945     offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
946     DCHECK(live_words_bitmap_->Test(offset))
947         << "offset=" << offset
948         << " chunk_idx=" << chunk_idx
949         << " N=" << ((chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment)
950         << " offset_in_word=" << offset_in_chunk_word
951         << " word=" << std::hex << live_words_bitmap_->GetWord(chunk_idx);
952     // TODO: Can we optimize this for large objects? If we are continuing a
953     // large object that spans multiple pages, then we may be able to do without
954     // calling FindPrecedingObject().
955     //
956     // Find the object which encapsulates offset in it, which could be
957     // starting at offset itself.
958     obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
959     // TODO: add a check to validate the object.
960     pre_compact_offset_moving_space_[to_space_page_idx] = offset;
961     first_objs_moving_space_[to_space_page_idx].Assign(obj);
962     to_space_page_idx++;
963     chunk_idx++;
964   }
965 }
966 
967 size_t MarkCompact::InitNonMovingFirstObjects(uintptr_t begin,
968                                               uintptr_t end,
969                                               accounting::ContinuousSpaceBitmap* bitmap,
970                                               ObjReference* first_objs_arr) {
971   mirror::Object* prev_obj;
972   size_t page_idx;
973   {
974     // Find first live object
975     mirror::Object* obj = nullptr;
976     bitmap->VisitMarkedRange</*kVisitOnce*/ true>(begin,
977                                                   end,
978                                                   [&obj] (mirror::Object* o) {
979                                                     obj = o;
980                                                   });
981     if (obj == nullptr) {
982       // There are no live objects in the space
983       return 0;
984     }
985     page_idx = DivideByPageSize(reinterpret_cast<uintptr_t>(obj) - begin);
986     first_objs_arr[page_idx++].Assign(obj);
987     prev_obj = obj;
988   }
989   // TODO: check obj is valid
990   uintptr_t prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
991                            + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
992   // For every page find the object starting from which we need to call
993   // VisitReferences. It could either be an object that started on some
994   // preceding page, or some object starting within this page.
995   begin = RoundDown(reinterpret_cast<uintptr_t>(prev_obj) + gPageSize, gPageSize);
996   while (begin < end) {
997     // Utilize, if any, large object that started in some preceding page, but
998     // overlaps with this page as well.
999     if (prev_obj != nullptr && prev_obj_end > begin) {
1000       DCHECK_LT(prev_obj, reinterpret_cast<mirror::Object*>(begin));
1001       first_objs_arr[page_idx].Assign(prev_obj);
1002     } else {
1003       prev_obj_end = 0;
1004       // It's sufficient to search for the previous object only in the preceding page.
1005       // If no live object started in that page and some object had started in
1006       // the page before that one, which was big enough to overlap with
1007       // the current page, then we wouldn't be in the else branch.
1008       prev_obj = bitmap->FindPrecedingObject(begin, begin - gPageSize);
1009       if (prev_obj != nullptr) {
1010         prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
1011                         + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
1012       }
1013       if (prev_obj_end > begin) {
1014         first_objs_arr[page_idx].Assign(prev_obj);
1015       } else {
1016         // Find the first live object in this page
1017         bitmap->VisitMarkedRange</*kVisitOnce*/ true>(
1018             begin, begin + gPageSize, [first_objs_arr, page_idx](mirror::Object* obj) {
1019               first_objs_arr[page_idx].Assign(obj);
1020             });
1021       }
1022       // An empty entry indicates that the page has no live objects and hence
1023       // can be skipped.
1024     }
1025     begin += gPageSize;
1026     page_idx++;
1027   }
1028   return page_idx;
1029 }
1030 
1031 // Generational CMC description
1032 // ============================
1033 //
1034 // All allocations since last GC are considered to be in young generation.
1035 // Unlike other ART GCs, we promote surviving objects to old generation after
1036 // they survive two contiguous GCs. Objects that survive one GC are considered
1037 // to be in mid generation. In the next young GC, marking is performed on both
1038 // the young as well as mid gen objects. And then during compaction, the
1039 // surviving mid-gen objects are compacted and then promoted to old-gen, while
1040 // the surviving young gen objects are compacted and promoted to mid-gen.
1041 //
1042 // Some other important points worth explaining:
1043 //
1044 // 1. During marking-phase, 'mid_gen_end_' segregates young and mid generations.
1045 // Before starting compaction, in PrepareForCompaction(), we set it to the
1046 // corresponding post-compact addresses, aligned up to page-size. Therefore,
1047 // some object's beginning portion may be in mid-gen, while the rest is in young-gen.
1048 // Aligning up is essential as mid_gen_end_ becomes old_gen_end_ at the end of
1049 // GC cycle, and the latter has to be page-aligned as old-gen pages are
1050 // processed differently (no compaction).
1051 //
1052 // 2. We need to maintain the mark-bitmap for the old-gen for subsequent GCs.
1053 // When objects are promoted to old-gen from mid-gen, their mark bits are
1054 // first collected in a BitVector and then later copied into the mark-bitmap in
1055 // FinishPhase(). We can't directly set the bits in the mark-bitmap as the bitmap
1056 // contains pre-compaction mark bits which are required during compaction.
1057 //
1058 // 3. Since we need to revisit mid-gen objects in the next GC cycle, we need to
1059 // dirty the cards in old-gen containing references to them. We identify these
1060 // references when visiting old-gen objects during compaction. However, native
1061 // roots are skipped at that time (they are updated separately in the linear-alloc
1062 // space, where we don't know which object (dex-cache/class-loader/class) a
1063 // native root belongs to). Therefore, native roots are covered during the marking
1064 // phase.
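//
// [Illustrative note, not part of this file] The boundaries described above,
// roughly as maintained within the moving space (see the DCHECKs in
// PrepareForCompaction() below):
//
//   moving_space_begin_ <= old_gen_end_ <= mid_gen_end_ <= black_allocations_begin_
//
//   [moving_space_begin_, old_gen_end_)        old gen   (not compacted)
//   [old_gen_end_, mid_gen_end_)               mid gen   (compacted, promoted to old)
//   [mid_gen_end_, black_allocations_begin_)   young gen (compacted, promoted to mid)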
1065 
1066 bool MarkCompact::PrepareForCompaction() {
1067   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1068   size_t chunk_info_per_page = gPageSize / kOffsetChunkSize;
1069   size_t vector_len = (black_allocations_begin_ - moving_space_begin_) / kOffsetChunkSize;
1070   DCHECK_LE(vector_len, vector_length_);
1071   DCHECK_ALIGNED_PARAM(vector_length_, chunk_info_per_page);
1072   if (UNLIKELY(vector_len == 0)) {
1073     // Nothing to compact. Entire heap is empty.
1074     black_dense_end_ = mid_gen_end_ = moving_space_begin_;
1075     return false;
1076   }
1077   for (size_t i = 0; i < vector_len; i++) {
1078     DCHECK_LE(chunk_info_vec_[i], kOffsetChunkSize)
1079         << "i:" << i << " vector_length:" << vector_len << " vector_length_:" << vector_length_;
1080     DCHECK_EQ(chunk_info_vec_[i], live_words_bitmap_->LiveBytesInBitmapWord(i));
1081   }
1082 
1083   // TODO: We can do a lot of neat tricks with this offset vector to tune the
1084   // compaction as we wish. Originally, the compaction algorithm slides all
1085   // live objects towards the beginning of the heap. This is nice because it
1086   // keeps the spatial locality of objects intact.
1087   // However, sometimes it's desired to compact objects in certain portions
1088   // of the heap. For instance, it is expected that, over time,
1089   // objects towards the beginning of the heap are long lived and are always
1090   // densely packed. In this case, it makes sense to only update references in
1091   // there and not try to compact it.
1092   // Furthermore, we might have some large objects and may not want to move such
1093   // objects.
1094   // We can adjust, without too much effort, the values in the chunk_info_vec_ such
1095   // that the objects in the dense beginning area aren't moved. OTOH, large
1096   // objects, which could be anywhere in the heap, could also be kept from
1097   // moving by using a similar trick. The only issue is that by doing this we will
1098   // leave an unused hole in the middle of the heap which can't be used for
1099   // allocations until we do a *full* compaction.
1100   //
1101   // At this point every element in the chunk_info_vec_ contains the live-bytes
1102   // of the corresponding chunk. For old-to-new address computation we need
1103   // every element to reflect total live-bytes till the corresponding chunk.
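//
// [Illustrative sketch, not part of this file] A small worked example, with
// hypothetical values and assuming no black-dense prefix (initial offset 0), of
// the std::exclusive_scan() applied further below, which turns per-chunk
// live-byte counts into cumulative live bytes preceding each chunk:
//
//   chunk_info_vec_ before: { 40,  0, 120, 64 }   // live bytes in each chunk
//   chunk_info_vec_ after:  {  0, 40,  40, 160 }  // live bytes before each chunk
//
// The post-compact address of a chunk's first live word is then
// moving_space_begin_ + chunk_info_vec_[chunk_idx], plus any live bytes that
// precede the word within its own chunk.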
1104 
1105   size_t black_dense_idx = 0;
1106   GcCause gc_cause = GetCurrentIteration()->GetGcCause();
1107   if (young_gen_) {
1108     DCHECK_ALIGNED_PARAM(old_gen_end_, gPageSize);
1109     DCHECK_GE(mid_gen_end_, old_gen_end_);
1110     DCHECK_GE(black_allocations_begin_, mid_gen_end_);
1111     // old-gen's boundary was decided at the end of previous GC-cycle.
1112     black_dense_idx = (old_gen_end_ - moving_space_begin_) / kOffsetChunkSize;
1113     if (black_dense_idx == vector_len) {
1114       // There is nothing live in young-gen.
1115       DCHECK_EQ(old_gen_end_, black_allocations_begin_);
1116       mid_gen_end_ = black_allocations_begin_;
1117       return false;
1118     }
1119     InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(moving_space_begin_),
1120                               reinterpret_cast<uintptr_t>(old_gen_end_),
1121                               moving_space_bitmap_,
1122                               first_objs_moving_space_);
1123   } else if (gc_cause != kGcCauseExplicit && gc_cause != kGcCauseCollectorTransition &&
1124              !GetCurrentIteration()->GetClearSoftReferences()) {
1125     uint64_t live_bytes = 0, total_bytes = 0;
1126     size_t aligned_vec_len = RoundUp(vector_len, chunk_info_per_page);
1127     size_t num_pages = aligned_vec_len / chunk_info_per_page;
1128     size_t threshold_passing_marker = 0;  // In number of pages
1129     std::vector<uint32_t> pages_live_bytes;
1130     pages_live_bytes.reserve(num_pages);
1131     // Identify the largest chunk towards the beginning of moving space which
1132     // passes the black-dense threshold.
1133     for (size_t i = 0; i < aligned_vec_len; i += chunk_info_per_page) {
1134       uint32_t page_live_bytes = 0;
1135       for (size_t j = 0; j < chunk_info_per_page; j++) {
1136         page_live_bytes += chunk_info_vec_[i + j];
1137         total_bytes += kOffsetChunkSize;
1138       }
1139       live_bytes += page_live_bytes;
1140       pages_live_bytes.push_back(page_live_bytes);
1141       if (live_bytes * 100U >= total_bytes * kBlackDenseRegionThreshold) {
1142         threshold_passing_marker = pages_live_bytes.size();
1143       }
1144     }
1145     DCHECK_EQ(pages_live_bytes.size(), num_pages);
1146     // Eliminate the pages at the end of the chunk which are lower than the threshold.
1147     if (threshold_passing_marker > 0) {
1148       auto iter = std::find_if(
1149           pages_live_bytes.rbegin() + (num_pages - threshold_passing_marker),
1150           pages_live_bytes.rend(),
1151           [](uint32_t bytes) { return bytes * 100U >= gPageSize * kBlackDenseRegionThreshold; });
1152       black_dense_idx = (pages_live_bytes.rend() - iter) * chunk_info_per_page;
1153     }
1154     black_dense_end_ = moving_space_begin_ + black_dense_idx * kOffsetChunkSize;
1155     DCHECK_ALIGNED_PARAM(black_dense_end_, gPageSize);
1156 
1157     // Adjust for classes allocated after black_dense_end_ whose object(s)
1158     // are earlier. This is required because we update the references in the
1159     // black-dense region in-place: if the class pointer of a page's first
1160     // object, which started on some preceding page, has already been updated,
1161     // then we would read wrong class data, such as the ref-offset bitmap.
1162     for (auto iter = class_after_obj_map_.rbegin();
1163          iter != class_after_obj_map_.rend() &&
1164          reinterpret_cast<uint8_t*>(iter->first.AsMirrorPtr()) >= black_dense_end_;
1165          iter++) {
1166       black_dense_end_ =
1167           std::min(black_dense_end_, reinterpret_cast<uint8_t*>(iter->second.AsMirrorPtr()));
1168       black_dense_end_ = AlignDown(black_dense_end_, gPageSize);
1169     }
1170     black_dense_idx = (black_dense_end_ - moving_space_begin_) / kOffsetChunkSize;
1171     DCHECK_LE(black_dense_idx, vector_len);
1172     if (black_dense_idx == vector_len) {
1173       // There is nothing to compact. All the in-use pages are completely full.
1174       mid_gen_end_ = black_allocations_begin_;
1175       return false;
1176     }
1177     InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(moving_space_begin_),
1178                               reinterpret_cast<uintptr_t>(black_dense_end_),
1179                               moving_space_bitmap_,
1180                               first_objs_moving_space_);
1181   } else {
1182     black_dense_end_ = moving_space_begin_;
1183   }
1184 
1185   InitMovingSpaceFirstObjects(vector_len, black_dense_idx / chunk_info_per_page);
1186   non_moving_first_objs_count_ =
1187       InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(non_moving_space_->Begin()),
1188                                 reinterpret_cast<uintptr_t>(non_moving_space_->End()),
1189                                 non_moving_space_bitmap_,
1190                                 first_objs_non_moving_space_);
1191   // Update the vector entry one past the heap usage, as it is required for
1192   // computing the post-compact addresses of black-allocated objects.
1193   uint32_t total_bytes;
1194   if (vector_len < vector_length_) {
1195     vector_len++;
1196     total_bytes = 0;
1197   } else {
1198     // Fetch the value stored in the last element before it gets overwritten by
1199     // std::exclusive_scan().
1200     total_bytes = chunk_info_vec_[vector_len - 1];
1201   }
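  // The exclusive scan below turns the per-chunk live-bytes into exclusive
  // prefix sums, seeded with the size of the uncompacted prefix. Illustrative
  // example (hypothetical values): with black_dense_idx = 2 and per-chunk
  // live-bytes {_, _, 10, 30, 20}, the vector becomes
  // {_, _, 2 * kOffsetChunkSize, 2 * kOffsetChunkSize + 10, 2 * kOffsetChunkSize + 40},
  // i.e. chunk_info_vec_[i] is the post-compact offset at which chunk i's first
  // live byte will land. Adding the pre-scan value of the last element (saved
  // in 'total_bytes' above) to the scanned last element yields the byte offset
  // of the post-compact end (before page alignment).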
1202   std::exclusive_scan(chunk_info_vec_ + black_dense_idx,
1203                       chunk_info_vec_ + vector_len,
1204                       chunk_info_vec_ + black_dense_idx,
1205                       black_dense_idx * kOffsetChunkSize);
1206   total_bytes += chunk_info_vec_[vector_len - 1];
1207   post_compact_end_ = AlignUp(moving_space_begin_ + total_bytes, gPageSize);
1208   CHECK_EQ(post_compact_end_, moving_space_begin_ + moving_first_objs_count_ * gPageSize)
1209       << "moving_first_objs_count_:" << moving_first_objs_count_
1210       << " black_dense_idx:" << black_dense_idx << " vector_len:" << vector_len
1211       << " total_bytes:" << total_bytes
1212       << " black_dense_end:" << reinterpret_cast<void*>(black_dense_end_)
1213       << " chunk_info_per_page:" << chunk_info_per_page;
1214   black_objs_slide_diff_ = black_allocations_begin_ - post_compact_end_;
1215   // We shouldn't be consuming more space after compaction than pre-compaction.
1216   CHECK_GE(black_objs_slide_diff_, 0);
1217   for (size_t i = vector_len; i < vector_length_; i++) {
1218     DCHECK_EQ(chunk_info_vec_[i], 0u);
1219   }
1220   if (black_objs_slide_diff_ == 0) {
1221     // Regardless of the gc-type, there are no pages to be compacted. Ensure
1222     // that we don't shrink the mid-gen, which will become old-gen in
1223     // FinishPhase(), thereby possibly moving some objects back to young-gen,
1224     // which can cause memory corruption due to missing card marks.
1225     mid_gen_end_ = std::max(mid_gen_end_, black_dense_end_);
1226     mid_gen_end_ = std::min(mid_gen_end_, post_compact_end_);
1227     return false;
1228   }
1229   if (use_generational_) {
1230     // The current value of mid_gen_end_ represents the end of the 'pre-compacted' mid-gen,
1231     // which was decided at the end of the previous GC. Compute the 'post-compacted' end of
1232     // mid-gen, which will be consumed by old-gen at the end of this GC cycle.
1233     DCHECK_NE(mid_gen_end_, nullptr);
1234     mirror::Object* first_obj = nullptr;
1235     if (mid_gen_end_ < black_allocations_begin_) {
1236       ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1237       // Find the first live object in the young-gen.
1238       moving_space_bitmap_->VisitMarkedRange</*kVisitOnce=*/true>(
1239           reinterpret_cast<uintptr_t>(mid_gen_end_),
1240           reinterpret_cast<uintptr_t>(black_allocations_begin_),
1241           [&first_obj](mirror::Object* obj) { first_obj = obj; });
1242     }
1243     if (first_obj != nullptr) {
1244       mirror::Object* compacted_obj;
1245       if (reinterpret_cast<uint8_t*>(first_obj) >= old_gen_end_) {
1246         // post-compact address of the first live object in young-gen.
1247         compacted_obj = PostCompactOldObjAddr(first_obj);
1248         DCHECK_LT(reinterpret_cast<uint8_t*>(compacted_obj), post_compact_end_);
1249       } else {
1250         DCHECK(!young_gen_);
1251         compacted_obj = first_obj;
1252       }
1253       // It's important to page-align mid-gen boundary. However, that means
1254       // there could be an object overlapping that boundary. We will deal with
1255       // the consequences of that at different places. Aligning up is important
1256       // to ensure that we don't de-promote an object from old-gen back to
1257       // young-gen. Otherwise, we may skip dirtying card for such an object if
1258       // it contains native-roots to young-gen.
1259       mid_gen_end_ = AlignUp(reinterpret_cast<uint8_t*>(compacted_obj), gPageSize);
1260       // We need to ensure that for any object in old-gen, its class is also in
1261       // there (for the same reason as mentioned above in the black-dense case).
1262       // So adjust mid_gen_end_ accordingly, in the worst case all the way up
1263       // to post_compact_end_.
1264       auto iter = class_after_obj_map_.lower_bound(ObjReference::FromMirrorPtr(first_obj));
1265       for (; iter != class_after_obj_map_.end(); iter++) {
1266         // 'mid_gen_end_' is now post-compact, so we need to compare with
1267         // post-compact addresses.
1268         compacted_obj =
1269             PostCompactAddress(iter->second.AsMirrorPtr(), old_gen_end_, moving_space_end_);
1270         // We cannot update the map with post-compact addresses yet as compaction-phase
1271         // expects pre-compacted addresses. So we will update in FinishPhase().
1272         if (reinterpret_cast<uint8_t*>(compacted_obj) < mid_gen_end_) {
1273           mirror::Object* klass = iter->first.AsMirrorPtr();
1274           DCHECK_LT(reinterpret_cast<uint8_t*>(klass), black_allocations_begin_);
1275           klass = PostCompactAddress(klass, old_gen_end_, moving_space_end_);
1276           // We only need to make sure that the class object doesn't move during
1277           // compaction, which can be ensured by just making its first word be
1278           // consumed into the old-gen.
1279           mid_gen_end_ =
1280               std::max(mid_gen_end_, reinterpret_cast<uint8_t*>(klass) + kObjectAlignment);
1281           mid_gen_end_ = AlignUp(mid_gen_end_, gPageSize);
1282         }
1283       }
1284       CHECK_LE(mid_gen_end_, post_compact_end_);
1285     } else {
1286       // Young-gen is empty.
1287       mid_gen_end_ = post_compact_end_;
1288     }
1289     DCHECK_LE(mid_gen_end_, post_compact_end_);
1290     // We need this temporary bitmap only when running in generational mode.
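    // The bit vector has one bit per kObjectAlignment-sized granule of
    // [old_gen_end_, mid_gen_end_). Bits are set in SetBitForMidToOldPromotion()
    // for objects compacted into this range, and are presumably consumed when
    // mid-gen is folded into old-gen in FinishPhase().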
1291     if (old_gen_end_ < mid_gen_end_) {
1292       mid_to_old_promo_bit_vec_.reset(
1293           new BitVector((mid_gen_end_ - old_gen_end_) / kObjectAlignment,
1294                         /*expandable=*/false,
1295                         Allocator::GetCallocAllocator()));
1296     }
1297   }
1298   // How do we handle compaction of heap portion used for allocations after the
1299   // marking-pause?
1300   // All allocations after the marking-pause are considered black (reachable)
1301   // for this GC cycle. However, they need not be allocated contiguously as
1302   // different mutators use TLABs. So we will compact the heap till the point
1303   // where allocations took place before the marking-pause. And everything after
1304   // that will be slid with TLAB holes, and then TLAB info in TLS will be
1305   // appropriately updated in the pre-compaction pause.
1306   // The chunk-info vector entries for the post-marking-pause allocations will
1307   // also be updated in the pre-compaction pause.
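  // (Sliding a black page while preserving the unused TLAB gaps inside it is
  // what SlideBlackPage() further below implements.)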
1308 
1309   if (!uffd_initialized_) {
1310     CreateUserfaultfd(/*post_fork=*/false);
1311   }
1312   return true;
1313 }
1314 
1315 class MarkCompact::VerifyRootMarkedVisitor : public SingleRootVisitor {
1316  public:
1317   explicit VerifyRootMarkedVisitor(MarkCompact* collector) : collector_(collector) { }
1318 
1319   void VisitRoot(mirror::Object* root, const RootInfo& info) override
1320       REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
1321     CHECK(collector_->IsMarked(root) != nullptr) << info.ToString();
1322   }
1323 
1324  private:
1325   MarkCompact* const collector_;
1326 };
1327 
1328 void MarkCompact::ReMarkRoots(Runtime* runtime) {
1329   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1330   DCHECK_EQ(thread_running_gc_, Thread::Current());
1331   Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1332   MarkNonThreadRoots(runtime);
1333   MarkConcurrentRoots(
1334       static_cast<VisitRootFlags>(kVisitRootFlagNewRoots | kVisitRootFlagStopLoggingNewRoots |
1335                                   kVisitRootFlagClearRootLog),
1336       runtime);
1337   if (kVerifyRootsMarked) {
1338     TimingLogger::ScopedTiming t2("(Paused)VerifyRoots", GetTimings());
1339     VerifyRootMarkedVisitor visitor(this);
1340     runtime->VisitRoots(&visitor);
1341   }
1342 }
1343 
1344 void MarkCompact::MarkingPause() {
1345   TimingLogger::ScopedTiming t("(Paused)MarkingPause", GetTimings());
1346   Runtime* runtime = Runtime::Current();
1347   Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1348   {
1349     // Handle the dirty objects, as this is a concurrent GC.
1350     WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1351     {
1352       MutexLock mu2(thread_running_gc_, *Locks::runtime_shutdown_lock_);
1353       MutexLock mu3(thread_running_gc_, *Locks::thread_list_lock_);
1354       std::list<Thread*> thread_list = runtime->GetThreadList()->GetList();
1355       for (Thread* thread : thread_list) {
1356         thread->VisitRoots(this, static_cast<VisitRootFlags>(0));
1357         DCHECK_EQ(thread->GetThreadLocalGcBuffer(), nullptr);
1358         // Need to revoke all the thread-local allocation stacks since we will
1359         // swap the allocation stacks (below) and don't want anybody to allocate
1360         // into the live stack.
1361         thread->RevokeThreadLocalAllocationStack();
1362         bump_pointer_space_->RevokeThreadLocalBuffers(thread);
1363       }
1364     }
1365     ProcessMarkStack();
1366     // Fetch only the accumulated objects-allocated count as it is guaranteed to
1367     // be up-to-date after the TLAB revocation above.
1368     freed_objects_ += bump_pointer_space_->GetAccumulatedObjectsAllocated();
1369     // Capture 'end' of moving-space at this point. Every allocation beyond this
1370     // point will be considered as black.
1371     // Align-up to page boundary so that black allocations happen from next page
1372     // onwards. Also, it ensures that 'end' is aligned for card-table's
1373     // ClearCardRange().
1374     black_allocations_begin_ = bump_pointer_space_->AlignEnd(thread_running_gc_, gPageSize, heap_);
1375     DCHECK_ALIGNED_PARAM(black_allocations_begin_, gPageSize);
1376 
1377     // Re-mark root set. Doesn't include thread-roots as they are already marked
1378     // above.
1379     ReMarkRoots(runtime);
1380     // Scan dirty objects.
1381     RecursiveMarkDirtyObjects(/*paused*/ true, accounting::CardTable::kCardDirty);
1382     {
1383       TimingLogger::ScopedTiming t2("SwapStacks", GetTimings());
1384       heap_->SwapStacks();
1385       live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
1386     }
1387   }
1388   // TODO: For PreSweepingGcVerification(), find correct strategy to visit/walk
1389   // objects in bump-pointer space when we have a mark-bitmap to indicate live
1390   // objects. At the same time we also need to be able to visit black allocations,
1391   // even though they are not marked in the bitmap. Without both of these we fail
1392   // pre-sweeping verification. We also leave windows open wherein a
1393   // VisitObjects/Walk on the space would either miss some objects or visit
1394   // unreachable ones. These windows occur while we are switching between the
1395   // shared and exclusive mutator-lock, from here until the compaction pause.
1396   // heap_->PreSweepingGcVerification(this);
1397 
1398   // Disallow new system weaks to prevent a race which occurs when someone adds
1399   // a new system weak before we sweep them. Since this new system weak may not
1400   // be marked, the GC may incorrectly sweep it. This also fixes a race where
1401   // interning may attempt to return a strong reference to a string that is
1402   // about to be swept.
1403   runtime->DisallowNewSystemWeaks();
1404   // Enable the reference-processing slow path; this needs to be done with mutators
1405   // paused since there is no lock in the GetReferent fast path.
1406   heap_->GetReferenceProcessor()->EnableSlowPath();
1407   marking_done_ = true;
1408 }
1409 
1410 void MarkCompact::SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) {
1411   TimingLogger::ScopedTiming t(paused ? "(Paused)SweepSystemWeaks" : "SweepSystemWeaks",
1412                                GetTimings());
1413   ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
1414   runtime->SweepSystemWeaks(this);
1415 }
1416 
1417 void MarkCompact::ProcessReferences(Thread* self) {
1418   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
1419   GetHeap()->GetReferenceProcessor()->ProcessReferences(self, GetTimings());
1420 }
1421 
1422 void MarkCompact::SweepArray(accounting::ObjectStack* obj_arr, bool swap_bitmaps) {
1423   TimingLogger::ScopedTiming t("SweepArray", GetTimings());
1424   std::vector<space::ContinuousSpace*> sweep_spaces;
1425   for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) {
1426     if (!space->IsAllocSpace() || space == bump_pointer_space_ ||
1427         immune_spaces_.ContainsSpace(space) || space->GetLiveBitmap() == nullptr) {
1428       continue;
1429     }
1430     sweep_spaces.push_back(space);
1431   }
1432   GarbageCollector::SweepArray(obj_arr, swap_bitmaps, &sweep_spaces);
1433 }
1434 
1435 void MarkCompact::Sweep(bool swap_bitmaps) {
1436   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1437   if (young_gen_) {
1438     // Only sweep objects on the live stack.
1439     SweepArray(heap_->GetLiveStack(), /*swap_bitmaps=*/false);
1440   } else {
1441     // Ensure that nobody inserted objects in the live stack after we swapped the
1442     // stacks.
1443     CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size());
1444     {
1445       TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings());
1446       // Mark everything allocated since the last GC as live so that we can sweep
1447       // concurrently, knowing that new allocations won't be marked as live.
1448       accounting::ObjectStack* live_stack = heap_->GetLiveStack();
1449       heap_->MarkAllocStackAsLive(live_stack);
1450       live_stack->Reset();
1451       DCHECK(mark_stack_->IsEmpty());
1452     }
1453     for (const auto& space : GetHeap()->GetContinuousSpaces()) {
1454       if (space->IsContinuousMemMapAllocSpace() && space != bump_pointer_space_ &&
1455           !immune_spaces_.ContainsSpace(space)) {
1456         space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
1457         DCHECK(!alloc_space->IsZygoteSpace());
1458         TimingLogger::ScopedTiming split("SweepMallocSpace", GetTimings());
1459         RecordFree(alloc_space->Sweep(swap_bitmaps));
1460       }
1461     }
1462     SweepLargeObjects(swap_bitmaps);
1463   }
1464 }
1465 
1466 void MarkCompact::SweepLargeObjects(bool swap_bitmaps) {
1467   space::LargeObjectSpace* los = heap_->GetLargeObjectsSpace();
1468   if (los != nullptr) {
1469     TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
1470     RecordFreeLOS(los->Sweep(swap_bitmaps));
1471   }
1472 }
1473 
1474 void MarkCompact::ReclaimPhase() {
1475   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1476   DCHECK(thread_running_gc_ == Thread::Current());
1477   Runtime* const runtime = Runtime::Current();
1478   // Process the references concurrently.
1479   ProcessReferences(thread_running_gc_);
1480   // TODO: Try to merge this system-weak sweeping with the one while updating
1481   // references during the compaction pause.
1482   SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ false);
1483   runtime->AllowNewSystemWeaks();
1484   // Clean up class loaders after system weaks are swept since that is how we know if class
1485   // unloading occurred.
1486   runtime->GetClassLinker()->CleanupClassLoaders();
1487   {
1488     WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1489     // Reclaim unmarked objects.
1490     Sweep(false);
1491     // Swap the live and mark bitmaps for each space that we modified. This is an
1492     // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
1493     // bitmaps.
1494     SwapBitmaps();
1495     // Unbind the live and mark bitmaps.
1496     GetHeap()->UnBindBitmaps();
1497   }
1498   // After sweeping and unbinding, we will need to use the non-moving space's
1499   // live-bitmap instead of its mark-bitmap.
1500   non_moving_space_bitmap_ = non_moving_space_->GetLiveBitmap();
1501 }
1502 
1503 // We want to avoid checking for every reference if it's within the page or
1504 // not. This can be done if we know where in the page the holder object lies.
1505   // If it doesn't overlap either boundary, then we can skip the checks.
1506 //
1507 // If kDirtyOldToMid = true, then check if the object contains any references
1508 // into young-gen, which will be mid-gen after this GC. This is required
1509 // as we mark and compact mid-gen again in next GC-cycle, and hence cards
1510 // need to be dirtied. Note that even black-allocations (the next young-gen)
1511 // will also have to be checked because the pages are being compacted and hence
1512 // the card corresponding to the compacted page needs to be dirtied.
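// Summary of the template parameters, based on how the visitor is used below:
// - kCheckBegin/kCheckEnd: skip reference fields located before 'begin' or at/
//   beyond 'end', which is needed when the holder object straddles the page
//   boundary being processed.
// - kDirtyOldToMid: additionally track (exposed via ShouldDirtyCard()) whether
//   any updated reference points into [mid_gen_end_, moving_space_end_), so
//   that the caller can dirty the holder's post-compact card.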
1513 template <bool kCheckBegin, bool kCheckEnd, bool kDirtyOldToMid>
1514 class MarkCompact::RefsUpdateVisitor {
1515  public:
1516   RefsUpdateVisitor(MarkCompact* collector,
1517                     mirror::Object* obj,
1518                     uint8_t* begin,
1519                     uint8_t* end,
1520                     accounting::CardTable* card_table = nullptr,
1521                     mirror::Object* card_obj = nullptr)
1522       : RefsUpdateVisitor(collector, obj, begin, end, false) {
1523     DCHECK(!kCheckBegin || begin != nullptr);
1524     DCHECK(!kCheckEnd || end != nullptr);
1525     // We can skip checking each reference for objects whose cards are already dirty.
1526     if (kDirtyOldToMid && card_obj != nullptr) {
1527       dirty_card_ = card_table->IsDirty(card_obj);
1528     }
1529   }
1530 
1531   RefsUpdateVisitor(
1532       MarkCompact* collector, mirror::Object* obj, uint8_t* begin, uint8_t* end, bool dirty_card)
1533       : collector_(collector),
1534         moving_space_begin_(collector->black_dense_end_),
1535         moving_space_end_(collector->moving_space_end_),
1536         young_gen_begin_(collector->mid_gen_end_),
1537         obj_(obj),
1538         begin_(begin),
1539         end_(end),
1540         dirty_card_(dirty_card) {}
1541 
1542   bool ShouldDirtyCard() const { return dirty_card_; }
1543 
1544   void operator()([[maybe_unused]] mirror::Object* old,
1545                   MemberOffset offset,
1546                   [[maybe_unused]] bool is_static) const ALWAYS_INLINE
1547       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1548     bool update = true;
1549     if (kCheckBegin || kCheckEnd) {
1550       uint8_t* ref = reinterpret_cast<uint8_t*>(obj_) + offset.Int32Value();
1551       update = (!kCheckBegin || ref >= begin_) && (!kCheckEnd || ref < end_);
1552     }
1553     if (update) {
1554       mirror::Object* new_ref =
1555           collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
1556       CheckShouldDirtyCard(new_ref);
1557     }
1558   }
1559 
1560   // For object arrays we don't need to check boundaries here as it's done in
1561   // VisitReferences().
1562   // TODO: Optimize reference updating using SIMD instructions. Object arrays
1563   // are perfect as all references are tightly packed.
1564   void operator()([[maybe_unused]] mirror::Object* old,
1565                   MemberOffset offset,
1566                   [[maybe_unused]] bool is_static,
1567                   [[maybe_unused]] bool is_obj_array) const ALWAYS_INLINE
1568       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1569     mirror::Object* new_ref =
1570         collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
1571     CheckShouldDirtyCard(new_ref);
1572   }
1573 
1574   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
1575       ALWAYS_INLINE
1576       REQUIRES_SHARED(Locks::mutator_lock_) {
1577     if (!root->IsNull()) {
1578       VisitRoot(root);
1579     }
1580   }
1581 
1582   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
1583       ALWAYS_INLINE
1584       REQUIRES_SHARED(Locks::mutator_lock_) {
1585     mirror::Object* new_ref = collector_->UpdateRoot(root, moving_space_begin_, moving_space_end_);
1586     CheckShouldDirtyCard(new_ref);
1587   }
1588 
1589  private:
1590   inline void CheckShouldDirtyCard(mirror::Object* ref) const {
1591     if (kDirtyOldToMid && !dirty_card_) {
1592       // moving_space_end_ is young-gen's end.
1593       dirty_card_ = reinterpret_cast<uint8_t*>(ref) >= young_gen_begin_ &&
1594                     reinterpret_cast<uint8_t*>(ref) < moving_space_end_;
1595     }
1596   }
1597 
1598   MarkCompact* const collector_;
1599   uint8_t* const moving_space_begin_;
1600   uint8_t* const moving_space_end_;
1601   uint8_t* const young_gen_begin_;
1602   mirror::Object* const obj_;
1603   uint8_t* const begin_;
1604   uint8_t* const end_;
1605   mutable bool dirty_card_;
1606 };
1607 
1608 inline void MarkCompact::SetBitForMidToOldPromotion(uint8_t* obj) {
1609   DCHECK(use_generational_);
1610   DCHECK_GE(obj, old_gen_end_);
1611   DCHECK_LT(obj, mid_gen_end_);
1612   // This doesn't need to be atomic as every thread only sets bits in the
1613   // bit_vector words corresponding to the page it is compacting.
1614   mid_to_old_promo_bit_vec_->SetBit((obj - old_gen_end_) / kObjectAlignment);
1615 }
1616 
1617 bool MarkCompact::IsValidObject(mirror::Object* obj) const {
1618   mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
1619   if (!heap_->GetVerification()->IsValidHeapObjectAddress(klass)) {
1620     return false;
1621   }
1622   return heap_->GetVerification()->IsValidClassUnchecked<kWithFromSpaceBarrier>(
1623           obj->GetClass<kVerifyNone, kWithFromSpaceBarrier>());
1624 }
1625 
1626 template <typename Callback>
1627 void MarkCompact::VerifyObject(mirror::Object* ref, Callback& callback) const {
1628   if (kIsDebugBuild) {
1629     mirror::Class* klass = ref->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1630     mirror::Class* pre_compact_klass = ref->GetClass<kVerifyNone, kWithoutReadBarrier>();
1631     mirror::Class* klass_klass = klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1632     mirror::Class* klass_klass_klass = klass_klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1633     if (HasAddress(pre_compact_klass) &&
1634         reinterpret_cast<uint8_t*>(pre_compact_klass) < black_allocations_begin_) {
1635       CHECK(moving_space_bitmap_->Test(pre_compact_klass))
1636           << "ref=" << ref
1637           << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1638           << " pre_compact_klass=" << pre_compact_klass
1639           << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1640       if (!young_gen_) {
1641         CHECK(live_words_bitmap_->Test(pre_compact_klass));
1642       }
1643     }
1644     if (!IsValidObject(ref)) {
1645       std::ostringstream oss;
1646       oss << "Invalid object: "
1647           << "ref=" << ref
1648           << " klass=" << klass
1649           << " klass_klass=" << klass_klass
1650           << " klass_klass_klass=" << klass_klass_klass
1651           << " pre_compact_klass=" << pre_compact_klass
1652           << " from_space_begin=" << static_cast<void*>(from_space_begin_)
1653           << " pre_compact_begin=" << static_cast<void*>(bump_pointer_space_->Begin())
1654           << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1655           << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1656 
1657       // Call callback before dumping larger data like RAM and space dumps.
1658       callback(oss);
1659 
1660       oss << " \nobject="
1661           << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(ref), 128)
1662           << " \nklass(from)="
1663           << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(klass), 128)
1664           << "spaces:\n";
1665       heap_->DumpSpaces(oss);
1666       LOG(FATAL) << oss.str();
1667     }
1668   }
1669 }
1670 
1671 template <bool kSetupForGenerational>
1672 void MarkCompact::CompactPage(mirror::Object* obj,
1673                               uint32_t offset,
1674                               uint8_t* addr,
1675                               uint8_t* to_space_addr,
1676                               bool needs_memset_zero) {
1677   DCHECK_ALIGNED_PARAM(to_space_addr, gPageSize);
1678   DCHECK(moving_space_bitmap_->Test(obj)
1679          && live_words_bitmap_->Test(obj));
1680   DCHECK(live_words_bitmap_->Test(offset)) << "obj=" << obj
1681                                            << " offset=" << offset
1682                                            << " addr=" << static_cast<void*>(addr)
1683                                            << " black_allocs_begin="
1684                                            << static_cast<void*>(black_allocations_begin_)
1685                                            << " post_compact_addr="
1686                                            << static_cast<void*>(post_compact_end_);
1687   accounting::CardTable* card_table = heap_->GetCardTable();
1688   uint8_t* const start_addr = addr;
1689   // We need to find the dirty cards in the mid-gen (which is going to be consumed
1690   // into old-gen after this GC), i.e. cards dirtied after the marking-pause and
1691   // until the compaction pause, and dirty the corresponding post-compact cards. We
1692   // could instead detect such reference fields while updating them in RefsUpdateVisitor,
1693   // but that would not catch native-roots, hence we need to directly look at the
1694   // pre-compact card-table.
1695   // NOTE: we may get some false-positives if the same address in the post-compact
1696   // heap has already been allocated as a TLAB and has had write-barriers
1697   // invoked on it. But that is not harmful.
1698   size_t cards_per_page = gPageSize >> accounting::CardTable::kCardShift;
1699   size_t dest_cards = 0;
1700   DCHECK(IsAligned<accounting::CardTable::kCardSize>(gPageSize));
1701   static_assert(sizeof(dest_cards) * kBitsPerByte >=
1702                 kMaxPageSize / accounting::CardTable::kCardSize);
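  // 'dest_cards' is a bitmask with one bit per card of the destination page:
  // bit i is set when post-compact card i must be treated as dirty because a
  // dirty pre-compact card contributes to it. It is consulted below when
  // constructing the RefsUpdateVisitor for the objects on the page.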
1703   // How many distinct live-strides do we have.
1704   size_t stride_count = 0;
1705   uint8_t* last_stride = addr;
1706   uint32_t last_stride_begin = 0;
1707   auto verify_obj_callback = [&](std::ostream& os) {
1708     os << " stride_count=" << stride_count << " last_stride=" << static_cast<void*>(last_stride)
1709        << " offset=" << offset << " start_addr=" << static_cast<void*>(start_addr);
1710   };
1711   live_words_bitmap_->VisitLiveStrides(
1712       offset,
1713       black_allocations_begin_,
1714       gPageSize,
1715       [&](uint32_t stride_begin, size_t stride_size, [[maybe_unused]] bool is_last)
1716           REQUIRES_SHARED(Locks::mutator_lock_) {
1717             size_t stride_in_bytes = stride_size * kAlignment;
1718             size_t stride_begin_bytes = stride_begin * kAlignment;
1719             DCHECK_LE(stride_in_bytes, gPageSize);
1720             last_stride_begin = stride_begin;
1721             DCHECK(IsAligned<kAlignment>(addr));
1722             memcpy(addr, from_space_begin_ + stride_begin_bytes, stride_in_bytes);
1723             if (kIsDebugBuild) {
1724               uint8_t* space_begin = bump_pointer_space_->Begin();
1725               // We can interpret the first word of the stride as an
1726               // obj only from second stride onwards, as the first
1727               // stride's first-object may have started on previous
1728               // page. The only exception is the first page of the
1729               // moving space.
1730               if (stride_count > 0 || stride_begin * kAlignment < gPageSize) {
1731                 mirror::Object* o =
1732                     reinterpret_cast<mirror::Object*>(space_begin + stride_begin * kAlignment);
1733                 CHECK(live_words_bitmap_->Test(o)) << "ref=" << o;
1734                 CHECK(moving_space_bitmap_->Test(o))
1735                     << "ref=" << o << " bitmap: " << moving_space_bitmap_->DumpMemAround(o);
1736                 VerifyObject(reinterpret_cast<mirror::Object*>(addr), verify_obj_callback);
1737               }
1738             }
1739             last_stride = addr;
1740             stride_count++;
1741             if (kSetupForGenerational) {
1742               // Card idx within the gPageSize sized destination page.
1743               size_t dest_card_idx = (addr - start_addr) >> accounting::CardTable::kCardShift;
1744               DCHECK_LT(dest_card_idx, cards_per_page);
1745               // Bytes remaining to fill in the current dest card.
1746               size_t dest_bytes_remaining = accounting::CardTable::kCardSize -
1747                                             (addr - start_addr) % accounting::CardTable::kCardSize;
1748               // Update 'addr' for next stride before starting to modify
1749               // 'stride_in_bytes' in the loops below.
1750               addr += stride_in_bytes;
1751               // Unconsumed bytes in the current src card.
1752               size_t src_card_bytes = accounting::CardTable::kCardSize -
1753                                       stride_begin_bytes % accounting::CardTable::kCardSize;
1754               src_card_bytes = std::min(src_card_bytes, stride_in_bytes);
1755               uint8_t* end_card = card_table->CardFromAddr(
1756                   moving_space_begin_ + stride_begin_bytes + stride_in_bytes - 1);
1757               for (uint8_t* card =
1758                        card_table->CardFromAddr(moving_space_begin_ + stride_begin_bytes);
1759                    card <= end_card;
1760                    card++) {
1761                 if (*card == accounting::CardTable::kCardDirty) {
1762                   // If the current src card will contribute to the next dest
1763                   // card as well, then dirty the next one too.
1764                   size_t val = dest_bytes_remaining < src_card_bytes ? 3 : 1;
1765                   dest_cards |= val << dest_card_idx;
1766                 }
1767                 // Adjust destination card and its remaining bytes for next iteration.
1768                 if (dest_bytes_remaining <= src_card_bytes) {
1769                   dest_bytes_remaining =
1770                       accounting::CardTable::kCardSize - (src_card_bytes - dest_bytes_remaining);
1771                   dest_card_idx++;
1772                 } else {
1773                   dest_bytes_remaining -= src_card_bytes;
1774                 }
1775                 DCHECK_LE(dest_card_idx, cards_per_page);
1776                 stride_in_bytes -= src_card_bytes;
1777                 src_card_bytes = std::min(accounting::CardTable::kCardSize, stride_in_bytes);
1778               }
1779             } else {
1780               addr += stride_in_bytes;
1781             }
1782           });
1783   DCHECK_LT(last_stride, start_addr + gPageSize);
1784   DCHECK_GT(stride_count, 0u);
1785   size_t obj_size = 0;
1786   uint32_t offset_within_obj =
1787       offset * kAlignment - (reinterpret_cast<uint8_t*>(obj) - moving_space_begin_);
1788   // First object
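  // The page's first object may have started on a preceding page; in that case
  // only the reference fields at or after 'offset_within_obj' belong to this
  // page and are updated here, using the from-space copy of the object to read
  // its class and size.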
1789   if (offset_within_obj > 0) {
1790     bool should_dirty_card;
1791     mirror::Object* to_ref = reinterpret_cast<mirror::Object*>(start_addr - offset_within_obj);
1792     mirror::Object* from_obj = GetFromSpaceAddr(obj);
1793     mirror::Object* post_compact_obj = nullptr;
1794     if (kSetupForGenerational) {
1795       post_compact_obj = PostCompactAddress(obj, black_dense_end_, moving_space_end_);
1796     }
1797     if (stride_count > 1) {
1798       RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ false, kSetupForGenerational> visitor(
1799           this, to_ref, start_addr, nullptr, card_table, post_compact_obj);
1800       obj_size =
1801           from_obj->VisitRefsForCompaction</*kFetchObjSize*/ true, /*kVisitNativeRoots*/ false>(
1802               visitor, MemberOffset(offset_within_obj), MemberOffset(-1));
1803       should_dirty_card = visitor.ShouldDirtyCard();
1804     } else {
1805       RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ true, kSetupForGenerational> visitor(
1806           this, to_ref, start_addr, start_addr + gPageSize, card_table, post_compact_obj);
1807       obj_size =
1808           from_obj->VisitRefsForCompaction</*kFetchObjSize*/ true, /*kVisitNativeRoots*/ false>(
1809               visitor,
1810               MemberOffset(offset_within_obj),
1811               MemberOffset(offset_within_obj + gPageSize));
1812       should_dirty_card = visitor.ShouldDirtyCard();
1813     }
1814     if (kSetupForGenerational && should_dirty_card) {
1815       card_table->MarkCard(post_compact_obj);
1816     }
1817     obj_size = RoundUp(obj_size, kAlignment);
1818     DCHECK_GT(obj_size, offset_within_obj)
1819         << "obj:" << obj
1820         << " class:" << from_obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1821         << " to_addr:" << to_ref
1822         << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1823         << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1824         << " offset:" << offset * kAlignment << " class-after-obj-iter:"
1825         << (class_after_obj_iter_ != class_after_obj_map_.rend()
1826                 ? class_after_obj_iter_->first.AsMirrorPtr()
1827                 : nullptr)
1828         << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1829         << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1830         << " offset-of-last-idx:"
1831         << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1832         << " first-obj-of-last-idx:"
1833         << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1834 
1835     obj_size -= offset_within_obj;
1836     // If there is only one stride, then adjust last_stride_begin to the
1837     // end of the first object.
1838     if (stride_count == 1) {
1839       last_stride_begin += obj_size / kAlignment;
1840     }
1841   }
1842 
1843   // Except for the last page being compacted, the pages will have addr ==
1844   // start_addr + gPageSize.
1845   uint8_t* const end_addr = addr;
1846   addr = start_addr;
1847   size_t bytes_done = obj_size;
1848   // All strides except the last one can be updated without any boundary
1849   // checks.
1850   DCHECK_LE(addr, last_stride);
1851   size_t bytes_to_visit = last_stride - addr;
1852   DCHECK_LE(bytes_to_visit, gPageSize);
1853   while (bytes_to_visit > bytes_done) {
1854     mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1855     VerifyObject(ref, verify_obj_callback);
1856     RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ false, kSetupForGenerational> visitor(
1857         this,
1858         ref,
1859         nullptr,
1860         nullptr,
1861         dest_cards & (1 << (bytes_done >> accounting::CardTable::kCardShift)));
1862     obj_size = ref->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1863     if (kSetupForGenerational) {
1864       SetBitForMidToOldPromotion(to_space_addr + bytes_done);
1865       if (visitor.ShouldDirtyCard()) {
1866         card_table->MarkCard(reinterpret_cast<mirror::Object*>(to_space_addr + bytes_done));
1867       }
1868     }
1869     obj_size = RoundUp(obj_size, kAlignment);
1870     bytes_done += obj_size;
1871   }
1872   // The last stride may have multiple objects in it, and we don't know where the
1873   // last object, which crosses the page boundary, starts; therefore, check
1874   // the page-end in all of these objects. Also, we need to call
1875   // VisitRefsForCompaction() with from-space object as we fetch object size,
1876   // which in case of klass requires 'class_size_'.
1877   uint8_t* from_addr = from_space_begin_ + last_stride_begin * kAlignment;
1878   bytes_to_visit = end_addr - addr;
1879   DCHECK_LE(bytes_to_visit, gPageSize);
1880   while (bytes_to_visit > bytes_done) {
1881     mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1882     obj = reinterpret_cast<mirror::Object*>(from_addr);
1883     VerifyObject(ref, verify_obj_callback);
1884     RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true, kSetupForGenerational> visitor(
1885         this,
1886         ref,
1887         nullptr,
1888         start_addr + gPageSize,
1889         dest_cards & (1 << (bytes_done >> accounting::CardTable::kCardShift)));
1890     obj_size = obj->VisitRefsForCompaction(visitor,
1891                                            MemberOffset(0),
1892                                            MemberOffset(end_addr - (addr + bytes_done)));
1893     if (kSetupForGenerational) {
1894       SetBitForMidToOldPromotion(to_space_addr + bytes_done);
1895       if (visitor.ShouldDirtyCard()) {
1896         card_table->MarkCard(reinterpret_cast<mirror::Object*>(to_space_addr + bytes_done));
1897       }
1898     }
1899     obj_size = RoundUp(obj_size, kAlignment);
1900     DCHECK_GT(obj_size, 0u)
1901         << "from_addr:" << obj
1902         << " from-space-class:" << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1903         << " to_addr:" << ref
1904         << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1905         << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1906         << " offset:" << offset * kAlignment << " bytes_done:" << bytes_done
1907         << " class-after-obj-iter:"
1908         << (class_after_obj_iter_ != class_after_obj_map_.rend() ?
1909                 class_after_obj_iter_->first.AsMirrorPtr() :
1910                 nullptr)
1911         << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1912         << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1913         << " offset-of-last-idx:"
1914         << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1915         << " first-obj-of-last-idx:"
1916         << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1917 
1918     from_addr += obj_size;
1919     bytes_done += obj_size;
1920   }
1921   // The last page that we compact may have some bytes left untouched at the
1922   // end; we should zero them as the kernel copies at page granularity.
1923   if (needs_memset_zero && UNLIKELY(bytes_done < gPageSize)) {
1924     std::memset(addr + bytes_done, 0x0, gPageSize - bytes_done);
1925   }
1926 }
1927 
1928 // We store the starting point (pre_compact_page - first_obj) and first-chunk's
1929 // size. If more TLAB(s) started in this page, then those chunks are identified
1930 // using mark bitmap. All this info is prepared in UpdateMovingSpaceBlackAllocations().
1931 // If we find a set bit in the bitmap, then we copy the remaining page and then
1932 // use the bitmap to visit each object for updating references.
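// Parameter roles, as used below: 'first_obj' is the pre-compact first object
// of the page being slid, 'next_page_first_obj' the first object of the
// following page (which may start on this page), 'first_chunk_size' the size
// in bytes of the page's first contiguous chunk of black allocations, and
// 'dest' the page-sized destination to which the contents are slid.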
1933 void MarkCompact::SlideBlackPage(mirror::Object* first_obj,
1934                                  mirror::Object* next_page_first_obj,
1935                                  uint32_t first_chunk_size,
1936                                  uint8_t* const pre_compact_page,
1937                                  uint8_t* dest,
1938                                  bool needs_memset_zero) {
1939   DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
1940   size_t bytes_copied;
1941   uint8_t* src_addr = reinterpret_cast<uint8_t*>(GetFromSpaceAddr(first_obj));
1942   uint8_t* pre_compact_addr = reinterpret_cast<uint8_t*>(first_obj);
1943   uint8_t* const pre_compact_page_end = pre_compact_page + gPageSize;
1944   uint8_t* const dest_page_end = dest + gPageSize;
1945 
1946   auto verify_obj_callback = [&] (std::ostream& os) {
1947                                os << " first_obj=" << first_obj
1948                                   << " next_page_first_obj=" << next_page_first_obj
1949                                   << " first_chunk_size=" << first_chunk_size
1950                                   << " dest=" << static_cast<void*>(dest)
1951                                   << " pre_compact_page="
1952                                   << static_cast<void* const>(pre_compact_page);
1953                              };
1954   // We have an empty portion at the beginning of the page. Zero it.
1955   if (pre_compact_addr > pre_compact_page) {
1956     bytes_copied = pre_compact_addr - pre_compact_page;
1957     DCHECK_LT(bytes_copied, gPageSize);
1958     if (needs_memset_zero) {
1959       std::memset(dest, 0x0, bytes_copied);
1960     }
1961     dest += bytes_copied;
1962   } else {
1963     bytes_copied = 0;
1964     size_t offset = pre_compact_page - pre_compact_addr;
1965     pre_compact_addr = pre_compact_page;
1966     src_addr += offset;
1967     DCHECK(IsAlignedParam(src_addr, gPageSize));
1968   }
1969   // Copy the first chunk of live words
1970   std::memcpy(dest, src_addr, first_chunk_size);
1971   // Update references in the first chunk. Use object size to find next object.
1972   {
1973     size_t bytes_to_visit = first_chunk_size;
1974     size_t obj_size;
1975     // The first object started in some previous page. So we need to check the
1976     // beginning.
1977     DCHECK_LE(reinterpret_cast<uint8_t*>(first_obj), pre_compact_addr);
1978     size_t offset = pre_compact_addr - reinterpret_cast<uint8_t*>(first_obj);
1979     if (bytes_copied == 0 && offset > 0) {
1980       mirror::Object* to_obj = reinterpret_cast<mirror::Object*>(dest - offset);
1981       mirror::Object* from_obj = reinterpret_cast<mirror::Object*>(src_addr - offset);
1982       // If the next page's first-obj is in this page or nullptr, then we don't
1983       // need to check end boundary
1984       if (next_page_first_obj == nullptr
1985           || (first_obj != next_page_first_obj
1986               && reinterpret_cast<uint8_t*>(next_page_first_obj) <= pre_compact_page_end)) {
1987         RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1988                                                                            to_obj,
1989                                                                            dest,
1990                                                                            nullptr);
1991         obj_size = from_obj->VisitRefsForCompaction<
1992                 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1993                                                                    MemberOffset(offset),
1994                                                                    MemberOffset(-1));
1995       } else {
1996         RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1997                                                                           to_obj,
1998                                                                           dest,
1999                                                                           dest_page_end);
2000         obj_size = from_obj->VisitRefsForCompaction<
2001                 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
2002                                                                    MemberOffset(offset),
2003                                                                    MemberOffset(offset
2004                                                                                 + gPageSize));
2005         if (first_obj == next_page_first_obj) {
2006           // First object is the only object on this page. So there's nothing else left to do.
2007           return;
2008         }
2009       }
2010       obj_size = RoundUp(obj_size, kAlignment);
2011       obj_size -= offset;
2012       dest += obj_size;
2013       bytes_to_visit -= obj_size;
2014     }
2015     bytes_copied += first_chunk_size;
2016     // If the last object in this page is next_page_first_obj, then we need to check end boundary
2017     bool check_last_obj = false;
2018     if (next_page_first_obj != nullptr
2019         && reinterpret_cast<uint8_t*>(next_page_first_obj) < pre_compact_page_end
2020         && bytes_copied == gPageSize) {
2021       size_t diff = pre_compact_page_end - reinterpret_cast<uint8_t*>(next_page_first_obj);
2022       DCHECK_LE(diff, gPageSize);
2023       DCHECK_LE(diff, bytes_to_visit);
2024       bytes_to_visit -= diff;
2025       check_last_obj = true;
2026     }
2027     while (bytes_to_visit > 0) {
2028       mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
2029       VerifyObject(dest_obj, verify_obj_callback);
2030       RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(this,
2031                                                                           dest_obj,
2032                                                                           nullptr,
2033                                                                           nullptr);
2034       obj_size = dest_obj->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
2035       obj_size = RoundUp(obj_size, kAlignment);
2036       bytes_to_visit -= obj_size;
2037       dest += obj_size;
2038     }
2039     DCHECK_EQ(bytes_to_visit, 0u);
2040     if (check_last_obj) {
2041       mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
2042       VerifyObject(dest_obj, verify_obj_callback);
2043       RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this,
2044                                                                          dest_obj,
2045                                                                          nullptr,
2046                                                                          dest_page_end);
2047       mirror::Object* obj = GetFromSpaceAddr(next_page_first_obj);
2048       obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
2049                                                           MemberOffset(0),
2050                                                           MemberOffset(dest_page_end - dest));
2051       return;
2052     }
2053   }
2054 
2055   // Probably a TLAB finished on this page and/or a new TLAB started as well.
2056   if (bytes_copied < gPageSize) {
2057     src_addr += first_chunk_size;
2058     pre_compact_addr += first_chunk_size;
2059     // Use mark-bitmap to identify where objects are. First call
2060     // VisitMarkedRange for only the first marked bit. If no object is found,
2061     // zero the remaining bytes and return. Otherwise, copy the rest of the page
2062     // and then call VisitMarkedRange for all marked bits *after* the one found
2063     // in this invocation, this time to visit references.
2064     uintptr_t start_visit = reinterpret_cast<uintptr_t>(pre_compact_addr);
2065     uintptr_t page_end = reinterpret_cast<uintptr_t>(pre_compact_page_end);
2066     mirror::Object* found_obj = nullptr;
2067     moving_space_bitmap_->VisitMarkedRange</*kVisitOnce*/true>(start_visit,
2068                                                                 page_end,
2069                                                                 [&found_obj](mirror::Object* obj) {
2070                                                                   found_obj = obj;
2071                                                                 });
2072     size_t remaining_bytes = gPageSize - bytes_copied;
2073     if (found_obj == nullptr) {
2074       if (needs_memset_zero) {
2075         // No more black objects in this page. Zero the remaining bytes and return.
2076         std::memset(dest, 0x0, remaining_bytes);
2077       }
2078       return;
2079     }
2080     // Copy everything in this page, which includes any zeroed regions
2081     // in-between.
2082     std::memcpy(dest, src_addr, remaining_bytes);
2083     DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
2084     moving_space_bitmap_->VisitMarkedRange(
2085             reinterpret_cast<uintptr_t>(found_obj) + mirror::kObjectHeaderSize,
2086             page_end,
2087             [&found_obj, pre_compact_addr, dest, this, verify_obj_callback] (mirror::Object* obj)
2088             REQUIRES_SHARED(Locks::mutator_lock_) {
2089               ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
2090               mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff);
2091               VerifyObject(ref, verify_obj_callback);
2092               RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
2093                       visitor(this, ref, nullptr, nullptr);
2094               ref->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
2095                                                                   MemberOffset(0),
2096                                                                   MemberOffset(-1));
2097               // Remember for next round.
2098               found_obj = obj;
2099             });
2100     // found_obj may have been updated in VisitMarkedRange. Visit the last found
2101     // object.
2102     DCHECK_GT(reinterpret_cast<uint8_t*>(found_obj), pre_compact_addr);
2103     DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
2104     ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
2105     mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest + diff);
2106     VerifyObject(dest_obj, verify_obj_callback);
2107     RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true> visitor(
2108         this, dest_obj, nullptr, dest_page_end);
2109     // Last object could overlap with next page. And if it happens to be a
2110     // class, then we may access something (like static-fields' offsets) which
2111     // is on the next page. Therefore, use from-space's reference.
2112     mirror::Object* obj = GetFromSpaceAddr(found_obj);
2113     obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2114         visitor, MemberOffset(0), MemberOffset(page_end - reinterpret_cast<uintptr_t>(found_obj)));
2115   }
2116 }
2117 
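// Back-off helper used when retrying the uffd ioctls below under mmap-lock
// contention: the first kYieldMax attempts simply yield the CPU, after which
// the thread sleeps for kSleepUs * (i - kYieldMax) microseconds, i.e. linearly
// longer on each successive attempt.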
2118 template <uint32_t kYieldMax = 5, uint64_t kSleepUs = 10>
2119 static void BackOff(uint32_t i) {
2120   // TODO: Consider adding x86 PAUSE and/or ARM YIELD here.
2121   if (i <= kYieldMax) {
2122     sched_yield();
2123   } else {
2124     // nanosleep is not in the async-signal-safe list, but bionic implements it
2125     // with a pure system call, so it should be fine.
2126     NanoSleep(kSleepUs * 1000 * (i - kYieldMax));
2127   }
2128 }
2129 
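// Maps zero-pages over [addr, addr + length) via UFFDIO_ZEROPAGE and returns
// the number of bytes now mapped at 'addr'. This can be less than 'length' if
// mmap-lock contention was observed after partial progress, or if a page was
// found already mapped (EEXIST, when tolerated); a tolerated ENOENT returns 0.
// When supported, the mmap-trylock mode is attempted first and dropped only
// after a bounded, priority-dependent back-off.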
2130 size_t MarkCompact::ZeropageIoctl(void* addr,
2131                                   size_t length,
2132                                   bool tolerate_eexist,
2133                                   bool tolerate_enoent) {
2134   int32_t backoff_count = -1;
2135   int32_t max_backoff = 10;  // max native priority.
2136   struct uffdio_zeropage uffd_zeropage;
2137   DCHECK(IsAlignedParam(addr, gPageSize));
2138   uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr);
2139   uffd_zeropage.range.len = length;
2140   uffd_zeropage.mode = gUffdSupportsMmapTrylock ? UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK : 0;
2141   while (true) {
2142     uffd_zeropage.zeropage = 0;
2143     int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage);
2144     if (ret == 0) {
2145       DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(length));
2146       return length;
2147     } else if (errno == EAGAIN) {
2148       if (uffd_zeropage.zeropage > 0) {
2149         // Contention was observed after acquiring mmap_lock. But the first page
2150         // is already done, which is what we care about.
2151         DCHECK(IsAlignedParam(uffd_zeropage.zeropage, gPageSize));
2152         DCHECK_GE(uffd_zeropage.zeropage, static_cast<ssize_t>(gPageSize));
2153         return uffd_zeropage.zeropage;
2154       } else if (uffd_zeropage.zeropage < 0) {
2155         // mmap_read_trylock() failed due to contention. Back-off and retry.
2156         DCHECK_EQ(uffd_zeropage.zeropage, -EAGAIN);
2157         if (backoff_count == -1) {
2158           int prio = Thread::Current()->GetNativePriority();
2159           DCHECK(prio > 0 && prio <= 10) << prio;
2160           max_backoff -= prio;
2161           backoff_count = 0;
2162         }
2163         if (backoff_count < max_backoff) {
2164           // Using 3 to align 'normal' priority threads with sleep.
2165           BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
2166         } else {
2167           uffd_zeropage.mode = 0;
2168         }
2169       }
2170     } else if (tolerate_eexist && errno == EEXIST) {
2171       // Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
2172       // wouldn't be included in it.
2173       return uffd_zeropage.zeropage > 0 ? uffd_zeropage.zeropage + gPageSize : gPageSize;
2174     } else {
2175       CHECK(tolerate_enoent && errno == ENOENT)
2176           << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr;
2177       return 0;
2178     }
2179   }
2180 }
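// Illustrative sketch (not part of the collector) of the bare UFFDIO_ZEROPAGE call
// that ZeropageIoctl() above wraps, assuming 'uffd' is registered over the range:
//
//   struct uffdio_zeropage zp;
//   zp.range.start = reinterpret_cast<uintptr_t>(addr);  // page-aligned
//   zp.range.len = length;                               // page-aligned
//   zp.mode = 0;                                         // no MMAP_TRYLOCK
//   if (ioctl(uffd, UFFDIO_ZEROPAGE, &zp) == -1) {
//     // zp.zeropage holds the bytes zeroed before the failure, or a negated errno.
//   }
//
// The wrapper adds the MMAP_TRYLOCK mode, priority-aware backoff on mmap_lock
// contention, and tolerance of EEXIST/ENOENT races with mutators.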
2181 
2182 size_t MarkCompact::CopyIoctl(
2183     void* dst, void* buffer, size_t length, bool return_on_contention, bool tolerate_enoent) {
2184   int32_t backoff_count = -1;
2185   int32_t max_backoff = 10;  // max native priority.
2186   struct uffdio_copy uffd_copy;
2187   uffd_copy.mode = gUffdSupportsMmapTrylock ? UFFDIO_COPY_MODE_MMAP_TRYLOCK : 0;
2188   uffd_copy.src = reinterpret_cast<uintptr_t>(buffer);
2189   uffd_copy.dst = reinterpret_cast<uintptr_t>(dst);
2190   uffd_copy.len = length;
2191   uffd_copy.copy = 0;
2192   while (true) {
2193     int ret = ioctl(uffd_, UFFDIO_COPY, &uffd_copy);
2194     if (ret == 0) {
2195       DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(length));
2196       break;
2197     } else if (errno == EAGAIN) {
2198       // Contention observed.
2199       DCHECK_NE(uffd_copy.copy, 0);
2200       if (uffd_copy.copy > 0) {
2201         // Contention was observed after acquiring mmap_lock.
2202         DCHECK(IsAlignedParam(uffd_copy.copy, gPageSize));
2203         DCHECK_GE(uffd_copy.copy, static_cast<ssize_t>(gPageSize));
2204         break;
2205       } else {
2206         // mmap_read_trylock() failed due to contention.
2207         DCHECK_EQ(uffd_copy.copy, -EAGAIN);
2208         uffd_copy.copy = 0;
2209         if (return_on_contention) {
2210           break;
2211         }
2212       }
2213       if (backoff_count == -1) {
2214         int prio = Thread::Current()->GetNativePriority();
2215         DCHECK(prio > 0 && prio <= 10) << prio;
2216         max_backoff -= prio;
2217         backoff_count = 0;
2218       }
2219       if (backoff_count < max_backoff) {
2220         // Using 3 to align 'normal' priority threads with sleep.
2221         BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
2222       } else {
2223         uffd_copy.mode = 0;
2224       }
2225     } else if (errno == EEXIST) {
2226       DCHECK_NE(uffd_copy.copy, 0);
2227       if (uffd_copy.copy < 0) {
2228         uffd_copy.copy = 0;
2229       }
2230       // Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
2231       // wouldn't be included in it.
2232       uffd_copy.copy += gPageSize;
2233       break;
2234     } else {
2235       CHECK(tolerate_enoent && errno == ENOENT)
2236           << "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". src:" << buffer
2237           << " dst:" << dst;
2238       return uffd_copy.copy > 0 ? uffd_copy.copy : 0;
2239     }
2240   }
2241   return uffd_copy.copy;
2242 }
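// Illustrative sketch (not part of the collector) of the bare UFFDIO_COPY call that
// CopyIoctl() above wraps; it atomically populates the still-missing, registered
// destination pages from a private buffer:
//
//   struct uffdio_copy uc;
//   uc.src = reinterpret_cast<uintptr_t>(buffer);
//   uc.dst = reinterpret_cast<uintptr_t>(dst);  // page-aligned, uffd-registered
//   uc.len = length;
//   uc.mode = 0;
//   uc.copy = 0;
//   if (ioctl(uffd, UFFDIO_COPY, &uc) == -1) {
//     // uc.copy holds the bytes copied before the failure, or a negated errno
//     // such as -EAGAIN.
//   }
//
// As above, the wrapper layers MMAP_TRYLOCK, backoff and EEXIST handling on top.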
2243 
2244 template <int kMode, typename CompactionFn>
2245 bool MarkCompact::DoPageCompactionWithStateChange(size_t page_idx,
2246                                                   uint8_t* to_space_page,
2247                                                   uint8_t* page,
2248                                                   bool map_immediately,
2249                                                   CompactionFn func) {
2250   uint32_t expected_state = static_cast<uint8_t>(PageState::kUnprocessed);
2251   uint32_t desired_state = static_cast<uint8_t>(map_immediately ? PageState::kProcessingAndMapping :
2252                                                                   PageState::kProcessing);
2253   // In the concurrent case (kMode != kFallbackMode) we need to ensure that the update
2254   // to moving_spaces_status_[page_idx] is released before the contents of the page are
2255   // made accessible to other threads.
2256   //
2257   // We need acquire ordering here to ensure that when the CAS fails, another thread
2258   // has completed processing the page, which is guaranteed by the release below.
2259   if (kMode == kFallbackMode || moving_pages_status_[page_idx].compare_exchange_strong(
2260                                     expected_state, desired_state, std::memory_order_acquire)) {
2261     func();
2262     if (kMode == kCopyMode) {
2263       if (map_immediately) {
2264         CopyIoctl(to_space_page,
2265                   page,
2266                   gPageSize,
2267                   /*return_on_contention=*/false,
2268                   /*tolerate_enoent=*/false);
2269         // Store is sufficient as no other thread could modify the status at this
2270         // point. Relaxed order is sufficient as the ioctl will act as a fence.
2271         moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2272                                              std::memory_order_relaxed);
2273       } else {
2274         // Add the src page's index in the status word.
2275         DCHECK(from_space_map_.HasAddress(page));
2276         DCHECK_LE(static_cast<size_t>(page - from_space_begin_),
2277                   std::numeric_limits<uint32_t>::max());
2278         uint32_t store_val = page - from_space_begin_;
2279         DCHECK_EQ(store_val & kPageStateMask, 0u);
2280         store_val |= static_cast<uint8_t>(PageState::kProcessed);
2281         // Store is sufficient as no other thread would modify the status at this point.
2282         moving_pages_status_[page_idx].store(store_val, std::memory_order_release);
2283       }
2284     }
2285     return true;
2286   } else {
2287     // Only GC thread could have set the state to Processed.
2288     DCHECK_NE(expected_state, static_cast<uint8_t>(PageState::kProcessed));
2289     return false;
2290   }
2291 }
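// Illustrative encoding of a moving_pages_status_ entry for a page that has been
// compacted but not yet mapped (kCopyMode with map_immediately == false): the word
// packs the page-aligned from-space byte-offset of the compacted contents in the
// high bits and the PageState in the low bits. E.g., assuming 4 KiB pages:
//
//   uint32_t word = 0x3000 /* from-space offset of page 3 */ |
//                   static_cast<uint8_t>(PageState::kProcessed);
//
// MapMovingSpacePages() later recovers the offset with 'word & ~kPageStateMask' to
// find runs of contiguous src pages that can be mapped with a single UFFDIO_COPY.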
2292 
2293 bool MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_idx_for_mapping) {
2294   // Thanks to sliding compaction, bump-pointer allocations, and reverse
2295   // compaction (see CompactMovingSpace), the logic here is pretty simple: find
2296   // the to-space page up to which compaction has finished; all the from-space
2297   // pages corresponding to this page and onwards can be freed. There are some
2298   // corner cases to be taken care of, which are described below.
2299   size_t idx = last_checked_reclaim_page_idx_;
2300   // Find the to-space page up to which the corresponding from-space pages can be
2301   // freed.
2302   for (; idx > cur_page_idx; idx--) {
2303     PageState state = GetMovingPageState(idx - 1);
2304     if (state == PageState::kMutatorProcessing) {
2305       // Some mutator is working on the page.
2306       break;
2307     }
2308     DCHECK(state >= PageState::kProcessed ||
2309            (state == PageState::kUnprocessed &&
2310             (mode == kFallbackMode || idx > moving_first_objs_count_)));
2311   }
2312   DCHECK_LE(idx, last_checked_reclaim_page_idx_);
2313   if (idx == last_checked_reclaim_page_idx_) {
2314     // Nothing to do.
2315     return false;
2316   }
2317 
2318   uint8_t* reclaim_begin;
2319   uint8_t* idx_addr;
2320   // Calculate the first from-space page to be freed using 'idx'. If the
2321   // first-object of the idx'th to-space page started before the corresponding
2322   // from-space page, which is almost always the case in the compaction portion
2323   // of the moving-space, then it indicates that the subsequent pages that are
2324   // yet to be compacted will need the from-space pages. Therefore, find the page
2325   // (from the already compacted pages) whose first-object is different from
2326   // ours. All the from-space pages starting from that one are safe to be
2327   // removed. Please note that this iteration is not expected to be long in
2328   // normal cases as objects are smaller than page size.
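  // For example (illustrative): if the first-objects of to-space pages idx, idx+1
  // and idx+2 are all the same object, which starts in a from-space page below
  // idx, then those from-space pages are still needed by the yet-to-be-compacted
  // pages; the scan below stops at the first page whose first-object differs (or
  // is null) and reclaiming starts from there.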
2329   if (idx >= moving_first_objs_count_) {
2330     // black-allocated portion of the moving-space
2331     idx_addr = black_allocations_begin_ + (idx - moving_first_objs_count_) * gPageSize;
2332     reclaim_begin = idx_addr;
2333     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2334     if (first_obj != nullptr && reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
2335       size_t idx_len = moving_first_objs_count_ + black_page_count_;
2336       for (size_t i = idx + 1; i < idx_len; i++) {
2337         mirror::Object* obj = first_objs_moving_space_[i].AsMirrorPtr();
2338         // A null first-object indicates that the corresponding to-space page is
2339         // not used yet. So we can compute its from-space page and use that.
2340         if (obj != first_obj) {
2341           reclaim_begin = obj != nullptr
2342                           ? AlignUp(reinterpret_cast<uint8_t*>(obj), gPageSize)
2343                           : (black_allocations_begin_ + (i - moving_first_objs_count_) * gPageSize);
2344           break;
2345         }
2346       }
2347     }
2348   } else {
2349     DCHECK_GE(pre_compact_offset_moving_space_[idx], 0u);
2350     idx_addr = moving_space_begin_ + idx * gPageSize;
2351     if (idx_addr >= black_dense_end_) {
2352       idx_addr = moving_space_begin_ + pre_compact_offset_moving_space_[idx] * kAlignment;
2353     }
2354     reclaim_begin = idx_addr;
2355     DCHECK_LE(reclaim_begin, black_allocations_begin_);
2356     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2357     if (first_obj != nullptr) {
2358       if (reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
2359         DCHECK_LT(idx, moving_first_objs_count_);
2360         mirror::Object* obj = first_obj;
2361         for (size_t i = idx + 1; i < moving_first_objs_count_; i++) {
2362           obj = first_objs_moving_space_[i].AsMirrorPtr();
2363           if (obj == nullptr) {
2364             reclaim_begin = moving_space_begin_ + i * gPageSize;
2365             break;
2366           } else if (first_obj != obj) {
2367             DCHECK_LT(first_obj, obj);
2368             DCHECK_LT(reclaim_begin, reinterpret_cast<uint8_t*>(obj));
2369             reclaim_begin = reinterpret_cast<uint8_t*>(obj);
2370             break;
2371           }
2372         }
2373         if (obj == first_obj) {
2374           reclaim_begin = black_allocations_begin_;
2375         }
2376       }
2377     }
2378     reclaim_begin = AlignUp(reclaim_begin, gPageSize);
2379   }
2380 
2381   DCHECK_NE(reclaim_begin, nullptr);
2382   DCHECK_ALIGNED_PARAM(reclaim_begin, gPageSize);
2383   DCHECK_ALIGNED_PARAM(last_reclaimed_page_, gPageSize);
2384   // Check if the 'class_after_obj_map_' map allows pages to be freed.
2385   for (; class_after_obj_iter_ != class_after_obj_map_.rend(); class_after_obj_iter_++) {
2386     mirror::Object* klass = class_after_obj_iter_->first.AsMirrorPtr();
2387     mirror::Class* from_klass = static_cast<mirror::Class*>(GetFromSpaceAddr(klass));
2388     // Check against the class's end to ensure that, if required, the entire class survives.
2389     uint8_t* klass_end = reinterpret_cast<uint8_t*>(klass) + from_klass->SizeOf<kVerifyNone>();
2390     DCHECK_LE(klass_end, last_reclaimed_page_);
2391     if (reinterpret_cast<uint8_t*>(klass_end) >= reclaim_begin) {
2392       // Found a class which is in the reclaim range.
2393       if (reinterpret_cast<uint8_t*>(class_after_obj_iter_->second.AsMirrorPtr()) < idx_addr) {
2394         // Its lowest-address object is not compacted yet. Reclaim starting from
2395         // the end of this class.
2396         reclaim_begin = AlignUp(klass_end, gPageSize);
2397       } else {
2398         // Continue consuming pairs wherein the lowest address object has already
2399         // been compacted.
2400         continue;
2401       }
2402     }
2403     // All the remaining class (and thereby the corresponding object) addresses are
2404     // lower than the reclaim range.
2405     break;
2406   }
2407   bool all_mapped = mode == kFallbackMode;
2408   ssize_t size = last_reclaimed_page_ - reclaim_begin;
2409   if (size > kMinFromSpaceMadviseSize) {
2410     // Map all the pages in the range.
2411     if (mode == kCopyMode && cur_page_idx < end_idx_for_mapping) {
2412       if (MapMovingSpacePages(cur_page_idx,
2413                               end_idx_for_mapping,
2414                               /*from_ioctl=*/false,
2415                               /*return_on_contention=*/true,
2416                               /*tolerate_enoent=*/false) == end_idx_for_mapping - cur_page_idx) {
2417         all_mapped = true;
2418       }
2419     } else {
2420       // This is for the black-allocation pages so that madvise is not missed.
2421       all_mapped = true;
2422     }
2423     // If not all pages are mapped, then take it as a hint that mmap_lock is
2424     // contended and hence don't madvise as that also needs the same lock.
2425     if (all_mapped) {
2426       // Retain a few pages for subsequent compactions.
2427       const ssize_t gBufferPages = 4 * gPageSize;
2428       DCHECK_LT(gBufferPages, kMinFromSpaceMadviseSize);
2429       size -= gBufferPages;
2430       uint8_t* addr = last_reclaimed_page_ - size;
2431       CHECK_EQ(madvise(addr + from_space_slide_diff_, size, MADV_DONTNEED), 0)
2432           << "madvise of from-space failed: " << strerror(errno);
2433       last_reclaimed_page_ = addr;
2434       cur_reclaimable_page_ = addr;
2435     }
2436   }
2437   last_reclaimable_page_ = std::min(reclaim_begin, last_reclaimable_page_);
2438   last_checked_reclaim_page_idx_ = idx;
2439   return all_mapped;
2440 }
2441 
2442 template <int kMode>
2443 void MarkCompact::CompactMovingSpace(uint8_t* page) {
2444   // For every page we have a starting object, which may have started in some
2445   // preceding page, and an offset within that object from where we must start
2446   // copying.
2447   // Consult the live-words bitmap to copy all contiguously live words at a
2448   // time. These words may constitute multiple objects. To avoid the need for
2449   // consulting the mark-bitmap to find where the next live object starts, we
2450   // use the object-size returned by VisitRefsForCompaction.
2451   //
2452   // We do the compaction in reverse direction so that the pages containing
2453   // TLAB and latest allocations are processed first.
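  // Concretely, the three loops below walk the space in decreasing address order:
  // the black-allocated (TLAB) pages are slid in place first, then the pages above
  // black_dense_end_ are compacted to their post-compact addresses, and finally the
  // black-dense prefix only gets its references updated in place (plus a memcpy in
  // fallback mode).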
2454   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2455   size_t page_status_arr_len = moving_first_objs_count_ + black_page_count_;
2456   size_t idx = page_status_arr_len;
2457   size_t black_dense_end_idx = (black_dense_end_ - moving_space_begin_) / gPageSize;
2458   uint8_t* to_space_end = moving_space_begin_ + page_status_arr_len * gPageSize;
2459   uint8_t* pre_compact_page = black_allocations_begin_ + (black_page_count_ * gPageSize);
2460 
2461   DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
2462 
2463   // These variables are maintained by FreeFromSpacePages().
2464   last_reclaimed_page_ = pre_compact_page;
2465   last_reclaimable_page_ = last_reclaimed_page_;
2466   cur_reclaimable_page_ = last_reclaimed_page_;
2467   last_checked_reclaim_page_idx_ = idx;
2468   class_after_obj_iter_ = class_after_obj_map_.rbegin();
2469   // Allocated-black pages
2470   mirror::Object* next_page_first_obj = nullptr;
2471   while (idx > moving_first_objs_count_) {
2472     idx--;
2473     pre_compact_page -= gPageSize;
2474     to_space_end -= gPageSize;
2475     if (kMode == kFallbackMode) {
2476       page = to_space_end;
2477     }
2478     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2479     uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[idx];
2480     if (first_obj != nullptr) {
2481       DoPageCompactionWithStateChange<kMode>(idx,
2482                                              to_space_end,
2483                                              page,
2484                                              /*map_immediately=*/true,
2485                                              [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2486                                                SlideBlackPage(first_obj,
2487                                                               next_page_first_obj,
2488                                                               first_chunk_size,
2489                                                               pre_compact_page,
2490                                                               page,
2491                                                               kMode == kCopyMode);
2492                                              });
2493       // We are sliding here, so no point attempting to madvise for every
2494       // page. Wait for enough pages to be done.
2495       if (idx % DivideByPageSize(kMinFromSpaceMadviseSize) == 0) {
2496         FreeFromSpacePages(idx, kMode, /*end_idx_for_mapping=*/0);
2497       }
2498     }
2499     next_page_first_obj = first_obj;
2500   }
2501   DCHECK_EQ(pre_compact_page, black_allocations_begin_);
2502   // Reserved page to be used if we can't find any reclaimable page for processing.
2503   uint8_t* reserve_page = page;
2504   size_t end_idx_for_mapping = idx;
2505   while (idx > black_dense_end_idx) {
2506     idx--;
2507     to_space_end -= gPageSize;
2508     if (kMode == kFallbackMode) {
2509       page = to_space_end;
2510     } else {
2511       DCHECK_EQ(kMode, kCopyMode);
2512       if (cur_reclaimable_page_ > last_reclaimable_page_) {
2513         cur_reclaimable_page_ -= gPageSize;
2514         page = cur_reclaimable_page_ + from_space_slide_diff_;
2515       } else {
2516         page = reserve_page;
2517       }
2518     }
2519     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2520     bool success = DoPageCompactionWithStateChange<kMode>(
2521         idx,
2522         to_space_end,
2523         page,
2524         /*map_immediately=*/page == reserve_page,
2525         [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2526           if (use_generational_ && to_space_end < mid_gen_end_) {
2527             CompactPage</*kSetupForGenerational=*/true>(first_obj,
2528                                                         pre_compact_offset_moving_space_[idx],
2529                                                         page,
2530                                                         to_space_end,
2531                                                         kMode == kCopyMode);
2532           } else {
2533             CompactPage</*kSetupForGenerational=*/false>(first_obj,
2534                                                          pre_compact_offset_moving_space_[idx],
2535                                                          page,
2536                                                          to_space_end,
2537                                                          kMode == kCopyMode);
2538           }
2539         });
2540     if (kMode == kCopyMode && (!success || page == reserve_page) && end_idx_for_mapping - idx > 1) {
2541       // Map the pages at the following addresses, as they can't be mapped together
2542       // with the yet-to-be-compacted pages: their src-side pages won't be contiguous.
2543       MapMovingSpacePages(idx + 1,
2544                           end_idx_for_mapping,
2545                           /*from_fault=*/false,
2546                           /*return_on_contention=*/true,
2547                           /*tolerate_enoent=*/false);
2548     }
2549     if (FreeFromSpacePages(idx, kMode, end_idx_for_mapping)) {
2550       end_idx_for_mapping = idx;
2551     }
2552   }
2553   while (idx > 0) {
2554     idx--;
2555     to_space_end -= gPageSize;
2556     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2557     if (first_obj != nullptr) {
2558       DoPageCompactionWithStateChange<kMode>(
2559           idx,
2560           to_space_end,
2561           to_space_end + from_space_slide_diff_,
2562           /*map_immediately=*/false,
2563           [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2564             if (use_generational_) {
2565               UpdateNonMovingPage</*kSetupForGenerational=*/true>(
2566                   first_obj, to_space_end, from_space_slide_diff_, moving_space_bitmap_);
2567             } else {
2568               UpdateNonMovingPage</*kSetupForGenerational=*/false>(
2569                   first_obj, to_space_end, from_space_slide_diff_, moving_space_bitmap_);
2570             }
2571             if (kMode == kFallbackMode) {
2572               memcpy(to_space_end, to_space_end + from_space_slide_diff_, gPageSize);
2573             }
2574           });
2575     } else {
2576       // The page has no reachable object on it. Just declare it mapped.
2577       // Mutators shouldn't step on this page, which is asserted in sigbus
2578       // handler.
2579       DCHECK_EQ(moving_pages_status_[idx].load(std::memory_order_relaxed),
2580                 static_cast<uint8_t>(PageState::kUnprocessed));
2581       moving_pages_status_[idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2582                                       std::memory_order_release);
2583     }
2584     if (FreeFromSpacePages(idx, kMode, end_idx_for_mapping)) {
2585       end_idx_for_mapping = idx;
2586     }
2587   }
2588   // map one last time to finish anything left.
2589   if (kMode == kCopyMode && end_idx_for_mapping > 0) {
2590     MapMovingSpacePages(idx,
2591                         end_idx_for_mapping,
2592                         /*from_fault=*/false,
2593                         /*return_on_contention=*/false,
2594                         /*tolerate_enoent=*/false);
2595   }
2596   DCHECK_EQ(to_space_end, bump_pointer_space_->Begin());
2597 }
2598 
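// Illustrative batching example: if pages 4..7 of the moving space are all in state
// kProcessed and their stored from-space offsets are consecutive (off, off + gPageSize,
// off + 2 * gPageSize, off + 3 * gPageSize), the loop below maps all four with one
// UFFDIO_COPY of 4 * gPageSize; a page that is already kProcessedAndMapped, or whose
// offset breaks the run, ends the batch.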
2599 size_t MarkCompact::MapMovingSpacePages(size_t start_idx,
2600                                         size_t arr_len,
2601                                         bool from_fault,
2602                                         bool return_on_contention,
2603                                         bool tolerate_enoent) {
2604   DCHECK_LT(start_idx, arr_len);
2605   size_t arr_idx = start_idx;
2606   bool wait_for_unmapped = false;
2607   while (arr_idx < arr_len) {
2608     size_t map_count = 0;
2609     uint32_t cur_state = moving_pages_status_[arr_idx].load(std::memory_order_acquire);
2610     // Find a contiguous range that can be mapped with a single ioctl.
2611     for (uint32_t i = arr_idx, from_page = cur_state & ~kPageStateMask; i < arr_len;
2612          i++, map_count++, from_page += gPageSize) {
2613       uint32_t s = moving_pages_status_[i].load(std::memory_order_acquire);
2614       uint32_t cur_from_page = s & ~kPageStateMask;
2615       if (GetPageStateFromWord(s) != PageState::kProcessed || cur_from_page != from_page) {
2616         break;
2617       }
2618     }
2619 
2620     if (map_count == 0) {
2621       if (from_fault) {
2622         bool mapped = GetPageStateFromWord(cur_state) == PageState::kProcessedAndMapped;
2623         return mapped ? 1 : 0;
2624       }
2625       // Skip the pages that this thread cannot map.
2626       for (; arr_idx < arr_len; arr_idx++) {
2627         PageState s = GetMovingPageState(arr_idx);
2628         if (s == PageState::kProcessed) {
2629           break;
2630         } else if (s != PageState::kProcessedAndMapped) {
2631           wait_for_unmapped = true;
2632         }
2633       }
2634     } else {
2635       uint32_t from_space_offset = cur_state & ~kPageStateMask;
2636       uint8_t* to_space_start = moving_space_begin_ + arr_idx * gPageSize;
2637       uint8_t* from_space_start = from_space_begin_ + from_space_offset;
2638       DCHECK_ALIGNED_PARAM(to_space_start, gPageSize);
2639       DCHECK_ALIGNED_PARAM(from_space_start, gPageSize);
2640       size_t mapped_len = CopyIoctl(to_space_start,
2641                                     from_space_start,
2642                                     map_count * gPageSize,
2643                                     return_on_contention,
2644                                     tolerate_enoent);
2645       for (size_t l = 0; l < mapped_len; l += gPageSize, arr_idx++) {
2646         // Store is sufficient as anyone storing is doing it with the same value.
2647         moving_pages_status_[arr_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2648                                             std::memory_order_release);
2649       }
2650       if (from_fault) {
2651         return DivideByPageSize(mapped_len);
2652       }
2653       // The COPY ioctl can also return a smaller length if a page was found to be
2654       // already mapped. That doesn't count as contention.
2655       if (return_on_contention && DivideByPageSize(mapped_len) < map_count && errno != EEXIST) {
2656         return arr_idx - start_idx;
2657       }
2658     }
2659   }
2660   if (wait_for_unmapped) {
2661     for (size_t i = start_idx; i < arr_len; i++) {
2662       PageState s = GetMovingPageState(i);
2663       DCHECK_GT(s, PageState::kProcessed);
2664       uint32_t backoff_count = 0;
2665       while (s != PageState::kProcessedAndMapped) {
2666         BackOff(backoff_count++);
2667         s = GetMovingPageState(i);
2668       }
2669     }
2670   }
2671   return arr_len - start_idx;
2672 }
2673 
2674 template <bool kSetupForGenerational>
2675 void MarkCompact::UpdateNonMovingPage(mirror::Object* first,
2676                                       uint8_t* page,
2677                                       ptrdiff_t from_space_diff,
2678                                       accounting::ContinuousSpaceBitmap* bitmap) {
2679   DCHECK_LT(reinterpret_cast<uint8_t*>(first), page + gPageSize);
2680   accounting::CardTable* card_table = heap_->GetCardTable();
2681   mirror::Object* curr_obj = first;
2682   uint8_t* from_page = page + from_space_diff;
2683   uint8_t* from_page_end = from_page + gPageSize;
2684   uint8_t* scan_begin =
2685       std::max(reinterpret_cast<uint8_t*>(first) + mirror::kObjectHeaderSize, page);
2686   // For every object found in the page, visit the previous object. This ensures
2687   // that we can visit without checking page-end boundary.
2688   // Call VisitRefsForCompaction with from-space read-barrier as the klass object and
2689   // super-class loads require it.
2690   // TODO: Set kVisitNativeRoots to false once we implement concurrent
2691   // compaction
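  // Illustrative walk: if the scan reports objects A, B and C on this page (with A
  // possibly starting on the preceding page), the lambda below updates A when B is
  // reported and B when C is reported; C itself is handled by the tail code after
  // the scan, using the page end as its bounded end-offset.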
2692   auto obj_visitor = [&](mirror::Object* next_obj) {
2693     if (curr_obj != nullptr) {
2694       mirror::Object* from_obj =
2695           reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff);
2696       bool should_dirty_card;
2697       if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2698         RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ false, kSetupForGenerational> visitor(
2699             this, from_obj, from_page, from_page_end, card_table, curr_obj);
2700         MemberOffset begin_offset(page - reinterpret_cast<uint8_t*>(curr_obj));
2701         // Native roots shouldn't be visited as they are done when this
2702         // object's beginning was visited in the preceding page.
2703         from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>(
2704             visitor, begin_offset, MemberOffset(-1));
2705         should_dirty_card = visitor.ShouldDirtyCard();
2706       } else {
2707         RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ false, kSetupForGenerational>
2708             visitor(this, from_obj, from_page, from_page_end, card_table, curr_obj);
2709         from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2710             visitor, MemberOffset(0), MemberOffset(-1));
2711         should_dirty_card = visitor.ShouldDirtyCard();
2712       }
2713       if (kSetupForGenerational && should_dirty_card) {
2714         card_table->MarkCard(curr_obj);
2715       }
2716     }
2717     curr_obj = next_obj;
2718   };
2719 
2720   if (young_gen_) {
2721     DCHECK(bitmap->Test(first));
2722     // If the first-obj is covered by the same card which also covers the first
2723     // word of the page, then it's important to set curr_obj to nullptr to avoid
2724     // updating the references twice.
2725     if (card_table->IsClean(first) ||
2726         card_table->CardFromAddr(first) == card_table->CardFromAddr(scan_begin)) {
2727       curr_obj = nullptr;
2728     }
2729     // We cannot acquire heap-bitmap-lock here as this function is called from
2730     // SIGBUS handler. But it's safe as the bitmap passed to Scan function
2731     // can't get modified until this GC cycle is finished.
2732     FakeMutexLock mu(*Locks::heap_bitmap_lock_);
2733     card_table->Scan</*kClearCard=*/false>(
2734         bitmap, scan_begin, page + gPageSize, obj_visitor, accounting::CardTable::kCardAged2);
2735   } else {
2736     bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(scan_begin),
2737                              reinterpret_cast<uintptr_t>(page + gPageSize),
2738                              obj_visitor);
2739   }
2740 
2741   if (curr_obj != nullptr) {
2742     bool should_dirty_card;
2743     mirror::Object* from_obj =
2744         reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff);
2745     MemberOffset end_offset(page + gPageSize - reinterpret_cast<uint8_t*>(curr_obj));
2746     if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2747       RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ true, kSetupForGenerational> visitor(
2748           this, from_obj, from_page, from_page_end, card_table, curr_obj);
2749       from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>(
2750           visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset);
2751       should_dirty_card = visitor.ShouldDirtyCard();
2752     } else {
2753       RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true, kSetupForGenerational> visitor(
2754           this, from_obj, from_page, from_page_end, card_table, curr_obj);
2755       from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2756           visitor, MemberOffset(0), end_offset);
2757       should_dirty_card = visitor.ShouldDirtyCard();
2758     }
2759     if (kSetupForGenerational && should_dirty_card) {
2760       card_table->MarkCard(curr_obj);
2761     }
2762   }
2763 }
2764 
2765 void MarkCompact::UpdateNonMovingSpace() {
2766   TimingLogger::ScopedTiming t("(Paused)UpdateNonMovingSpace", GetTimings());
2767   // Iterating in reverse ensures that the class pointer in objects which span
2768   // across more than one page gets updated in the end. This is necessary for
2769   // VisitRefsForCompaction() to work correctly.
2770   // TODO: If and when we make non-moving space update concurrent, implement a
2771   // mechanism to remember class pointers for such objects off-heap and pass it
2772   // to VisitRefsForCompaction().
2773   uint8_t* page = non_moving_space_->Begin() + non_moving_first_objs_count_ * gPageSize;
2774   for (ssize_t i = non_moving_first_objs_count_ - 1; i >= 0; i--) {
2775     mirror::Object* obj = first_objs_non_moving_space_[i].AsMirrorPtr();
2776     page -= gPageSize;
2777     // A null entry means there are no objects on the page whose references need updating.
2778     if (obj != nullptr) {
2779       if (use_generational_) {
2780         UpdateNonMovingPage</*kSetupForGenerational=*/true>(
2781             obj, page, /*from_space_diff=*/0, non_moving_space_bitmap_);
2782       } else {
2783         UpdateNonMovingPage</*kSetupForGenerational=*/false>(
2784             obj, page, /*from_space_diff=*/0, non_moving_space_bitmap_);
2785       }
2786     }
2787   }
2788 }
2789 
2790 void MarkCompact::UpdateMovingSpaceBlackAllocations() {
2791   // For sliding black pages, we need the first-object, which overlaps with the
2792   // first byte of the page. Additionally, we compute the size of the first chunk
2793   // of black objects. This will suffice for most black pages. Unlike compaction
2794   // pages, here we don't need to pre-compute the offset within first-obj from
2795   // where sliding has to start. That can be calculated using the pre-compact
2796   // address of the page. Therefore, to save space, we store the first chunk's
2797   // size in black_alloc_pages_first_chunk_size_ array.
2798   // For the pages which may have holes after the first chunk, which could happen
2799   // if a new TLAB starts in the middle of the page, we mark the objects in
2800   // the mark-bitmap. So, if the first-chunk size is smaller than gPageSize,
2801   // then we use the mark-bitmap for the remainder of the page.
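  // For example (illustrative): if a page starts inside a TLAB whose objects fill
  // only its first 1 KB and a new TLAB begins after the resulting hole, then
  // black_alloc_pages_first_chunk_size_ records the 1 KB chunk for that page and
  // the objects of the new TLAB get their mark-bitmap bits set so that
  // SlideBlackPage() can still find them.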
2802   uint8_t* const begin = bump_pointer_space_->Begin();
2803   uint8_t* black_allocs = black_allocations_begin_;
2804   DCHECK_LE(begin, black_allocs);
2805   size_t consumed_blocks_count = 0;
2806   size_t first_block_size;
2807   // Needed only for debug at the end of the function. Hopefully compiler will
2808   // eliminate it otherwise.
2809   size_t num_blocks = 0;
2810   // Get the list of all blocks allocated in the bump-pointer space.
2811   std::vector<size_t>* block_sizes = bump_pointer_space_->GetBlockSizes(thread_running_gc_,
2812                                                                         &first_block_size);
2813   DCHECK_LE(first_block_size, (size_t)(black_allocs - begin));
2814   if (block_sizes != nullptr) {
2815     size_t black_page_idx = moving_first_objs_count_;
2816     uint8_t* block_end = begin + first_block_size;
2817     uint32_t remaining_chunk_size = 0;
2818     uint32_t first_chunk_size = 0;
2819     mirror::Object* first_obj = nullptr;
2820     num_blocks = block_sizes->size();
2821     for (size_t block_size : *block_sizes) {
2822       block_end += block_size;
2823       // Skip the blocks that are prior to the black allocations. These will be
2824       // merged with the main-block later.
2825       if (black_allocs >= block_end) {
2826         consumed_blocks_count++;
2827         continue;
2828       }
2829       mirror::Object* obj = reinterpret_cast<mirror::Object*>(black_allocs);
2830       bool set_mark_bit = remaining_chunk_size > 0;
2831       // We don't know how many objects are allocated in the current block. When we
2832       // hit a null, assume it's the end. This works as every block is expected to
2833       // have objects allocated linearly using the bump-pointer.
2834       // BumpPointerSpace::Walk() also works similarly.
2835       while (black_allocs < block_end
2836              && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
2837         // Try to keep instructions which access class instance together to
2838         // avoid reloading the pointer from object.
2839         size_t obj_size = obj->SizeOf();
2840         bytes_scanned_ += obj_size;
2841         obj_size = RoundUp(obj_size, kAlignment);
2842         UpdateClassAfterObjectMap(obj);
2843         if (first_obj == nullptr) {
2844           first_obj = obj;
2845         }
2846         // We only need the mark-bitmap in the pages wherein a new TLAB starts in
2847         // the middle of the page.
2848         if (set_mark_bit) {
2849           moving_space_bitmap_->Set(obj);
2850         }
2851         // Handle objects which cross page boundary, including objects larger
2852         // than page size.
2853         if (remaining_chunk_size + obj_size >= gPageSize) {
2854           set_mark_bit = false;
2855           first_chunk_size += gPageSize - remaining_chunk_size;
2856           remaining_chunk_size += obj_size;
2857           // We should not store first-object and remaining_chunk_size if there were
2858           // unused bytes before this TLAB, in which case we must have already
2859           // stored the values (below).
2860           if (black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2861             black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2862             first_objs_moving_space_[black_page_idx].Assign(first_obj);
2863           }
2864           black_page_idx++;
2865           remaining_chunk_size -= gPageSize;
2866           // Consume an object larger than page size.
2867           while (remaining_chunk_size >= gPageSize) {
2868             black_alloc_pages_first_chunk_size_[black_page_idx] = gPageSize;
2869             first_objs_moving_space_[black_page_idx].Assign(obj);
2870             black_page_idx++;
2871             remaining_chunk_size -= gPageSize;
2872           }
2873           first_obj = remaining_chunk_size > 0 ? obj : nullptr;
2874           first_chunk_size = remaining_chunk_size;
2875         } else {
2876           DCHECK_LE(first_chunk_size, remaining_chunk_size);
2877           first_chunk_size += obj_size;
2878           remaining_chunk_size += obj_size;
2879         }
2880         black_allocs += obj_size;
2881         obj = reinterpret_cast<mirror::Object*>(black_allocs);
2882       }
2883       DCHECK_LE(black_allocs, block_end);
2884       DCHECK_LT(remaining_chunk_size, gPageSize);
2885       // consume the unallocated portion of the block
2886       if (black_allocs < block_end) {
2887         // first-chunk of the current page ends here. Store it.
2888         if (first_chunk_size > 0 && black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2889           black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2890           first_objs_moving_space_[black_page_idx].Assign(first_obj);
2891         }
2892         first_chunk_size = 0;
2893         first_obj = nullptr;
2894         size_t page_remaining = gPageSize - remaining_chunk_size;
2895         size_t block_remaining = block_end - black_allocs;
2896         if (page_remaining <= block_remaining) {
2897           block_remaining -= page_remaining;
2898           // current page and the subsequent empty pages in the block
2899           black_page_idx += 1 + DivideByPageSize(block_remaining);
2900           remaining_chunk_size = ModuloPageSize(block_remaining);
2901         } else {
2902           remaining_chunk_size += block_remaining;
2903         }
2904         black_allocs = block_end;
2905       }
2906     }
2907     if (black_page_idx < DivideByPageSize(bump_pointer_space_->Size())) {
2908       // Store the leftover first-chunk, if any, and update page index.
2909       if (black_alloc_pages_first_chunk_size_[black_page_idx] > 0) {
2910         black_page_idx++;
2911       } else if (first_chunk_size > 0) {
2912         black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2913         first_objs_moving_space_[black_page_idx].Assign(first_obj);
2914         black_page_idx++;
2915       }
2916     }
2917     black_page_count_ = black_page_idx - moving_first_objs_count_;
2918     delete block_sizes;
2919   }
2920   // Update bump-pointer space by consuming all the pre-black blocks into the
2921   // main one.
2922   bump_pointer_space_->SetBlockSizes(thread_running_gc_,
2923                                      post_compact_end_ - begin,
2924                                      consumed_blocks_count);
2925   if (kIsDebugBuild) {
2926     size_t moving_space_size = bump_pointer_space_->Size();
2927     size_t los_size = 0;
2928     if (heap_->GetLargeObjectsSpace()) {
2929       los_size = heap_->GetLargeObjectsSpace()->GetBytesAllocated();
2930     }
2931     // The moving-space size is already updated to post-compact size in SetBlockSizes above.
2932     // Also, bytes-allocated has already been adjusted with the large-object space's
2933     // freed-bytes in Sweep(), but not with the moving-space freed-bytes.
2934     CHECK_GE(heap_->GetBytesAllocated() - black_objs_slide_diff_, moving_space_size + los_size)
2935         << " moving-space size:" << moving_space_size
2936         << " moving-space bytes-freed:" << black_objs_slide_diff_
2937         << " large-object-space size:" << los_size
2938         << " large-object-space bytes-freed:" << GetCurrentIteration()->GetFreedLargeObjectBytes()
2939         << " num-tlabs-merged:" << consumed_blocks_count
2940         << " main-block-size:" << (post_compact_end_ - begin)
2941         << " total-tlabs-moving-space:" << num_blocks;
2942   }
2943 }
2944 
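// Illustrative effect of the walk below: every black-allocated object found on the
// allocation stack is marked in the non-moving space's bitmap; the page containing
// its first byte records it as the page's first-object (if it starts before the
// current entry), and an object spanning several pages becomes the first-object of
// each subsequent page it covers.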
2945 void MarkCompact::UpdateNonMovingSpaceBlackAllocations() {
2946   accounting::ObjectStack* stack = heap_->GetAllocationStack();
2947   const StackReference<mirror::Object>* limit = stack->End();
2948   uint8_t* const space_begin = non_moving_space_->Begin();
2949   size_t num_pages = DivideByPageSize(non_moving_space_->Capacity());
2950   for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) {
2951     mirror::Object* obj = it->AsMirrorPtr();
2952     if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) {
2953       non_moving_space_bitmap_->Set(obj);
2954       if (!use_generational_) {
2955         // Clear so that we don't try to set the bit again in the next GC-cycle.
2956         it->Clear();
2957       }
2958       size_t idx = DivideByPageSize(reinterpret_cast<uint8_t*>(obj) - space_begin);
2959       uint8_t* page_begin = AlignDown(reinterpret_cast<uint8_t*>(obj), gPageSize);
2960       mirror::Object* first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr();
2961       if (first_obj == nullptr
2962           || (obj < first_obj && reinterpret_cast<uint8_t*>(first_obj) > page_begin)) {
2963         first_objs_non_moving_space_[idx].Assign(obj);
2964       }
2965       if (++idx == num_pages) {
2966         continue;
2967       }
2968       mirror::Object* next_page_first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr();
2969       uint8_t* next_page_begin = page_begin + gPageSize;
2970       if (next_page_first_obj == nullptr
2971           || reinterpret_cast<uint8_t*>(next_page_first_obj) > next_page_begin) {
2972         size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
2973         uint8_t* obj_end = reinterpret_cast<uint8_t*>(obj) + obj_size;
2974         while (next_page_begin < obj_end) {
2975           first_objs_non_moving_space_[idx++].Assign(obj);
2976           next_page_begin += gPageSize;
2977         }
2978       }
2979       // update first_objs count in case we went past non_moving_first_objs_count_
2980       non_moving_first_objs_count_ = std::max(non_moving_first_objs_count_, idx);
2981     }
2982   }
2983 }
2984 
2985 class MarkCompact::ImmuneSpaceUpdateObjVisitor {
2986  public:
2987   explicit ImmuneSpaceUpdateObjVisitor(MarkCompact* collector) : collector_(collector) {}
2988 
2989   void operator()(mirror::Object* obj) const ALWAYS_INLINE REQUIRES(Locks::mutator_lock_) {
2990     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(collector_,
2991                                                                         obj,
2992                                                                         /*begin_*/nullptr,
2993                                                                         /*end_*/nullptr);
2994     obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2995         visitor, MemberOffset(0), MemberOffset(-1));
2996   }
2997 
2998   static void Callback(mirror::Object* obj, void* arg) REQUIRES(Locks::mutator_lock_) {
2999     reinterpret_cast<ImmuneSpaceUpdateObjVisitor*>(arg)->operator()(obj);
3000   }
3001 
3002  private:
3003   MarkCompact* const collector_;
3004 };
3005 
3006 class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor {
3007  public:
3008   explicit ClassLoaderRootsUpdater(MarkCompact* collector)
3009       : collector_(collector),
3010         moving_space_begin_(collector->black_dense_end_),
3011         moving_space_end_(collector->moving_space_end_) {}
3012 
3013   void Visit(ObjPtr<mirror::ClassLoader> class_loader) override
3014       REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) {
3015     ClassTable* const class_table = class_loader->GetClassTable();
3016     if (class_table != nullptr) {
3017       // Classes are updated concurrently.
3018       class_table->VisitRoots(*this, /*skip_classes=*/true);
3019     }
3020   }
3021 
3022   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
3023       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
3024     if (!root->IsNull()) {
3025       VisitRoot(root);
3026     }
3027   }
3028 
3029   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
3030       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
3031     collector_->UpdateRoot(
3032         root, moving_space_begin_, moving_space_end_, RootInfo(RootType::kRootVMInternal));
3033   }
3034 
3035  private:
3036   MarkCompact* collector_;
3037   uint8_t* const moving_space_begin_;
3038   uint8_t* const moving_space_end_;
3039 };
3040 
3041 class MarkCompact::LinearAllocPageUpdater {
3042  public:
3043   explicit LinearAllocPageUpdater(MarkCompact* collector)
3044       : collector_(collector),
3045         moving_space_begin_(collector->black_dense_end_),
3046         moving_space_end_(collector->moving_space_end_),
3047         last_page_touched_(false) {}
3048 
3049   // Update a page in multi-object arena.
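  // Illustrative arena layout, assuming every allocation is preceded by a
  // TrackingHeader whose size covers both the header and the payload:
  //
  //   | TrackingHeader | payload | TrackingHeader | payload | ... | size == 0 |
  //
  // The walk below advances by RoundUp(header->GetSize(), LinearAlloc::kAlignment)
  // and treats a zero-sized header as the end of the allocations on the page.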
3050   void MultiObjectArena(uint8_t* page_begin, uint8_t* first_obj)
3051       REQUIRES_SHARED(Locks::mutator_lock_) {
3052     DCHECK(first_obj != nullptr);
3053     DCHECK_ALIGNED_PARAM(page_begin, gPageSize);
3054     uint8_t* page_end = page_begin + gPageSize;
3055     uint32_t obj_size;
3056     for (uint8_t* byte = first_obj; byte < page_end;) {
3057       TrackingHeader* header = reinterpret_cast<TrackingHeader*>(byte);
3058       obj_size = header->GetSize();
3059       if (UNLIKELY(obj_size == 0)) {
3060         // No more objects in this page to visit.
3061         last_page_touched_ = byte >= page_begin;
3062         return;
3063       }
3064       uint8_t* obj = byte + sizeof(TrackingHeader);
3065       uint8_t* obj_end = byte + obj_size;
3066       if (header->Is16Aligned()) {
3067         obj = AlignUp(obj, 16);
3068       }
3069       uint8_t* begin_boundary = std::max(obj, page_begin);
3070       uint8_t* end_boundary = std::min(obj_end, page_end);
3071       if (begin_boundary < end_boundary) {
3072         VisitObject(header->GetKind(), obj, begin_boundary, end_boundary);
3073       }
3074       if (ArenaAllocator::IsRunningOnMemoryTool()) {
3075         obj_size += ArenaAllocator::kMemoryToolRedZoneBytes;
3076       }
3077       byte += RoundUp(obj_size, LinearAlloc::kAlignment);
3078     }
3079     last_page_touched_ = true;
3080   }
3081 
3082   // This version is only used for cases where the entire page is filled with
3083   // GC-roots. For example, class-table and intern-table.
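  // Illustrative slot layout (assuming kObjectAlignment == 8): a class-table slot
  // may store a compressed GcRoot with hash/flag bits packed into its low 3 bits.
  // The loop below strips those bits with ~kMask, updates the reference if the
  // object moved, and ORs the original low bits back before writing the word out.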
3084   void SingleObjectArena(uint8_t* page_begin, size_t page_size)
3085       REQUIRES_SHARED(Locks::mutator_lock_) {
3086     static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>));
3087     DCHECK_ALIGNED(page_begin, kAlignment);
3088     // Least significant bits are used by class-table.
3089     static constexpr uint32_t kMask = kObjectAlignment - 1;
3090     size_t num_roots = page_size / sizeof(GcRoot<mirror::Object>);
3091     uint32_t* root_ptr = reinterpret_cast<uint32_t*>(page_begin);
3092     for (size_t i = 0; i < num_roots; root_ptr++, i++) {
3093       uint32_t word = *root_ptr;
3094       if (word != 0) {
3095         uint32_t lsbs = word & kMask;
3096         word &= ~kMask;
3097         VisitRootIfNonNull(reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(&word));
3098         *root_ptr = word | lsbs;
3099         last_page_touched_ = true;
3100       }
3101     }
3102   }
3103 
3104   bool WasLastPageTouched() const { return last_page_touched_; }
3105 
3106   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
3107       ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
3108     if (!root->IsNull()) {
3109       VisitRoot(root);
3110     }
3111   }
3112 
3113   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
3114       ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
3115     mirror::Object* old_ref = root->AsMirrorPtr();
3116     DCHECK_NE(old_ref, nullptr);
3117     if (MarkCompact::HasAddress(old_ref, moving_space_begin_, moving_space_end_)) {
3118       mirror::Object* new_ref = old_ref;
3119       if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) {
3120         new_ref = collector_->PostCompactBlackObjAddr(old_ref);
3121       } else if (collector_->live_words_bitmap_->Test(old_ref)) {
3122         DCHECK(collector_->moving_space_bitmap_->Test(old_ref))
3123             << "ref:" << old_ref << " root:" << root;
3124         new_ref = collector_->PostCompactOldObjAddr(old_ref);
3125       }
3126       if (old_ref != new_ref) {
3127         root->Assign(new_ref);
3128       }
3129     }
3130   }
3131 
3132  private:
3133   void VisitObject(LinearAllocKind kind,
3134                    void* obj,
3135                    uint8_t* start_boundary,
3136                    uint8_t* end_boundary) const ALWAYS_INLINE
3137       REQUIRES_SHARED(Locks::mutator_lock_) {
3138     switch (kind) {
3139       case LinearAllocKind::kNoGCRoots:
3140         break;
3141       case LinearAllocKind::kGCRootArray:
3142         {
3143           GcRoot<mirror::Object>* root = reinterpret_cast<GcRoot<mirror::Object>*>(start_boundary);
3144           GcRoot<mirror::Object>* last = reinterpret_cast<GcRoot<mirror::Object>*>(end_boundary);
3145           for (; root < last; root++) {
3146             VisitRootIfNonNull(root->AddressWithoutBarrier());
3147           }
3148         }
3149         break;
3150       case LinearAllocKind::kArtMethodArray:
3151         {
3152           LengthPrefixedArray<ArtMethod>* array = static_cast<LengthPrefixedArray<ArtMethod>*>(obj);
3153           // Old methods are clobbered in debug builds. Check size to confirm if the array
3154           // has any GC roots to visit. See ClassLinker::LinkMethodsHelper::ClobberOldMethods()
3155           if (array->size() > 0) {
3156             if (collector_->pointer_size_ == PointerSize::k64) {
3157               ArtMethod::VisitArrayRoots<PointerSize::k64>(
3158                   *this, start_boundary, end_boundary, array);
3159             } else {
3160               DCHECK_EQ(collector_->pointer_size_, PointerSize::k32);
3161               ArtMethod::VisitArrayRoots<PointerSize::k32>(
3162                   *this, start_boundary, end_boundary, array);
3163             }
3164           }
3165         }
3166         break;
3167       case LinearAllocKind::kArtMethod:
3168         ArtMethod::VisitRoots(*this, start_boundary, end_boundary, static_cast<ArtMethod*>(obj));
3169         break;
3170       case LinearAllocKind::kArtFieldArray:
3171         ArtField::VisitArrayRoots(*this,
3172                                   start_boundary,
3173                                   end_boundary,
3174                                   static_cast<LengthPrefixedArray<ArtField>*>(obj));
3175         break;
3176       case LinearAllocKind::kDexCacheArray:
3177         {
3178           mirror::DexCachePair<mirror::Object>* first =
3179               reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(start_boundary);
3180           mirror::DexCachePair<mirror::Object>* last =
3181               reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(end_boundary);
3182           mirror::DexCache::VisitDexCachePairRoots(*this, first, last);
3183       }
3184     }
3185   }
3186 
3187   MarkCompact* const collector_;
3188   // Cache to speed up checking if GC-root is in moving space or not.
3189   uint8_t* const moving_space_begin_;
3190   uint8_t* const moving_space_end_;
3191   // Whether the last page was touched or not.
3192   bool last_page_touched_ = false;
3193 };
3194 
3195 void MarkCompact::UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) {
3196   // If the process is debuggable then redefinition is allowed, which may mean
3197   // pre-zygote-fork class-tables may have pointers to classes in the moving-space.
3198   // So visit classes from class-sets that are not in the linear-alloc arena-pool.
3199   if (UNLIKELY(runtime->IsJavaDebuggableAtInit())) {
3200     ClassLinker* linker = runtime->GetClassLinker();
3201     ClassLoaderRootsUpdater updater(this);
3202     GcVisitedArenaPool* pool = static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
3203     auto cond = [this, pool, immune_class_table_only](ClassTable::ClassSet& set) -> bool {
3204       if (!set.empty()) {
3205         return immune_class_table_only ?
3206                immune_spaces_.ContainsObject(reinterpret_cast<mirror::Object*>(&*set.begin())) :
3207                !pool->Contains(reinterpret_cast<void*>(&*set.begin()));
3208       }
3209       return false;
3210     };
3211     linker->VisitClassTables([cond, &updater](ClassTable* table)
3212                                  REQUIRES_SHARED(Locks::mutator_lock_) {
3213                                table->VisitClassesIfConditionMet(cond, updater);
3214                              });
3215     ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
3216     linker->GetBootClassTable()->VisitClassesIfConditionMet(cond, updater);
3217   }
3218 }
3219 
3220 void MarkCompact::CompactionPause() {
3221   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3222   Runtime* runtime = Runtime::Current();
3223   if (kIsDebugBuild) {
3224     DCHECK_EQ(thread_running_gc_, Thread::Current());
3225     // TODO(Simulator): Test that this should not operate on the simulated stack when the simulator
3226     // supports mark compact.
3227     stack_low_addr_ = thread_running_gc_->GetStackEnd<kNativeStackType>();
3228     stack_high_addr_ = reinterpret_cast<char*>(stack_low_addr_)
3229                        + thread_running_gc_->GetUsableStackSize<kNativeStackType>();
3230   }
3231   {
3232     TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings());
3233     ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
3234     // Refresh data-structures to catch up on allocations that may have
3235     // happened since the marking-phase pause.
3236     // There could be several TLABs that got allocated since marking pause. We
3237     // don't want to compact them and instead update the TLAB info in TLS and
3238     // let mutators continue to use the TLABs.
3239     // We need to set all the bits in live-words bitmap corresponding to allocated
3240     // objects. Also, we need to find the objects that are overlapping with
3241     // page-begin boundaries. Unlike objects allocated before
3242     // black_allocations_begin_, which can be identified via mark-bitmap, we can get
3243     // this info only via walking the space past black_allocations_begin_, which
3244     // involves fetching object size.
3245     // TODO: We can reduce the time spent on this in a pause by performing one
3246     // round of this concurrently prior to the pause.
3247     UpdateMovingSpaceBlackAllocations();
3248     // Iterate over the allocation_stack_, for every object in the non-moving
3249     // space:
3250     // 1. Mark the object in live bitmap
3251     // 2. Erase the object from allocation stack
3252     // 3. In the corresponding page, if the first-object vector needs updating
3253     // then do so.
3254     UpdateNonMovingSpaceBlackAllocations();
3255     // This store is visible to mutators (or uffd worker threads) because the
3256     // mutator lock's unlock guarantees that.
3257     compacting_ = true;
3258     // Start updating roots and system weaks now.
3259     heap_->GetReferenceProcessor()->UpdateRoots(this);
3260   }
3261   {
3262     // TODO: Updating immune spaces has to happen either before or after
3263     // remapping pre-compact pages to from-space. And depending on when it's
3264     // done, we have to invoke VisitRefsForCompaction() with or without
3265     // read-barrier.
3266     TimingLogger::ScopedTiming t2("(Paused)UpdateImmuneSpaces", GetTimings());
3267     accounting::CardTable* const card_table = heap_->GetCardTable();
3268     for (auto& space : immune_spaces_.GetSpaces()) {
3269       DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
3270       accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
3271       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
3272       // Having zygote-space indicates that the first zygote fork has taken
3273       // place and that the classes/dex-caches in immune-spaces may have allocations
3274       // (ArtMethod/ArtField arrays, dex-cache array, etc.) in the
3275       // non-userfaultfd visited private-anonymous mappings. Visit them here.
3276       ImmuneSpaceUpdateObjVisitor visitor(this);
3277       if (table != nullptr) {
3278         table->ProcessCards();
3279         table->VisitObjects(ImmuneSpaceUpdateObjVisitor::Callback, &visitor);
3280       } else {
3281         WriterMutexLock wmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
3282         card_table->Scan<false>(
3283             live_bitmap,
3284             space->Begin(),
3285             space->Limit(),
3286             visitor,
3287             accounting::CardTable::kCardDirty - 1);
3288       }
3289     }
3290   }
3291 
3292   {
3293     TimingLogger::ScopedTiming t2("(Paused)UpdateRoots", GetTimings());
3294     runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
3295     runtime->VisitNonThreadRoots(this);
3296     {
3297       ClassLinker* linker = runtime->GetClassLinker();
3298       ClassLoaderRootsUpdater updater(this);
3299       ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
3300       linker->VisitClassLoaders(&updater);
3301       linker->GetBootClassTable()->VisitRoots(updater, /*skip_classes=*/true);
3302     }
3303     SweepSystemWeaks(thread_running_gc_, runtime, /*paused=*/true);
3304 
3305     bool has_zygote_space = heap_->HasZygoteSpace();
3306     GcVisitedArenaPool* arena_pool =
3307         static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
3308     // Update immune/pre-zygote class-tables in case class redefinition took
3309     // place. Pre-zygote class-tables that are not in immune spaces are updated
3310     // below if we are in fallback-mode or if there is no zygote space. So in
3311     // that case only visit class-tables that are in immune-spaces.
3312     UpdateClassTableClasses(runtime, uffd_ == kFallbackMode || !has_zygote_space);
3313 
3314     // Acquire arena-pool's lock, which should be released after the pool is
3315     // userfaultfd registered. This is to ensure that no new arenas are
3316     // allocated and used in between. Since they will not be captured in
3317     // linear_alloc_arenas_ below, we will miss updating their pages. The same
3318     // reason also applies to new allocations within the existing arena which
3319     // may change last_byte.
3320     // Since we are in an STW pause, this shouldn't happen anyway, but holding
3321     // the lock confirms it.
3322     // TODO (b/305779657): Replace with ExclusiveTryLock() and assert that it
3323     // doesn't fail once it is available for ReaderWriterMutex.
3324     WriterMutexLock pool_wmu(thread_running_gc_, arena_pool->GetLock());
3325 
3326     // TODO: Find out why it's not sufficient to visit native roots of immune
3327     // spaces, and why all the pre-zygote fork arenas have to be linearly updated.
3328     // Is it possible that some native root starts getting pointed to by some object
3329     // in moving space after fork? Or are we missing a write-barrier somewhere
3330     // when a native root is updated?
3331     auto arena_visitor = [this](uint8_t* page_begin, uint8_t* first_obj, size_t page_size)
3332                              REQUIRES_SHARED(Locks::mutator_lock_) {
3333                            LinearAllocPageUpdater updater(this);
3334                            if (first_obj != nullptr) {
3335                              updater.MultiObjectArena(page_begin, first_obj);
3336                            } else {
3337                              updater.SingleObjectArena(page_begin, page_size);
3338                            }
3339                          };
3340     if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) {
3341       // Besides fallback-mode, visit linear-alloc space in the pause for zygote
3342       // processes prior to first fork (that's when zygote space gets created).
3343       if (kIsDebugBuild && IsValidFd(uffd_)) {
3344         // All arenas allocated so far are expected to be pre-zygote fork.
3345         arena_pool->ForEachAllocatedArena(
3346             [](const TrackedArena& arena)
3347                 REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); });
3348       }
3349       arena_pool->VisitRoots(arena_visitor);
3350     } else {
3351       // Inform the arena-pool that compaction is going on. So the TrackedArena
3352       // objects corresponding to the arenas that are freed shouldn't be deleted
3353       // immediately. We will do that in FinishPhase(). This is to avoid ABA
3354       // problem.
3355       arena_pool->DeferArenaFreeing();
3356       arena_pool->ForEachAllocatedArena(
3357           [this, arena_visitor, has_zygote_space](const TrackedArena& arena)
3358               REQUIRES_SHARED(Locks::mutator_lock_) {
3359             // The pre-zygote fork arenas are not visited concurrently in the
3360             // zygote children processes. The native roots of the dirty objects
3361             // are visited during immune space visit below.
3362             if (!arena.IsPreZygoteForkArena()) {
3363               uint8_t* last_byte = arena.GetLastUsedByte();
3364               auto ret = linear_alloc_arenas_.insert({&arena, last_byte});
3365               CHECK(ret.second);
3366             } else if (!arena.IsSingleObjectArena() || !has_zygote_space) {
3367               // Pre-zygote class-table and intern-table don't need to be updated.
3368               // TODO: Explore the possibility of using /proc/self/pagemap to
3369               // fetch which pages in these arenas are private-dirty and then only
3370               // visit those pages. To optimize it further, we can keep all
3371               // pre-zygote arenas in a single memory range so that just one read
3372               // from pagemap is sufficient.
3373               arena.VisitRoots(arena_visitor);
3374             }
3375           });
3376     }
3377     // Release order w.r.t. mutator threads' SIGBUS handler load.
3378     sigbus_in_progress_count_[0].store(0, std::memory_order_relaxed);
3379     sigbus_in_progress_count_[1].store(0, std::memory_order_release);
3380     app_slow_path_start_time_ = MilliTime();
3381     KernelPreparation();
3382   }
3383 
3384   UpdateNonMovingSpace();
3385   // Fallback mode.
3386   if (uffd_ == kFallbackMode) {
3387     CompactMovingSpace<kFallbackMode>(nullptr);
3388 
3389     int32_t freed_bytes = black_objs_slide_diff_;
3390     bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
3391     RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
3392   } else {
3393     DCHECK_EQ(compaction_buffer_counter_.load(std::memory_order_relaxed), 1);
3394   }
3395   stack_low_addr_ = nullptr;
3396 }
3397 
3398 void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr, uint8_t* from_addr, size_t map_size) {
3399   int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED;
3400   if (gHaveMremapDontunmap) {
3401     mremap_flags |= MREMAP_DONTUNMAP;
3402   }
3403 
3404   void* ret = mremap(to_addr, map_size, map_size, mremap_flags, from_addr);
3405   CHECK_EQ(ret, static_cast<void*>(from_addr))
3406       << "mremap to move pages failed: " << strerror(errno)
3407       << ". space-addr=" << reinterpret_cast<void*>(to_addr) << " size=" << PrettySize(map_size);
3408 
3409   if (!gHaveMremapDontunmap) {
3410     // Without MREMAP_DONTUNMAP the source mapping is unmapped by mremap. So mmap
3411     // the moving space again.
3412     int mmap_flags = MAP_FIXED;
3413     // Use MAP_FIXED_NOREPLACE so that if someone else reserves the 'to_addr'
3414     // mapping in the meantime, which can happen when MREMAP_DONTUNMAP isn't
3415     // available, we fail instead of unmapping someone else's mapping and then
3416     // causing crashes elsewhere.
3417     mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
3418     ret = mmap(to_addr, map_size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
3419     CHECK_EQ(ret, static_cast<void*>(to_addr))
3420         << "mmap for moving space failed: " << strerror(errno);
3421   }
3422 }
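
// Hedged illustration (not part of the collector): the mremap dance above in
// isolation. With MREMAP_DONTUNMAP the populated pages move to 'dst' while
// 'src' is left behind as an empty anonymous mapping, ready for userfaultfd
// registration. The function name and the fallback path are assumptions made
// for this example only.
[[maybe_unused]] static bool ExampleMoveRangeLeavingHoleBehind(void* src, void* dst, size_t size) {
  void* ret = mremap(src, size, size, MREMAP_MAYMOVE | MREMAP_FIXED | MREMAP_DONTUNMAP, dst);
  if (ret == dst) {
    return true;
  }
  // Kernel without MREMAP_DONTUNMAP (pre-5.7): move without it, then recreate
  // 'src' as a fresh anonymous mapping. MAP_FIXED_NOREPLACE refuses to clobber
  // anything that may have been mapped at 'src' in the meantime.
  ret = mremap(src, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, dst);
  if (ret != dst) {
    return false;
  }
  ret = mmap(src, size, PROT_READ | PROT_WRITE,
             MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
  return ret == src;
}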
3423 
3424 void MarkCompact::KernelPreparation() {
3425   TimingLogger::ScopedTiming t("(Paused)KernelPreparation", GetTimings());
3426   uint8_t* moving_space_begin = bump_pointer_space_->Begin();
3427   size_t moving_space_size = bump_pointer_space_->Capacity();
3428   size_t moving_space_register_sz = (moving_first_objs_count_ + black_page_count_) * gPageSize;
3429   DCHECK_LE(moving_space_register_sz, moving_space_size);
3430 
3431   KernelPrepareRangeForUffd(moving_space_begin, from_space_begin_, moving_space_size);
3432 
3433   if (IsValidFd(uffd_)) {
3434     if (moving_space_register_sz > 0) {
3435       // mremap clears 'anon_vma' field of anonymous mappings. If we
3436       // uffd-register only the used portion of the space, then the vma gets
3437       // split (between used and unused portions) and as soon as pages are
3438       // mapped to the vmas, they get different `anon_vma` assigned, which
3439       // ensures that the two vmas cannot merge after we uffd-unregister the
3440       // used portion. OTOH, registering the entire space avoids the split, but
3441       // unnecessarily causes userfaults on allocations.
3442       // By faulting-in a page we force the kernel to allocate 'anon_vma' *before*
3443       // the vma-split in uffd-register. This ensures that when we unregister
3444       // the used portion after compaction, the two split vmas merge. This is
3445       // necessary for the mremap of the next GC cycle to not fail due to having
3446       // more than one vma in the source range.
3447       //
3448       // Fault in address aligned to PMD size so that in case THP is enabled,
3449       // we don't mistakenly fault a page in the beginning portion that will be
3450       // registered with uffd. If the alignment takes us beyond the space, then
3451       // fault the first page and madvise it.
3452       size_t pmd_size = Heap::GetPMDSize();
3453       uint8_t* fault_in_addr = AlignUp(moving_space_begin + moving_space_register_sz, pmd_size);
3454       if (bump_pointer_space_->Contains(reinterpret_cast<mirror::Object*>(fault_in_addr))) {
3455         *const_cast<volatile uint8_t*>(fault_in_addr) = 0;
3456       } else {
3457         DCHECK_ALIGNED_PARAM(moving_space_begin, gPageSize);
3458         *const_cast<volatile uint8_t*>(moving_space_begin) = 0;
3459         madvise(moving_space_begin, pmd_size, MADV_DONTNEED);
3460       }
3461       // Register the moving space with userfaultfd.
3462       RegisterUffd(moving_space_begin, moving_space_register_sz);
3463       // madvise ensures that if any page gets mapped (only possible if some
3464       // thread is reading the page(s) without trying to make sense of it, as we
3465       // hold the mutator-lock exclusively) between mremap and uffd-registration, then
3466       // it gets zapped so that the map is empty and ready for userfaults. If
3467       // we could mremap after uffd-registration (like in case of linear-alloc
3468       // space below) then we wouldn't need it. But since we don't register the
3469       // entire space, we can't do that.
3470       madvise(moving_space_begin, moving_space_register_sz, MADV_DONTNEED);
3471     }
3472     // Prepare linear-alloc for concurrent compaction.
3473     for (auto& data : linear_alloc_spaces_data_) {
3474       DCHECK_EQ(static_cast<ssize_t>(data.shadow_.Size()), data.end_ - data.begin_);
3475       // There could be threads running in suspended mode when the compaction
3476       // pause is being executed. In order to make the userfaultfd setup atomic,
3477       // the registration has to be done *before* moving the pages to shadow map.
3478       RegisterUffd(data.begin_, data.shadow_.Size());
3479       KernelPrepareRangeForUffd(data.begin_, data.shadow_.Begin(), data.shadow_.Size());
3480     }
3481   }
3482 }
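
// Hedged sketch of the register-then-zap step used above for the moving
// space: 'uffd' is assumed to be an already created and UFFDIO_API-initialized
// userfaultfd descriptor, and the helper name is made up for this example.
// After this returns successfully, every access to a missing page in
// [addr, addr + len) raises a userfault.
[[maybe_unused]] static bool ExampleRegisterRangeAndZap(int uffd, void* addr, size_t len) {
  struct uffdio_register reg;
  reg.range.start = reinterpret_cast<uintptr_t>(addr);
  reg.range.len = len;
  reg.mode = UFFDIO_REGISTER_MODE_MISSING;  // Fault only on not-yet-mapped pages.
  if (ioctl(uffd, UFFDIO_REGISTER, &reg) != 0) {
    return false;
  }
  // Zap any page that got mapped between mremap and registration so that the
  // whole range is "missing" and therefore fault-generating.
  return madvise(addr, len, MADV_DONTNEED) == 0;
}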
3483 
3484 bool MarkCompact::SigbusHandler(siginfo_t* info) {
3485   class ScopedInProgressCount {
3486    public:
3487     explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
3488       // Increment the count only if compaction is not done yet.
3489       for (idx_ = 0; idx_ < 2; idx_++) {
3490         SigbusCounterType prev =
3491             collector_->sigbus_in_progress_count_[idx_].load(std::memory_order_relaxed);
3492         while ((prev & kSigbusCounterCompactionDoneMask) == 0) {
3493           if (collector_->sigbus_in_progress_count_[idx_].compare_exchange_strong(
3494                   prev, prev + 1, std::memory_order_acquire)) {
3495             DCHECK_LT(prev, kSigbusCounterCompactionDoneMask - 1);
3496             return;
3497           }
3498         }
3499       }
3500     }
3501 
3502     bool TolerateEnoent() const { return idx_ == 1; }
3503 
3504     bool IsCompactionDone() const { return idx_ == 2; }
3505 
3506     ~ScopedInProgressCount() {
3507       if (idx_ < 2) {
3508         collector_->sigbus_in_progress_count_[idx_].fetch_sub(1, std::memory_order_release);
3509       }
3510     }
3511 
3512    private:
3513     MarkCompact* const collector_;
3514     uint8_t idx_;
3515   };
3516 
3517   if (info->si_code != BUS_ADRERR) {
3518     // Userfaultfd raises SIGBUS with BUS_ADRERR. All other causes can't be
3519     // handled here.
3520     return false;
3521   }
3522 
3523   ScopedInProgressCount spc(this);
3524   uint8_t* fault_page = AlignDown(reinterpret_cast<uint8_t*>(info->si_addr), gPageSize);
3525   if (!spc.IsCompactionDone()) {
3526     if (HasAddress(reinterpret_cast<mirror::Object*>(fault_page))) {
3527       Thread* self = Thread::Current();
3528       Locks::mutator_lock_->AssertSharedHeld(self);
3529       size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
3530       ConcurrentlyProcessMovingPage(fault_page,
3531                                     self->GetThreadLocalGcBuffer(),
3532                                     nr_moving_space_used_pages,
3533                                     spc.TolerateEnoent());
3534       return true;
3535     } else {
3536       // Find the linear-alloc space containing fault-addr
3537       for (auto& data : linear_alloc_spaces_data_) {
3538         if (data.begin_ <= fault_page && data.end_ > fault_page) {
3539           ConcurrentlyProcessLinearAllocPage(fault_page, spc.TolerateEnoent());
3540           return true;
3541         }
3542       }
3543       // Fault address doesn't belong to either moving-space or linear-alloc.
3544       return false;
3545     }
3546   } else {
3547     // We may get a spurious SIGBUS fault that was initiated before compaction
3548     // finished, but ends up here. In that case, if the fault
3549     // address is valid then consider it handled.
3550     return HasAddress(reinterpret_cast<mirror::Object*>(fault_page)) ||
3551            linear_alloc_spaces_data_.end() !=
3552                std::find_if(linear_alloc_spaces_data_.begin(),
3553                             linear_alloc_spaces_data_.end(),
3554                             [fault_page](const LinearAllocSpaceData& data) {
3555                               return data.begin_ <= fault_page && data.end_ > fault_page;
3556                             });
3557   }
3558 }
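
// Hedged illustration only: the collector's handler above is wired in through
// ART's sigchain, but a standalone process could route userfaultfd-induced
// SIGBUS (si_code == BUS_ADRERR, faulting address in si_addr) to a handler of
// the same shape with plain POSIX sigaction, as sketched below. Assumes
// <signal.h>; the function name is illustrative.
[[maybe_unused]] static void ExampleInstallSigbusHandler(void (*handler)(int, siginfo_t*, void*)) {
  struct sigaction action = {};
  action.sa_sigaction = handler;  // Three-argument form so si_addr is available.
  action.sa_flags = SA_SIGINFO;
  sigemptyset(&action.sa_mask);
  sigaction(SIGBUS, &action, /*oldact=*/nullptr);
}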
3559 
3560 void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page,
3561                                                 uint8_t* buf,
3562                                                 size_t nr_moving_space_used_pages,
3563                                                 bool tolerate_enoent) {
3564   Thread* self = Thread::Current();
3565   uint8_t* unused_space_begin = moving_space_begin_ + nr_moving_space_used_pages * gPageSize;
3566   DCHECK(IsAlignedParam(unused_space_begin, gPageSize));
3567   if (fault_page >= unused_space_begin) {
3568     // There is a race which allows more than one thread to install a
3569     // zero-page. But we can tolerate that. So absorb the EEXIST returned by
3570     // the ioctl and move on.
3571     ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, tolerate_enoent);
3572     return;
3573   }
3574   size_t page_idx = DivideByPageSize(fault_page - moving_space_begin_);
3575   DCHECK_LT(page_idx, moving_first_objs_count_ + black_page_count_);
3576   mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr();
3577   if (first_obj == nullptr) {
3578     DCHECK_GT(fault_page, post_compact_end_);
3579     // Install zero-page in the entire remaining tlab to avoid multiple ioctl invocations.
3580     uint8_t* end = AlignDown(self->GetTlabEnd(), gPageSize);
3581     if (fault_page < self->GetTlabStart() || fault_page >= end) {
3582       end = fault_page + gPageSize;
3583     }
3584     size_t end_idx = page_idx + DivideByPageSize(end - fault_page);
3585     size_t length = 0;
3586     for (size_t idx = page_idx; idx < end_idx; idx++, length += gPageSize) {
3587       uint32_t cur_state = moving_pages_status_[idx].load(std::memory_order_acquire);
3588       if (cur_state != static_cast<uint8_t>(PageState::kUnprocessed)) {
3589         DCHECK_EQ(cur_state, static_cast<uint8_t>(PageState::kProcessedAndMapped));
3590         break;
3591       }
3592     }
3593     if (length > 0) {
3594       length = ZeropageIoctl(fault_page, length, /*tolerate_eexist=*/true, tolerate_enoent);
3595       for (size_t len = 0, idx = page_idx; len < length; idx++, len += gPageSize) {
3596         moving_pages_status_[idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
3597                                         std::memory_order_release);
3598       }
3599     }
3600     return;
3601   }
3602 
3603   uint32_t raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3604   uint32_t backoff_count = 0;
3605   PageState state;
3606   while (true) {
3607     state = GetPageStateFromWord(raw_state);
3608     if (state == PageState::kProcessing || state == PageState::kMutatorProcessing ||
3609         state == PageState::kProcessingAndMapping || state == PageState::kProcessedAndMapping) {
3610       // Wait for the page to be mapped (by gc-thread or some mutator) before returning.
3611       // The wait is not expected to be long as the read state indicates that the other
3612       // thread is actively working on the page.
3613       BackOff(backoff_count++);
3614       raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3615     } else if (state == PageState::kProcessedAndMapped) {
3616       // Nothing to do.
3617       break;
3618     } else {
3619       // Acquire order to ensure we don't start writing to a page, which could
3620       // be shared, before the CAS is successful.
3621       if (state == PageState::kUnprocessed &&
3622           moving_pages_status_[page_idx].compare_exchange_strong(
3623               raw_state,
3624               static_cast<uint8_t>(PageState::kMutatorProcessing),
3625               std::memory_order_acquire)) {
3626         if (fault_page < black_dense_end_) {
3627           if (use_generational_) {
3628             UpdateNonMovingPage</*kSetupForGenerational=*/true>(
3629                 first_obj, fault_page, from_space_slide_diff_, moving_space_bitmap_);
3630           } else {
3631             UpdateNonMovingPage</*kSetupForGenerational=*/false>(
3632                 first_obj, fault_page, from_space_slide_diff_, moving_space_bitmap_);
3633           }
3634           buf = fault_page + from_space_slide_diff_;
3635         } else {
3636           if (UNLIKELY(buf == nullptr)) {
3637             uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed);
3638             // The buffer-map is one page bigger as the first buffer is used by GC-thread.
3639             CHECK_LE(idx, kMutatorCompactionBufferCount);
3640             buf = compaction_buffers_map_.Begin() + idx * gPageSize;
3641             DCHECK(compaction_buffers_map_.HasAddress(buf));
3642             self->SetThreadLocalGcBuffer(buf);
3643           }
3644 
3645           if (fault_page < post_compact_end_) {
3646             // The page has to be compacted.
3647             if (use_generational_ && fault_page < mid_gen_end_) {
3648               CompactPage</*kSetupGenerational=*/true>(first_obj,
3649                                                        pre_compact_offset_moving_space_[page_idx],
3650                                                        buf,
3651                                                        fault_page,
3652                                                        /*needs_memset_zero=*/true);
3653             } else {
3654               CompactPage</*kSetupGenerational=*/false>(first_obj,
3655                                                         pre_compact_offset_moving_space_[page_idx],
3656                                                         buf,
3657                                                         fault_page,
3658                                                         /*needs_memset_zero=*/true);
3659             }
3660           } else {
3661             DCHECK_NE(first_obj, nullptr);
3662             DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u);
3663             uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_);
3664             uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx];
3665             mirror::Object* next_page_first_obj = nullptr;
3666             if (page_idx + 1 < moving_first_objs_count_ + black_page_count_) {
3667               next_page_first_obj = first_objs_moving_space_[page_idx + 1].AsMirrorPtr();
3668             }
3669             DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
3670             SlideBlackPage(first_obj,
3671                            next_page_first_obj,
3672                            first_chunk_size,
3673                            pre_compact_page,
3674                            buf,
3675                            /*needs_memset_zero=*/true);
3676           }
3677         }
3678         // Nobody else would simultaneously modify this page's state so an
3679         // atomic store is sufficient. Use 'release' order to guarantee that
3680         // loads/stores to the page are finished before this store. Since the
3681         // mutator used its own buffer for the processing, there is no reason to
3682         // put its index in the status of the page. Also, the mutator is going
3683         // to immediately map the page, so that info is not needed.
3684         moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapping),
3685                                              std::memory_order_release);
3686         CopyIoctl(fault_page, buf, gPageSize, /*return_on_contention=*/false, tolerate_enoent);
3687         // Store is sufficient as no other thread modifies the status at this stage.
3688         moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
3689                                              std::memory_order_release);
3690         break;
3691       }
3692       state = GetPageStateFromWord(raw_state);
3693       if (state == PageState::kProcessed) {
3694         size_t arr_len = moving_first_objs_count_ + black_page_count_;
3695         // The page is processed but not mapped. We should map it. The release
3696         // order used in MapMovingSpacePages will ensure that the increment to
3697         // moving_compaction_in_progress is done first.
3698         if (MapMovingSpacePages(page_idx,
3699                                 arr_len,
3700                                 /*from_fault=*/true,
3701                                 /*return_on_contention=*/false,
3702                                 tolerate_enoent) > 0) {
3703           break;
3704         }
3705         raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3706       }
3707     }
3708   }
3709 }
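
// Minimal, self-contained sketch (assumes <atomic>) of the per-page claim
// protocol used above: a mutator CASes the page's state from "untouched" to an
// "owned" state, does the work into its own buffer, and then publishes the
// result with release ordering. All names below are hypothetical, not the
// collector's real types.
namespace example {
enum class PagePhase : uint8_t { kUntouched, kOwned, kMapped };

// Returns true if the caller won ownership of page 'idx' and completed it;
// false if some other thread already owns or finished it.
[[maybe_unused]] inline bool TryClaimAndCompactPage(std::atomic<PagePhase>* states, size_t idx) {
  PagePhase expected = PagePhase::kUntouched;
  // Acquire: never start writing to a (possibly shared) buffer before
  // ownership is established.
  if (!states[idx].compare_exchange_strong(
          expected, PagePhase::kOwned, std::memory_order_acquire)) {
    return false;
  }
  // ... compact the page into a thread-local buffer and ioctl it in here ...
  // Release: the page contents must be visible before the state flips.
  states[idx].store(PagePhase::kMapped, std::memory_order_release);
  return true;
}
}  // namespace example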
3710 
3711 bool MarkCompact::MapUpdatedLinearAllocPages(uint8_t* start_page,
3712                                              uint8_t* start_shadow_page,
3713                                              Atomic<PageState>* state,
3714                                              size_t length,
3715                                              bool free_pages,
3716                                              bool single_ioctl,
3717                                              bool tolerate_enoent) {
3718   DCHECK_ALIGNED_PARAM(length, gPageSize);
3719   Atomic<PageState>* madv_state = state;
3720   size_t madv_len = length;
3721   uint8_t* madv_start = start_shadow_page;
3722   bool check_state_for_madv = false;
3723   uint8_t* end_page = start_page + length;
3724   while (start_page < end_page) {
3725     size_t map_len = 0;
3726     // Find a contiguous range of pages that we can map in single ioctl.
3727     for (Atomic<PageState>* cur_state = state;
3728          map_len < length && cur_state->load(std::memory_order_acquire) == PageState::kProcessed;
3729          map_len += gPageSize, cur_state++) {
3730       // No body.
3731     }
3732 
3733     if (map_len == 0) {
3734       if (single_ioctl) {
3735         return state->load(std::memory_order_relaxed) == PageState::kProcessedAndMapped;
3736       }
3737       // Skip all the pages that this thread can't map.
3738       while (length > 0) {
3739         PageState s = state->load(std::memory_order_relaxed);
3740         if (s == PageState::kProcessed) {
3741           break;
3742         }
3743         // If we find any page which is being processed or mapped (only possible by mutators)
3744         // then we need to re-check the page-state and, if needed, wait for the state to change
3745         // to 'mapped', before the shadow pages are reclaimed.
3746         check_state_for_madv |= s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped;
3747         state++;
3748         length -= gPageSize;
3749         start_shadow_page += gPageSize;
3750         start_page += gPageSize;
3751       }
3752     } else {
3753       map_len = CopyIoctl(start_page,
3754                           start_shadow_page,
3755                           map_len,
3756                           /*return_on_contention=*/false,
3757                           tolerate_enoent);
3758       DCHECK_NE(map_len, 0u);
3759       // Declare that the pages are ready to be accessed. Store is sufficient
3760       // as any thread will be storing the same value.
3761       for (size_t l = 0; l < map_len; l += gPageSize, state++) {
3762         PageState s = state->load(std::memory_order_relaxed);
3763         DCHECK(s == PageState::kProcessed || s == PageState::kProcessedAndMapped) << "state:" << s;
3764         state->store(PageState::kProcessedAndMapped, std::memory_order_release);
3765       }
3766       if (single_ioctl) {
3767         break;
3768       }
3769       start_page += map_len;
3770       start_shadow_page += map_len;
3771       length -= map_len;
3772       // state is already updated above.
3773     }
3774   }
3775   if (free_pages) {
3776     if (check_state_for_madv) {
3777       // Wait until all the pages are mapped before releasing them. This is needed to be
3778       // checked only if some mutators were found to be concurrently mapping pages earlier.
3779       for (size_t l = 0; l < madv_len; l += gPageSize, madv_state++) {
3780         uint32_t backoff_count = 0;
3781         PageState s = madv_state->load(std::memory_order_relaxed);
3782         while (s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped) {
3783           BackOff(backoff_count++);
3784           s = madv_state->load(std::memory_order_relaxed);
3785         }
3786       }
3787     }
3788     ZeroAndReleaseMemory(madv_start, madv_len);
3789   }
3790   return true;
3791 }
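
// Hedged sketch of the batching idea above: walk forward while the per-page
// state says "ready", then move the whole run from the shadow mapping into the
// faulting mapping with a single UFFDIO_COPY. The PageFlag enum and function
// name are hypothetical; the real code additionally handles contention,
// EEXIST/ENOENT, and threads racing to map the same pages.
namespace example {
enum class PageFlag : uint8_t { kNotReady, kReady, kLive };

[[maybe_unused]] inline size_t CopyReadyRun(int uffd,
                                            uint8_t* dst,
                                            uint8_t* shadow_src,
                                            std::atomic<PageFlag>* flags,
                                            size_t max_pages,
                                            size_t page_size) {
  // Count how many consecutive pages are ready to be mapped.
  size_t run = 0;
  while (run < max_pages &&
         flags[run].load(std::memory_order_acquire) == PageFlag::kReady) {
    run++;
  }
  if (run == 0) {
    return 0;
  }
  struct uffdio_copy copy;
  copy.dst = reinterpret_cast<uintptr_t>(dst);
  copy.src = reinterpret_cast<uintptr_t>(shadow_src);
  copy.len = run * page_size;
  copy.mode = 0;
  copy.copy = 0;
  // On partial success the kernel reports the copied byte count in copy.copy.
  if (ioctl(uffd, UFFDIO_COPY, &copy) != 0 && copy.copy <= 0) {
    return 0;
  }
  size_t mapped_pages = static_cast<size_t>(copy.copy) / page_size;
  for (size_t i = 0; i < mapped_pages; i++) {
    flags[i].store(PageFlag::kLive, std::memory_order_release);
  }
  return mapped_pages;
}
}  // namespace example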
3792 
3793 void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool tolerate_enoent) {
3794   auto arena_iter = linear_alloc_arenas_.end();
3795   {
3796     TrackedArena temp_arena(fault_page);
3797     arena_iter = linear_alloc_arenas_.upper_bound(&temp_arena);
3798     arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter)
3799                                                             : linear_alloc_arenas_.end();
3800   }
3801   // Unlike ProcessLinearAlloc(), we don't need to hold arena-pool's lock here
3802   // because the thread that tried to access the page, and thereby caused this
3803   // userfault, confirms that nobody can delete the corresponding arena and
3804   // release its pages.
3805   // NOTE: Some memory ranges may be recycled several times during a
3806   // compaction cycle, thereby potentially causing userfaults on the same page
3807   // several times. That's not a problem as all of them (except for possibly the
3808   // first one) would require us mapping a zero-page, which we do without updating
3809   // the 'state_arr'.
3810   if (arena_iter == linear_alloc_arenas_.end() ||
3811       arena_iter->first->IsWaitingForDeletion() ||
3812       arena_iter->second <= fault_page) {
3813     // Fault page isn't in any of the arenas that existed before we started
3814     // compaction. So map zeropage and return.
3815     ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, tolerate_enoent);
3816   } else {
3817     // Find the linear-alloc space containing fault-page
3818     LinearAllocSpaceData* space_data = nullptr;
3819     for (auto& data : linear_alloc_spaces_data_) {
3820       if (data.begin_ <= fault_page && fault_page < data.end_) {
3821         space_data = &data;
3822         break;
3823       }
3824     }
3825     DCHECK_NE(space_data, nullptr);
3826     ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_;
3827     size_t page_idx = DivideByPageSize(fault_page - space_data->begin_);
3828     Atomic<PageState>* state_arr =
3829         reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
3830     PageState state = state_arr[page_idx].load(std::memory_order_acquire);
3831     uint32_t backoff_count = 0;
3832     while (true) {
3833       switch (state) {
3834         case PageState::kUnprocessed: {
3835           // Acquire order to ensure we don't start writing to shadow map, which is
3836           // shared, before the CAS is successful.
3837           if (state_arr[page_idx].compare_exchange_strong(
3838                   state, PageState::kProcessing, std::memory_order_acquire)) {
3839             LinearAllocPageUpdater updater(this);
3840             uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page);
3841             // null first_obj indicates that it's a page from arena for
3842             // intern-table/class-table. So first object isn't required.
3843             if (first_obj != nullptr) {
3844               updater.MultiObjectArena(fault_page + diff, first_obj + diff);
3845             } else {
3846               updater.SingleObjectArena(fault_page + diff, gPageSize);
3847             }
3848             if (updater.WasLastPageTouched()) {
3849               state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
3850               state = PageState::kProcessed;
3851               continue;
3852             } else {
3853               // If the page wasn't touched, then it means it is empty and
3854               // is most likely not present on the shadow-side. Furthermore,
3855               // since the shadow is also userfaultfd registered doing copy
3856               // ioctl fails as the copy-from-user in the kernel will cause
3857               // userfault. Instead, just map a zeropage, which is not only
3858               // correct but also efficient as it avoids unnecessary memcpy
3859               // in the kernel.
3860               if (ZeropageIoctl(fault_page,
3861                                 gPageSize,
3862                                 /*tolerate_eexist=*/false,
3863                                 tolerate_enoent)) {
3864                 state_arr[page_idx].store(PageState::kProcessedAndMapped,
3865                                           std::memory_order_release);
3866               }
3867               return;
3868             }
3869           }
3870         }
3871           continue;
3872         case PageState::kProcessed:
3873           // Map as many pages as possible in a single ioctl, without spending
3874           // time freeing pages.
3875           if (MapUpdatedLinearAllocPages(fault_page,
3876                                          fault_page + diff,
3877                                          state_arr + page_idx,
3878                                          space_data->end_ - fault_page,
3879                                          /*free_pages=*/false,
3880                                          /*single_ioctl=*/true,
3881                                          tolerate_enoent)) {
3882             return;
3883           }
3884           // fault_page was not mapped by this thread (some other thread claimed
3885           // it). Wait for it to be mapped before returning.
3886           FALLTHROUGH_INTENDED;
3887         case PageState::kProcessing:
3888         case PageState::kProcessingAndMapping:
3889         case PageState::kProcessedAndMapping:
3890           // Wait for the page to be mapped before returning.
3891           BackOff(backoff_count++);
3892           state = state_arr[page_idx].load(std::memory_order_acquire);
3893           continue;
3894         case PageState::kMutatorProcessing:
3895           LOG(FATAL) << "Unreachable";
3896           UNREACHABLE();
3897         case PageState::kProcessedAndMapped:
3898           // Somebody else took care of the page.
3899           return;
3900       }
3901       break;
3902     }
3903   }
3904 }
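
// Hedged sketch of the zeropage path used above for untouched pages: map the
// kernel's shared zero page into the faulting range and treat EEXIST (another
// thread beat us to it) as success. The helper name is made up; the real
// ZeropageIoctl also handles ENOENT, partial lengths, and trylock modes.
[[maybe_unused]] static bool ExampleMapZeroPage(int uffd, uint8_t* addr, size_t len) {
  struct uffdio_zeropage zp;
  zp.range.start = reinterpret_cast<uintptr_t>(addr);
  zp.range.len = len;
  zp.mode = 0;
  zp.zeropage = 0;
  if (ioctl(uffd, UFFDIO_ZEROPAGE, &zp) == 0) {
    return true;
  }
  // EEXIST means some page in the range is already mapped, which is fine here.
  return errno == EEXIST;
}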
3905 
3906 void MarkCompact::ProcessLinearAlloc() {
3907   GcVisitedArenaPool* arena_pool =
3908       static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
3909   DCHECK_EQ(thread_running_gc_, Thread::Current());
3910   uint8_t* unmapped_range_start = nullptr;
3911   uint8_t* unmapped_range_end = nullptr;
3912   // Pointer to the linear-alloc space containing the current arena in the loop
3913   // below. Also helps in ensuring that two arenas, which are contiguous in
3914   // address space but are from different linear-alloc spaces, are not coalesced
3915   // into one range for mapping purposes.
3916   LinearAllocSpaceData* space_data = nullptr;
3917   Atomic<PageState>* state_arr = nullptr;
3918   ptrdiff_t diff = 0;
3919 
3920   auto map_pages = [&]() {
3921     DCHECK_NE(diff, 0);
3922     DCHECK_NE(space_data, nullptr);
3923     DCHECK_GE(unmapped_range_start, space_data->begin_);
3924     DCHECK_LT(unmapped_range_start, space_data->end_);
3925     DCHECK_GT(unmapped_range_end, space_data->begin_);
3926     DCHECK_LE(unmapped_range_end, space_data->end_);
3927     DCHECK_LT(unmapped_range_start, unmapped_range_end);
3928     DCHECK_ALIGNED_PARAM(unmapped_range_end - unmapped_range_start, gPageSize);
3929     size_t page_idx = DivideByPageSize(unmapped_range_start - space_data->begin_);
3930     MapUpdatedLinearAllocPages(unmapped_range_start,
3931                                unmapped_range_start + diff,
3932                                state_arr + page_idx,
3933                                unmapped_range_end - unmapped_range_start,
3934                                /*free_pages=*/true,
3935                                /*single_ioctl=*/false,
3936                                /*tolerate_enoent=*/false);
3937   };
3938   for (auto& pair : linear_alloc_arenas_) {
3939     const TrackedArena* arena = pair.first;
3940     size_t arena_size = arena->Size();
3941     uint8_t* arena_begin = arena->Begin();
3942     // linear_alloc_arenas_ is sorted on arena-begin. So we will get all arenas
3943     // in that order.
3944     DCHECK_LE(unmapped_range_end, arena_begin);
3945     DCHECK(space_data == nullptr || arena_begin > space_data->begin_)
3946         << "space-begin:" << static_cast<void*>(space_data->begin_)
3947         << " arena-begin:" << static_cast<void*>(arena_begin);
3948     if (space_data == nullptr || space_data->end_ <= arena_begin) {
3949       // Map the processed arenas as we are switching to another space.
3950       if (space_data != nullptr && unmapped_range_end != nullptr) {
3951         map_pages();
3952         unmapped_range_end = nullptr;
3953       }
3954       // Find the linear-alloc space containing the arena
3955       LinearAllocSpaceData* curr_space_data = space_data;
3956       for (auto& data : linear_alloc_spaces_data_) {
3957         if (data.begin_ <= arena_begin && arena_begin < data.end_) {
3958           // Since arenas are sorted, the next space should be higher in address
3959           // order than the current one.
3960           DCHECK(space_data == nullptr || data.begin_ >= space_data->end_);
3961           diff = data.shadow_.Begin() - data.begin_;
3962           state_arr = reinterpret_cast<Atomic<PageState>*>(data.page_status_map_.Begin());
3963           space_data = &data;
3964           break;
3965         }
3966       }
3967       CHECK_NE(space_data, curr_space_data)
3968           << "Couldn't find space for arena-begin:" << static_cast<void*>(arena_begin);
3969     }
3970     // Map the processed arenas if we found a hole within the current space.
3971     if (unmapped_range_end != nullptr && unmapped_range_end < arena_begin) {
3972       map_pages();
3973       unmapped_range_end = nullptr;
3974     }
3975     if (unmapped_range_end == nullptr) {
3976       unmapped_range_start = unmapped_range_end = arena_begin;
3977     }
3978     DCHECK_NE(unmapped_range_start, nullptr);
3979     // It's ok to include all arenas in the unmapped range. Since the
3980     // corresponding state bytes will be kUnprocessed, we will skip calling
3981     // ioctl and madvise on arenas which are waiting to be deleted.
3982     unmapped_range_end += arena_size;
3983     {
3984       // Acquire arena-pool's lock (in shared-mode) so that the arena being updated
3985       // does not get deleted at the same time. If this critical section is too
3986       // long and impacts mutator response time, then we can get rid of this lock
3987       // by holding onto the memory ranges of all arenas deleted (since the
3988       // compaction pause) until compaction finishes.
3989       ReaderMutexLock rmu(thread_running_gc_, arena_pool->GetLock());
3990       // If any arenas were freed since compaction pause then skip them from
3991       // visiting.
3992       if (arena->IsWaitingForDeletion()) {
3993         continue;
3994       }
3995       uint8_t* last_byte = pair.second;
3996       DCHECK_ALIGNED_PARAM(last_byte, gPageSize);
3997       auto visitor = [space_data, last_byte, diff, this, state_arr](
3998                          uint8_t* page_begin,
3999                          uint8_t* first_obj,
4000                          size_t page_size) REQUIRES_SHARED(Locks::mutator_lock_) {
4001         // No need to process pages past last_byte as they already have updated
4002         // gc-roots, if any.
4003         if (page_begin >= last_byte) {
4004           return;
4005         }
4006         LinearAllocPageUpdater updater(this);
4007         size_t page_idx = DivideByPageSize(page_begin - space_data->begin_);
4008         DCHECK_LT(page_idx, space_data->page_status_map_.Size());
4009         PageState expected_state = PageState::kUnprocessed;
4010         // Acquire order to ensure that we don't start accessing the shadow page,
4011         // which is shared with other threads, prior to the CAS. Also, for the same
4012         // reason, we use 'release' order for changing the state to 'processed'.
4013         if (state_arr[page_idx].compare_exchange_strong(
4014                 expected_state, PageState::kProcessing, std::memory_order_acquire)) {
4015           // null first_obj indicates that it's a page from arena for
4016           // intern-table/class-table. So first object isn't required.
4017           if (first_obj != nullptr) {
4018             updater.MultiObjectArena(page_begin + diff, first_obj + diff);
4019           } else {
4020             DCHECK_EQ(page_size, gPageSize);
4021             updater.SingleObjectArena(page_begin + diff, page_size);
4022           }
4023           expected_state = PageState::kProcessing;
4024           // Store is sufficient as no other thread could be modifying it. Use
4025           // release order to ensure that the writes to the shadow page are
4026           // committed to memory before the state change becomes visible.
4027           if (updater.WasLastPageTouched()) {
4028             state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
4029           } else {
4030             // See comment in ConcurrentlyProcessLinearAllocPage() with same situation.
4031             ZeropageIoctl(
4032                 page_begin, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
4033             // Ioctl will act as release fence.
4034             state_arr[page_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
4035           }
4036         }
4037       };
4038 
4039       arena->VisitRoots(visitor);
4040     }
4041   }
4042   if (unmapped_range_end > unmapped_range_start) {
4043     // Map remaining pages.
4044     map_pages();
4045   }
4046 }
4047 
4048 void MarkCompact::RegisterUffd(void* addr, size_t size) {
4049   DCHECK(IsValidFd(uffd_));
4050   struct uffdio_register uffd_register;
4051   uffd_register.range.start = reinterpret_cast<uintptr_t>(addr);
4052   uffd_register.range.len = size;
4053   uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
4054   CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
4055       << "ioctl_userfaultfd: register failed: " << strerror(errno)
4056       << ". start:" << static_cast<void*>(addr) << " len:" << PrettySize(size);
4057 }
4058 
4059 // TODO: sometime we may want to tolerate certain error conditions (like ENOMEM
4060 // when we unregister the unused portion of the moving-space). Implement support
4061 // for that.
4062 void MarkCompact::UnregisterUffd(uint8_t* start, size_t len) {
4063   DCHECK(IsValidFd(uffd_));
4064   struct uffdio_range range;
4065   range.start = reinterpret_cast<uintptr_t>(start);
4066   range.len = len;
4067   CHECK_EQ(ioctl(uffd_, UFFDIO_UNREGISTER, &range), 0)
4068       << "ioctl_userfaultfd: unregister failed: " << strerror(errno)
4069       << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len);
4070 }
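
// Hedged, end-to-end illustration of how the descriptor consumed by
// RegisterUffd()/UnregisterUffd() could be created: open the userfaultfd and
// complete the mandatory UFFDIO_API handshake, asking for SIGBUS delivery so
// that faulting threads serve their own pages. Flag choices and error handling
// are simplified; the function name is not part of the collector.
[[maybe_unused]] static int ExampleCreateUserfaultfd() {
  // Newer kernels may additionally require UFFD_USER_MODE_ONLY for
  // unprivileged callers; it is omitted here for brevity.
  int fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
  if (fd == -1) {
    return -1;
  }
  struct uffdio_api api;
  api.api = UFFD_API;
  api.features = UFFD_FEATURE_SIGBUS;  // Deliver faults as SIGBUS, not as read events.
  if (ioctl(fd, UFFDIO_API, &api) != 0) {
    close(fd);
    return -1;
  }
  return fd;
}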
4071 
4072 void MarkCompact::CompactionPhase() {
4073   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4074   {
4075     int32_t freed_bytes = black_objs_slide_diff_;
4076     bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
4077     RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
4078   }
4079 
4080   CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin());
4081 
4082   ProcessLinearAlloc();
4083 
4084   auto wait_for_compaction_counter = [this](size_t idx) {
4085     SigbusCounterType count = sigbus_in_progress_count_[idx].fetch_or(
4086         kSigbusCounterCompactionDoneMask, std::memory_order_acq_rel);
4087     // Wait for SIGBUS handlers already in play.
4088     for (uint32_t i = 0; count > 0; i++) {
4089       BackOff(i);
4090       count = sigbus_in_progress_count_[idx].load(std::memory_order_acquire);
4091       count &= ~kSigbusCounterCompactionDoneMask;
4092     }
4093   };
4094   // Set compaction-done bit in the first counter to indicate that gc-thread
4095   // is done compacting and mutators should stop incrementing this counter.
4096   // Mutator should tolerate ENOENT after this. This helps avoid priority
4097   // Mutators should tolerate ENOENT after this. This helps avoid priority
4098   // finished but before gc-thread manages to unregister the spaces.
4099   wait_for_compaction_counter(0);
4100 
4101   // Unregister moving-space
4102   size_t moving_space_size = bump_pointer_space_->Capacity();
4103   size_t used_size = (moving_first_objs_count_ + black_page_count_) * gPageSize;
4104   if (used_size > 0) {
4105     UnregisterUffd(bump_pointer_space_->Begin(), used_size);
4106   }
4107   // Unregister linear-alloc spaces
4108   for (auto& data : linear_alloc_spaces_data_) {
4109     DCHECK_EQ(data.end_ - data.begin_, static_cast<ssize_t>(data.shadow_.Size()));
4110     UnregisterUffd(data.begin_, data.shadow_.Size());
4111   }
4112   GetCurrentIteration()->SetAppSlowPathDurationMs(MilliTime() - app_slow_path_start_time_);
4113 
4114   // Set compaction-done bit in the second counter to indicate that gc-thread
4115   // is done unregistering the spaces and therefore mutators, if in SIGBUS,
4116   // should return without attempting to map the faulted page. When the mutator
4117   // accesses the address again, it will succeed. Once this counter is 0,
4118   // the gc-thread can safely initialize/madvise the data structures.
4119   wait_for_compaction_counter(1);
4120 
4121   // Release all of the memory taken by moving-space's from-map
4122   from_space_map_.MadviseDontNeedAndZero();
4123   // mprotect(PROT_NONE) all maps except to-space in debug-mode to catch any unexpected accesses.
4124   DCHECK_EQ(mprotect(from_space_begin_, moving_space_size, PROT_NONE), 0)
4125       << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno);
4126 
4127   // madvise the linear-allocs' page-status arrays. Note that we don't need to
4128   // madvise the shadow-map as the pages from it were reclaimed in
4129   // ProcessLinearAlloc() after arenas were mapped.
4130   for (auto& data : linear_alloc_spaces_data_) {
4131     data.page_status_map_.MadviseDontNeedAndZero();
4132   }
4133 }
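
// Self-contained sketch (assumes <atomic>) of the two-phase handshake driven
// by wait_for_compaction_counter() above: the GC thread publishes a "done" bit
// with fetch_or and then waits until every SIGBUS handler that had already
// entered has left again. Names, the 16-bit width, and the busy-wait are
// illustrative; the real code backs off between loads.
namespace example {
constexpr uint16_t kDoneBit = 1u << 15;

// GC thread: mark the phase finished and drain in-flight handlers.
[[maybe_unused]] inline void PublishDoneAndDrain(std::atomic<uint16_t>* counter) {
  uint16_t value = counter->fetch_or(kDoneBit, std::memory_order_acq_rel);
  while ((value & static_cast<uint16_t>(~kDoneBit)) != 0) {
    value = counter->load(std::memory_order_acquire);  // Busy-wait stand-in for BackOff().
  }
}

// Fault handler: returns true (and increments) only if the phase isn't over.
[[maybe_unused]] inline bool TryEnter(std::atomic<uint16_t>* counter) {
  uint16_t value = counter->load(std::memory_order_relaxed);
  while ((value & kDoneBit) == 0) {
    if (counter->compare_exchange_weak(value, value + 1, std::memory_order_acquire)) {
      return true;
    }
  }
  return false;
}

// Fault handler: must be paired with a successful TryEnter().
[[maybe_unused]] inline void Leave(std::atomic<uint16_t>* counter) {
  counter->fetch_sub(1, std::memory_order_release);
}
}  // namespace example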
4134 
4135 template <size_t kBufferSize>
4136 class MarkCompact::ThreadRootsVisitor : public RootVisitor {
4137  public:
4138   explicit ThreadRootsVisitor(MarkCompact* mark_compact, Thread* const self)
4139         : mark_compact_(mark_compact), self_(self) {}
4140 
4141   ~ThreadRootsVisitor() {
4142     Flush();
4143   }
4144 
4145   void VisitRoots(mirror::Object*** roots,
4146                   size_t count,
4147                   [[maybe_unused]] const RootInfo& info) override
4148       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
4149     for (size_t i = 0; i < count; i++) {
4150       mirror::Object* obj = *roots[i];
4151       if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
4152         Push(obj);
4153       }
4154     }
4155   }
4156 
4157   void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
4158                   size_t count,
4159                   [[maybe_unused]] const RootInfo& info) override
4160       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
4161     for (size_t i = 0; i < count; i++) {
4162       mirror::Object* obj = roots[i]->AsMirrorPtr();
4163       if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
4164         Push(obj);
4165       }
4166     }
4167   }
4168 
4169  private:
4170   void Flush() REQUIRES_SHARED(Locks::mutator_lock_)
4171                REQUIRES(Locks::heap_bitmap_lock_) {
4172     StackReference<mirror::Object>* start;
4173     StackReference<mirror::Object>* end;
4174     {
4175       MutexLock mu(self_, mark_compact_->lock_);
4176       // Loop here because even after expanding once it may not be sufficient to
4177       // accommodate all references. That is highly unlikely, but there is no
4178       // harm in handling it this way.
4179       while (!mark_compact_->mark_stack_->BumpBack(idx_, &start, &end)) {
4180         mark_compact_->ExpandMarkStack();
4181       }
4182     }
4183     while (idx_ > 0) {
4184       *start++ = roots_[--idx_];
4185     }
4186     DCHECK_EQ(start, end);
4187   }
4188 
4189   void Push(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
4190                                  REQUIRES(Locks::heap_bitmap_lock_) {
4191     if (UNLIKELY(idx_ >= kBufferSize)) {
4192       Flush();
4193     }
4194     roots_[idx_++].Assign(obj);
4195   }
4196 
4197   StackReference<mirror::Object> roots_[kBufferSize];
4198   size_t idx_ = 0;
4199   MarkCompact* const mark_compact_;
4200   Thread* const self_;
4201 };
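
// Minimal sketch (assumes <mutex> and <vector>) of the buffering idea in
// ThreadRootsVisitor above: collect entries in a small local array and push
// them to a shared container in batches, so the shared lock is taken once per
// kBatchSize entries instead of once per root. The container and names below
// are illustrative stand-ins for the real mark stack.
namespace example {
template <typename T, size_t kBatchSize>
class BatchedPusher {
 public:
  BatchedPusher(std::vector<T>* shared, std::mutex* lock) : shared_(shared), lock_(lock) {}
  ~BatchedPusher() { Flush(); }

  void Push(const T& value) {
    if (count_ == kBatchSize) {
      Flush();
    }
    buffer_[count_++] = value;
  }

 private:
  // Drain the local buffer into the shared container under the shared lock.
  void Flush() {
    std::lock_guard<std::mutex> guard(*lock_);
    for (size_t i = 0; i < count_; i++) {
      shared_->push_back(buffer_[i]);
    }
    count_ = 0;
  }

  T buffer_[kBatchSize];
  size_t count_ = 0;
  std::vector<T>* const shared_;
  std::mutex* const lock_;
};
}  // namespace example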
4202 
4203 class MarkCompact::CheckpointMarkThreadRoots : public Closure {
4204  public:
4205   explicit CheckpointMarkThreadRoots(MarkCompact* mark_compact) : mark_compact_(mark_compact) {}
4206 
4207   void Run(Thread* thread) override NO_THREAD_SAFETY_ANALYSIS {
4208     ScopedTrace trace("Marking thread roots");
4209     // Note: self is not necessarily equal to thread since thread may be
4210     // suspended.
4211     Thread* const self = Thread::Current();
4212     CHECK(thread == self
4213           || thread->IsSuspended()
4214           || thread->GetState() == ThreadState::kWaitingPerformingGc)
4215         << thread->GetState() << " thread " << thread << " self " << self;
4216     {
4217       ThreadRootsVisitor</*kBufferSize*/ 20> visitor(mark_compact_, self);
4218       thread->VisitRoots(&visitor, kVisitRootFlagAllRoots);
4219     }
4220     // Clear page-buffer to prepare for compaction phase.
4221     thread->SetThreadLocalGcBuffer(nullptr);
4222 
4223     // If thread is a running mutator, then act on behalf of the garbage
4224     // collector. See the code in ThreadList::RunCheckpoint.
4225     mark_compact_->GetBarrier().Pass(self);
4226   }
4227 
4228  private:
4229   MarkCompact* const mark_compact_;
4230 };
4231 
4232 void MarkCompact::MarkRootsCheckpoint(Thread* self, Runtime* runtime) {
4233   // We revoke TLABs later during the paused round of marking.
4234   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4235   CheckpointMarkThreadRoots check_point(this);
4236   ThreadList* thread_list = runtime->GetThreadList();
4237   gc_barrier_.Init(self, 0);
4238   // Request that the checkpoint be run on all threads, returning a count of the threads that
4239   // must run through the barrier, including self.
4240   size_t barrier_count = thread_list->RunCheckpoint(&check_point);
4241   // Release locks then wait for all mutator threads to pass the barrier.
4242   // If there are no threads to wait for, which implies that all the checkpoint functions have
4243   // finished, then there is no need to release the locks.
4244   if (barrier_count == 0) {
4245     return;
4246   }
4247   Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
4248   Locks::mutator_lock_->SharedUnlock(self);
4249   {
4250     ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
4251     gc_barrier_.Increment(self, barrier_count);
4252   }
4253   Locks::mutator_lock_->SharedLock(self);
4254   Locks::heap_bitmap_lock_->ExclusiveLock(self);
4255   ProcessMarkStack();
4256 }
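
// Generic sketch (assumes <mutex> and <condition_variable>) of the
// checkpoint/barrier pattern above: the requester learns how many threads must
// run the checkpoint, each worker signals completion, and the requester blocks
// until the count drains. This is a plain standard-library rendition for
// illustration, not ART's Barrier.
namespace example {
class CountedBarrier {
 public:
  // Called by the requesting thread with the number of workers to wait for.
  void Init(int count) {
    std::lock_guard<std::mutex> guard(lock_);
    pending_ = count;
  }
  // Called by each worker when its checkpoint work is done.
  void Pass() {
    std::lock_guard<std::mutex> guard(lock_);
    if (--pending_ == 0) {
      cond_.notify_all();
    }
  }
  // Called by the requesting thread; returns once all workers have passed.
  void Wait() {
    std::unique_lock<std::mutex> guard(lock_);
    cond_.wait(guard, [this] { return pending_ <= 0; });
  }

 private:
  std::mutex lock_;
  std::condition_variable cond_;
  int pending_ = 0;
};
}  // namespace example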
4257 
4258 void MarkCompact::MarkNonThreadRoots(Runtime* runtime) {
4259   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4260   runtime->VisitNonThreadRoots(this);
4261   ProcessMarkStack();
4262 }
4263 
4264 void MarkCompact::MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime) {
4265   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4266   runtime->VisitConcurrentRoots(this, flags);
4267   ProcessMarkStack();
4268 }
4269 
4270 void MarkCompact::RevokeAllThreadLocalBuffers() {
4271   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4272   bump_pointer_space_->RevokeAllThreadLocalBuffers();
4273 }
4274 
4275 class MarkCompact::ScanObjectVisitor {
4276  public:
4277   explicit ScanObjectVisitor(MarkCompact* const mark_compact) ALWAYS_INLINE
4278       : mark_compact_(mark_compact) {}
4279 
4280   void operator()(ObjPtr<mirror::Object> obj) const
4281       ALWAYS_INLINE
4282       REQUIRES(Locks::heap_bitmap_lock_)
4283       REQUIRES_SHARED(Locks::mutator_lock_) {
4284     mark_compact_->ScanObject</*kUpdateLiveWords*/ false>(obj.Ptr());
4285   }
4286 
4287  private:
4288   MarkCompact* const mark_compact_;
4289 };
4290 
4291 void MarkCompact::UpdateAndMarkModUnion() {
4292   accounting::CardTable* const card_table = heap_->GetCardTable();
4293   for (const auto& space : immune_spaces_.GetSpaces()) {
4294     const char* name = space->IsZygoteSpace()
4295         ? "UpdateAndMarkZygoteModUnionTable"
4296         : "UpdateAndMarkImageModUnionTable";
4297     DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space;
4298     TimingLogger::ScopedTiming t(name, GetTimings());
4299     accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
4300     if (table != nullptr) {
4301       // UpdateAndMarkReferences() doesn't visit Reference-type objects. But
4302       // that's fine because these objects are immutable enough (referent can
4303       // only be cleared) and hence the only referents they can have are intra-space.
4304       table->UpdateAndMarkReferences(this);
4305     } else {
4306       // No mod-union table, scan all dirty/aged cards in the corresponding
4307       // card-table. This can only occur for app images.
4308       card_table->Scan</*kClearCard*/ false>(space->GetMarkBitmap(),
4309                                              space->Begin(),
4310                                              space->End(),
4311                                              ScanObjectVisitor(this),
4312                                              gc::accounting::CardTable::kCardAged);
4313     }
4314   }
4315 }
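
// Hedged sketch of the card-table indexing that the scans above rely on: one
// card byte summarizes a fixed-size block of the heap, a write barrier dirties
// the card covering the written field, and a scan visits objects on blocks
// whose card value is at least the requested minimum age. The constants and
// helper below are illustrative; the real values live in accounting::CardTable.
namespace example {
constexpr size_t kExampleCardShift = 10;   // One card byte per 1 KiB of heap.
constexpr uint8_t kExampleCardDirty = 0x70;

// Returns the card byte covering 'addr' in a heap that starts at 'heap_begin'.
[[maybe_unused]] inline uint8_t* CardFor(uint8_t* card_table_begin,
                                         const uint8_t* heap_begin,
                                         const uint8_t* addr) {
  return card_table_begin + ((addr - heap_begin) >> kExampleCardShift);
}
// A write barrier would then do:
//   *CardFor(cards, heap_begin, field_address) = kExampleCardDirty;
}  // namespace example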
4316 
4317 void MarkCompact::ScanOldGenObjects() {
4318   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4319   accounting::CardTable* const card_table = heap_->GetCardTable();
4320   // Moving space
4321   card_table->Scan</*kClearCard=*/false>(moving_space_bitmap_,
4322                                          moving_space_begin_,
4323                                          old_gen_end_,
4324                                          ScanObjectVisitor(this),
4325                                          gc::accounting::CardTable::kCardAged2);
4326   ProcessMarkStack();
4327   // Non-moving space
4328   card_table->Scan</*kClearCard=*/false>(non_moving_space_bitmap_,
4329                                          non_moving_space_->Begin(),
4330                                          non_moving_space_->End(),
4331                                          ScanObjectVisitor(this),
4332                                          gc::accounting::CardTable::kCardAged2);
4333   ProcessMarkStack();
4334 }
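// Illustrative note (annotation, not in the original source): the two Scan()
// calls above pass kCardAged2 as the minimum card age, so they presumably visit
// objects on cards whose value is >= kCardAged2, i.e. aged and dirty cards too
// (assuming kCardAged2 < kCardAged < kCardDirty, which the kCardDirty - 1 scan
// in PreCleanCards() below suggests). A young GC can therefore re-scan only the
// old-gen objects whose cards were previously recorded as holding references to
// younger objects, instead of walking the whole old generation.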
4335 
4336 void MarkCompact::MarkReachableObjects() {
4337   UpdateAndMarkModUnion();
4338   // Recursively mark all the non-image bits set in the mark bitmap.
4339   ProcessMarkStack();
4340   if (young_gen_) {
4341     // If an object overlaps the old-gen boundary, we need to visit it to make
4342     // sure that we don't miss its references in the mid-gen area, and to
4343     // update the corresponding liveness info.
4344     if (old_gen_end_ > moving_space_begin_) {
4345       uintptr_t old_gen_end = reinterpret_cast<uintptr_t>(old_gen_end_);
4346       mirror::Object* obj = moving_space_bitmap_->FindPrecedingObject(old_gen_end - kAlignment);
4347       if (obj != nullptr) {
4348         size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>();
4349         if (reinterpret_cast<uintptr_t>(obj) + RoundUp(obj_size, kAlignment) > old_gen_end) {
4350           ScanObject</*kUpdateLiveWords=*/true>(obj);
4351         }
4352       }
4353     }
4354     ScanOldGenObjects();
4355   }
4356 }
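// Illustrative example for the boundary check above (addresses are made up):
// if old_gen_end_ is 0x1000 and the last marked object below it starts at 0xfe0
// with a size that rounds up to 0x40, its end is 0x1020 > old_gen_end_, so it
// is re-scanned. That catches references stored in the part of the object that
// lies beyond the old-gen boundary and refreshes its liveness info.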
4357 
4358 void MarkCompact::ScanDirtyObjects(bool paused, uint8_t minimum_age) {
4359   accounting::CardTable* card_table = heap_->GetCardTable();
4360   for (const auto& space : heap_->GetContinuousSpaces()) {
4361     const char* name = nullptr;
4362     switch (space->GetGcRetentionPolicy()) {
4363     case space::kGcRetentionPolicyNeverCollect:
4364       name = paused ? "(Paused)ScanGrayImmuneSpaceObjects" : "ScanGrayImmuneSpaceObjects";
4365       break;
4366     case space::kGcRetentionPolicyFullCollect:
4367       name = paused ? "(Paused)ScanGrayZygoteSpaceObjects" : "ScanGrayZygoteSpaceObjects";
4368       break;
4369     case space::kGcRetentionPolicyAlwaysCollect:
4370       DCHECK(space == bump_pointer_space_ || space == non_moving_space_);
4371       name = paused ? "(Paused)ScanGrayAllocSpaceObjects" : "ScanGrayAllocSpaceObjects";
4372       break;
4373     }
4374     TimingLogger::ScopedTiming t(name, GetTimings());
4375     if (paused && use_generational_ &&
4376         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
4377       DCHECK_EQ(minimum_age, accounting::CardTable::kCardDirty);
4378       auto mod_visitor = [](uint8_t* card, uint8_t cur_val) {
4379         DCHECK_EQ(cur_val, accounting::CardTable::kCardDirty);
4380         *card = accounting::CardTable::kCardAged;
4381       };
4382 
4383       card_table->Scan</*kClearCard=*/false>(space->GetMarkBitmap(),
4384                                              space->Begin(),
4385                                              space->End(),
4386                                              ScanObjectVisitor(this),
4387                                              mod_visitor,
4388                                              minimum_age);
4389     } else {
4390       card_table->Scan</*kClearCard=*/false>(space->GetMarkBitmap(),
4391                                              space->Begin(),
4392                                              space->End(),
4393                                              ScanObjectVisitor(this),
4394                                              minimum_age);
4395     }
4396     ProcessMarkStack();
4397   }
4398 }
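// Annotation: in the paused generational case above, cards found dirty are
// rewritten to kCardAged by mod_visitor rather than being cleared. The likely
// intent is that such cards are not lost for the next GC cycle: a later young
// collection still scans aged cards (ScanOldGenObjects() uses kCardAged2 as the
// minimum age), while cards dirtied by mutators after this pause remain
// distinguishable as kCardDirty.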
4399 
4400 void MarkCompact::RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age) {
4401   ScanDirtyObjects(paused, minimum_age);
4402   CHECK(mark_stack_->IsEmpty());
4403 }
4404 
4405 void MarkCompact::MarkRoots(VisitRootFlags flags) {
4406   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4407   Runtime* runtime = Runtime::Current();
4408   // Make sure that the checkpoint which collects the stack roots is the first
4409   // one capturing GC-roots. As this one is supposed to find the address,
4410   // everything allocated after that (during this marking phase) will be
4411   // considered 'marked'.
4412   MarkRootsCheckpoint(thread_running_gc_, runtime);
4413   MarkNonThreadRoots(runtime);
4414   MarkConcurrentRoots(flags, runtime);
4415 }
4416 
4417 void MarkCompact::PreCleanCards() {
4418   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4419   CHECK(!Locks::mutator_lock_->IsExclusiveHeld(thread_running_gc_));
4420   // Age the card-table before the thread-stack scanning checkpoint in MarkRoots(),
4421   // as that ensures that there are no in-progress write barriers which started
4422   // prior to aging the card-table.
4423   PrepareForMarking(/*pre_marking=*/false);
4424   MarkRoots(static_cast<VisitRootFlags>(kVisitRootFlagClearRootLog | kVisitRootFlagNewRoots));
4425   RecursiveMarkDirtyObjects(/*paused*/ false, accounting::CardTable::kCardDirty - 1);
4426 }
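// Annotation: the kCardDirty - 1 argument above makes this concurrent re-scan
// cover both dirty and aged cards (assuming kCardAged == kCardDirty - 1). Cards
// dirtied by mutators while pre-cleaning runs are presumably caught later by a
// paused dirty-card scan; the paused generational branch in ScanDirtyObjects()
// expects exactly kCardDirty in that case.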
4427 
4428 // In a concurrent marking algorithm, if we are not using a write/read barrier, as
4429 // in this case, then we need a stop-the-world (STW) round in the end to mark
4430 // objects which were written into concurrently while concurrent marking was
4431 // performed.
4432 // In order to minimize the pause time, we could take one of two approaches:
4433 // 1. Keep repeating concurrent marking of dirty cards until the time spent goes
4434 // below a threshold.
4435 // 2. Do two rounds concurrently and then attempt a paused one. If we figure
4436 // that it's taking too long, then resume mutators and retry.
4437 //
4438 // Given the non-trivial fixed overhead of running a round (card table and root
4439 // scan), it might be better to go with approach 2.
4440 void MarkCompact::MarkingPhase() {
4441   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4442   DCHECK_EQ(thread_running_gc_, Thread::Current());
4443   WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
4444   MaybeClampGcStructures();
4445   PrepareForMarking(/*pre_marking=*/true);
4446   MarkZygoteLargeObjects();
4447   MarkRoots(
4448         static_cast<VisitRootFlags>(kVisitRootFlagAllRoots | kVisitRootFlagStartLoggingNewRoots));
4449   MarkReachableObjects();
4450   // Pre-clean dirtied cards to reduce pauses.
4451   PreCleanCards();
4452 
4453   // Set up reference processing and forward soft references once before enabling
4454   // slow path (in MarkingPause)
4455   ReferenceProcessor* rp = GetHeap()->GetReferenceProcessor();
4456   bool clear_soft_references = GetCurrentIteration()->GetClearSoftReferences();
4457   rp->Setup(thread_running_gc_, this, /*concurrent=*/ true, clear_soft_references);
4458   if (!clear_soft_references) {
4459     // Forward as many SoftReferences as possible before inhibiting reference access.
4460     rp->ForwardSoftReferences(GetTimings());
4461   }
4462 }
4463 
4464 class MarkCompact::RefFieldsVisitor {
4465  public:
4466   ALWAYS_INLINE RefFieldsVisitor(MarkCompact* const mark_compact)
4467       : mark_compact_(mark_compact),
4468         young_gen_begin_(mark_compact->mid_gen_end_),
4469         young_gen_end_(mark_compact->moving_space_end_),
4470         dirty_card_(false),
4471         // Ideally we should only check for objects outside young-gen. However,
4472         // the boundary of young-gen can change later in PrepareForCompaction()
4473         // as we need the mid-gen-end to be page-aligned. Since most of the
4474         // objects don't have native-roots, it's not too costly to check all
4475         // objects being visited during marking.
4476         check_native_roots_to_young_gen_(mark_compact->use_generational_) {}
4477 
4478   bool ShouldDirtyCard() const { return dirty_card_; }
4479 
4480   ALWAYS_INLINE void operator()(mirror::Object* obj,
4481                                 MemberOffset offset,
4482                                 [[maybe_unused]] bool is_static) const
4483       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4484     if (kCheckLocks) {
4485       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
4486       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
4487     }
4488     mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset);
4489     mark_compact_->MarkObject(ref, obj, offset);
4490   }
4491 
4492   void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const ALWAYS_INLINE
4493       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4494     mark_compact_->DelayReferenceReferent(klass, ref);
4495   }
4496 
4497   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
4498       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4499     if (!root->IsNull()) {
4500       VisitRoot(root);
4501     }
4502   }
4503 
4504   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
4505       REQUIRES(Locks::heap_bitmap_lock_)
4506       REQUIRES_SHARED(Locks::mutator_lock_) {
4507     if (kCheckLocks) {
4508       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
4509       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
4510     }
4511     mirror::Object* ref = root->AsMirrorPtr();
4512     mark_compact_->MarkObject(ref);
4513     if (check_native_roots_to_young_gen_) {
4514       dirty_card_ |= reinterpret_cast<uint8_t*>(ref) >= young_gen_begin_ &&
4515                      reinterpret_cast<uint8_t*>(ref) < young_gen_end_;
4516     }
4517   }
4518 
4519  private:
4520   MarkCompact* const mark_compact_;
4521   uint8_t* const young_gen_begin_;
4522   uint8_t* const young_gen_end_;
4523   mutable bool dirty_card_;
4524   const bool check_native_roots_to_young_gen_;
4525 };
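// Annotation: ShouldDirtyCard() is how native roots (e.g. roots reachable from
// a class's native data) that point into the young generation get remembered.
// VisitRoot() above only records the fact in dirty_card_; the holder object is
// then queued on dirty_cards_later_vec_ in ScanObject(), and its card is dirtied
// in FinishPhase(), after this cycle's re-scans are done, so the card is not
// prematurely aged away.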
4526 
4527 template <size_t kAlignment>
4528 size_t MarkCompact::LiveWordsBitmap<kAlignment>::LiveBytesInBitmapWord(size_t chunk_idx) const {
4529   const size_t index = chunk_idx * kBitmapWordsPerVectorWord;
4530   size_t words = 0;
4531   for (uint32_t i = 0; i < kBitmapWordsPerVectorWord; i++) {
4532     words += POPCOUNT(Bitmap::Begin()[index + i]);
4533   }
4534   return words * kAlignment;
4535 }
4536 
4537 void MarkCompact::UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) {
4538   DCHECK(obj != nullptr);
4539   DCHECK_EQ(obj_size, obj->SizeOf<kDefaultVerifyFlags>());
4540   uintptr_t obj_begin = reinterpret_cast<uintptr_t>(obj);
4541   UpdateClassAfterObjectMap(obj);
4542   size_t size = RoundUp(obj_size, kAlignment);
4543   uintptr_t bit_index = live_words_bitmap_->SetLiveWords(obj_begin, size);
4544   size_t chunk_idx =
4545       (obj_begin - reinterpret_cast<uintptr_t>(moving_space_begin_)) / kOffsetChunkSize;
4546   // Compute the bit-index within the chunk-info vector word.
4547   bit_index %= kBitsPerVectorWord;
4548   size_t first_chunk_portion = std::min(size, (kBitsPerVectorWord - bit_index) * kAlignment);
4549   chunk_info_vec_[chunk_idx] += first_chunk_portion;
4550   DCHECK_LE(chunk_info_vec_[chunk_idx], kOffsetChunkSize)
4551       << "first_chunk_portion:" << first_chunk_portion
4552       << " obj-size:" << RoundUp(obj_size, kAlignment);
4553   chunk_idx++;
4554   DCHECK_LE(first_chunk_portion, size);
4555   for (size -= first_chunk_portion; size > kOffsetChunkSize; size -= kOffsetChunkSize) {
4556     DCHECK_EQ(chunk_info_vec_[chunk_idx], 0u);
4557     chunk_info_vec_[chunk_idx++] = kOffsetChunkSize;
4558   }
4559   chunk_info_vec_[chunk_idx] += size;
4560   DCHECK_LE(chunk_info_vec_[chunk_idx], kOffsetChunkSize)
4561       << "size:" << size << " obj-size:" << RoundUp(obj_size, kAlignment);
4562 }
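// Worked example for the chunk accounting above (sizes are illustrative, not
// the real constants): suppose kOffsetChunkSize were 4 KB and an object's
// rounded size is 10 KB, starting 1 KB before the end of its first chunk. Then
// first_chunk_portion = 1 KB is credited to that chunk, the loop credits 4 KB
// to each of the next two fully covered chunks, and the remaining 1 KB goes to
// the last chunk: 1 + 4 + 4 + 1 = 10 KB, so chunk_info_vec_ ends up holding the
// object's live bytes per chunk.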
4563 
4564 template <bool kUpdateLiveWords>
4565 void MarkCompact::ScanObject(mirror::Object* obj) {
4566   mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
4567   // TODO(lokeshgidra): Remove the following condition once b/373609505 is fixed.
4568   if (UNLIKELY(klass == nullptr)) {
4569     // It was seen in the ConcurrentCopying GC that, after a small wait, reloading
4570     // the class pointer yields a valid class object. So as a workaround, we
4571     // continue execution and log an error that this happened.
4572     for (size_t i = 0; i < 1000; i++) {
4573       // Wait for 1ms at a time. Don't wait for more than 1 second in total.
4574       usleep(1000);
4575       klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
4576       if (klass != nullptr) {
4577         break;
4578       }
4579     }
4580     if (klass == nullptr) {
4581       // It must be heap corruption.
4582       LOG(FATAL_WITHOUT_ABORT) << "klass pointer for obj: " << obj << " found to be null."
4583                                << " black_dense_end: " << static_cast<void*>(black_dense_end_)
4584                                << " mid_gen_end: " << static_cast<void*>(mid_gen_end_)
4585                                << " prev_post_compact_end: " << prev_post_compact_end_
4586                                << " prev_black_allocations_begin: " << prev_black_allocations_begin_
4587                                << " prev_black_dense_end: " << prev_black_dense_end_
4588                                << " prev_gc_young: " << prev_gc_young_
4589                                << " prev_gc_performed_compaction: "
4590                                << prev_gc_performed_compaction_;
4591       heap_->GetVerification()->LogHeapCorruption(
4592           obj, mirror::Object::ClassOffset(), klass, /*fatal=*/true);
4593     }
4594   }
4595   // The size of `obj` is used both here (to update `bytes_scanned_`) and in
4596   // `UpdateLivenessInfo`. As fetching this value can be expensive, do it once
4597   // here and pass that information to `UpdateLivenessInfo`.
4598   size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>();
4599   bytes_scanned_ += obj_size;
4600 
4601   RefFieldsVisitor visitor(this);
4602   DCHECK(IsMarked(obj)) << "Scanning marked object " << obj << "\n" << heap_->DumpSpaces();
4603   if (kUpdateLiveWords && HasAddress(obj)) {
4604     UpdateLivenessInfo(obj, obj_size);
4605     freed_objects_--;
4606   }
4607   obj->VisitReferences(visitor, visitor);
4608   // Old-gen cards for objects containing references to mid-gen need to be kept
4609   // dirty for re-scan in the next GC cycle. We handle that mostly during the
4610   // compaction phase, as that lets us implicitly cover black-allocated objects
4611   // as well. Unfortunately, since we don't visit native-roots during
4612   // compaction, those have to be captured during marking.
4613   //
4614   // Note that we can't dirty the cards right away because then we would wrongly
4615   // age them during re-scan of this marking phase, and thereby may lose them
4616   // by the end of the GC cycle.
4617   if (visitor.ShouldDirtyCard()) {
4618     dirty_cards_later_vec_.push_back(obj);
4619   }
4620 }
4621 
4622 // Scan anything that's on the mark stack.
4623 void MarkCompact::ProcessMarkStack() {
4624   // TODO: eventually get rid of this as we now call this function quite a few times.
4625   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4626   // TODO: try prefetch like in CMS
4627   while (!mark_stack_->IsEmpty()) {
4628     mirror::Object* obj = mark_stack_->PopBack();
4629     DCHECK(obj != nullptr);
4630     ScanObject</*kUpdateLiveWords*/ true>(obj);
4631   }
4632 }
4633 
4634 void MarkCompact::ExpandMarkStack() {
4635   const size_t new_size = mark_stack_->Capacity() * 2;
4636   std::vector<StackReference<mirror::Object>> temp(mark_stack_->Begin(),
4637                                                    mark_stack_->End());
4638   mark_stack_->Resize(new_size);
4639   for (auto& ref : temp) {
4640     mark_stack_->PushBack(ref.AsMirrorPtr());
4641   }
4642   DCHECK(!mark_stack_->IsFull());
4643 }
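// Annotation: the contents are copied into a temporary vector before Resize()
// and re-pushed afterwards, presumably because resizing the stack re-creates
// its backing storage rather than preserving the existing entries.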
4644 
4645 inline void MarkCompact::PushOnMarkStack(mirror::Object* obj) {
4646   if (UNLIKELY(mark_stack_->IsFull())) {
4647     ExpandMarkStack();
4648   }
4649   mark_stack_->PushBack(obj);
4650 }
4651 
4652 inline void MarkCompact::MarkObjectNonNull(mirror::Object* obj,
4653                                            mirror::Object* holder,
4654                                            MemberOffset offset) {
4655   DCHECK(obj != nullptr);
4656   if (MarkObjectNonNullNoPush</*kParallel*/false>(obj, holder, offset)) {
4657     PushOnMarkStack(obj);
4658   }
4659 }
4660 
4661 template <bool kParallel>
4662 inline bool MarkCompact::MarkObjectNonNullNoPush(mirror::Object* obj,
4663                                                  mirror::Object* holder,
4664                                                  MemberOffset offset) {
4665   // We expect most of the references to be in bump-pointer space, so try that
4666   // first to keep the cost of this function minimal.
4667   if (LIKELY(HasAddress(obj))) {
4668     // If obj is in old-gen (during young-gc) then we shouldn't add it to
4669     // mark-stack to limit marking to young generation.
4670     if (young_gen_ && reinterpret_cast<uint8_t*>(obj) < old_gen_end_) {
4671       DCHECK(moving_space_bitmap_->Test(obj));
4672       return false;
4673     }
4674     return kParallel ? !moving_space_bitmap_->AtomicTestAndSet(obj)
4675                      : !moving_space_bitmap_->Set(obj);
4676   } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4677     return kParallel ? !non_moving_space_bitmap_->AtomicTestAndSet(obj)
4678                      : !non_moving_space_bitmap_->Set(obj);
4679   } else if (immune_spaces_.ContainsObject(obj)) {
4680     DCHECK(IsMarked(obj) != nullptr);
4681     return false;
4682   } else {
4683     // Must be a large-object space, otherwise it's a case of heap corruption.
4684     if (!IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment())) {
4685       // Objects in large-object space are aligned to the large-object alignment.
4686       // So if we have an object which doesn't belong to any space and is not
4687       // page-aligned as well, then it's memory corruption.
4688       // TODO: implement protect/unprotect in bump-pointer space.
4689       heap_->GetVerification()->LogHeapCorruption(holder, offset, obj, /*fatal*/ true);
4690     }
4691     DCHECK_NE(heap_->GetLargeObjectsSpace(), nullptr)
4692         << "ref=" << obj
4693         << " doesn't belong to any of the spaces and large object space doesn't exist";
4694     accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4695     DCHECK(los_bitmap->HasAddress(obj));
4696     if (kParallel) {
4697       los_bitmap->AtomicTestAndSet(obj);
4698     } else {
4699       los_bitmap->Set(obj);
4700     }
4701     // We only have strings and primitive arrays in the large object space, so
4702     // there is no reason to push onto the mark-stack.
4703     DCHECK(obj->IsString() || (obj->IsArrayInstance() && !obj->IsObjectArray()));
4704     return false;
4705   }
4706 }
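// Annotation: the dispatch above tries the spaces in decreasing order of
// expected hit rate: moving (bump-pointer) space first, then non-moving space,
// then immune spaces, and finally the large-object space. Only the first two
// can return true (object newly marked and worth scanning): large objects are
// strings or primitive arrays here and carry no references, so they are never
// pushed on the mark stack, while immune-space objects are covered separately
// via mod-union tables / card scans in UpdateAndMarkModUnion().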
4707 
4708 inline void MarkCompact::MarkObject(mirror::Object* obj,
4709                                     mirror::Object* holder,
4710                                     MemberOffset offset) {
4711   if (obj != nullptr) {
4712     MarkObjectNonNull(obj, holder, offset);
4713   }
4714 }
4715 
4716 mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) {
4717   MarkObject(obj, nullptr, MemberOffset(0));
4718   return obj;
4719 }
4720 
4721 void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
4722                                     [[maybe_unused]] bool do_atomic_update) {
4723   MarkObject(obj->AsMirrorPtr(), nullptr, MemberOffset(0));
4724 }
4725 
4726 void MarkCompact::VisitRoots(mirror::Object*** roots,
4727                              size_t count,
4728                              const RootInfo& info) {
4729   if (compacting_) {
4730     uint8_t* moving_space_begin = black_dense_end_;
4731     uint8_t* moving_space_end = moving_space_end_;
4732     for (size_t i = 0; i < count; ++i) {
4733       UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
4734     }
4735   } else {
4736     for (size_t i = 0; i < count; ++i) {
4737       MarkObjectNonNull(*roots[i]);
4738     }
4739   }
4740 }
4741 
4742 void MarkCompact::VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
4743                              size_t count,
4744                              const RootInfo& info) {
4745   // TODO: do we need to check if the root is null or not?
4746   if (compacting_) {
4747     uint8_t* moving_space_begin = black_dense_end_;
4748     uint8_t* moving_space_end = moving_space_end_;
4749     for (size_t i = 0; i < count; ++i) {
4750       UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
4751     }
4752   } else {
4753     for (size_t i = 0; i < count; ++i) {
4754       MarkObjectNonNull(roots[i]->AsMirrorPtr());
4755     }
4756   }
4757 }
4758 
4759 mirror::Object* MarkCompact::IsMarked(mirror::Object* obj) {
4760   if (HasAddress(obj)) {
4761     const bool is_black = reinterpret_cast<uint8_t*>(obj) >= black_allocations_begin_;
4762     if (compacting_) {
4763       if (is_black) {
4764         return PostCompactBlackObjAddr(obj);
4765       } else if (moving_space_bitmap_->Test(obj)) {
4766         if (reinterpret_cast<uint8_t*>(obj) < black_dense_end_) {
4767           return obj;
4768         } else {
4769           return PostCompactOldObjAddr(obj);
4770         }
4771       } else {
4772         return nullptr;
4773       }
4774     }
4775     return (is_black || moving_space_bitmap_->Test(obj)) ? obj : nullptr;
4776   } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4777     if (non_moving_space_bitmap_->Test(obj)) {
4778       return obj;
4779     }
4780   } else if (immune_spaces_.ContainsObject(obj)) {
4781     return obj;
4782   } else {
4783     DCHECK(heap_->GetLargeObjectsSpace())
4784         << "ref=" << obj
4785         << " doesn't belong to any of the spaces and large object space doesn't exist";
4786     accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4787     if (los_bitmap->HasAddress(obj)) {
4788       DCHECK(IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment()));
4789       if (los_bitmap->Test(obj)) {
4790         return obj;
4791       }
4792     } else {
4793       // The given obj is not in any of the known spaces, so return null. This could
4794       // happen for instance in interpreter caches, wherein a concurrent update
4795       // to the cache could result in obj being a non-reference. This is
4796       // tolerable because SweepInterpreterCaches only updates if the given
4797       // object has moved, which can't be the case for a non-reference.
4798       return nullptr;
4799     }
4800   }
4801   return marking_done_ && IsOnAllocStack(obj) ? obj : nullptr;
4802 }
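// Annotation: during compaction, IsMarked() doubles as a pre-compact to
// post-compact address translator for the moving space. A rough sketch of the
// cases above (assuming black_dense_end_ <= black_allocations_begin_):
//
//   [moving_space_begin_ .. black_dense_end_)        : marked objects stay put
//   [black_dense_end_ .. black_allocations_begin_)   : marked objects map via
//                                                      PostCompactOldObjAddr()
//   [black_allocations_begin_ .. end)                : black (post-pause)
//                                                      allocations map via
//                                                      PostCompactBlackObjAddr()
//
// Unmarked objects in the compacted range return nullptr.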
4803 
4804 bool MarkCompact::IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
4805                                               [[maybe_unused]] bool do_atomic_update) {
4806   mirror::Object* ref = obj->AsMirrorPtr();
4807   if (ref == nullptr) {
4808     return true;
4809   }
4810   return IsMarked(ref);
4811 }
4812 
4813 // Process the 'referent' field in a java.lang.ref.Reference. If the referent
4814 // has not yet been marked, put it on the appropriate list in the heap for later
4815 // processing.
4816 void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
4817                                          ObjPtr<mirror::Reference> ref) {
4818   heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, this);
4819 }
4820 
4821 template <typename Visitor>
4822 class MarkCompact::VisitReferencesVisitor {
4823  public:
4824   explicit VisitReferencesVisitor(Visitor visitor) : visitor_(visitor) {}
4825 
4826   ALWAYS_INLINE void operator()(mirror::Object* obj,
4827                                 MemberOffset offset,
4828                                 [[maybe_unused]] bool is_static) const
4829       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4830     visitor_(obj->GetFieldObject<mirror::Object>(offset));
4831   }
4832 
4833   ALWAYS_INLINE void operator()([[maybe_unused]] ObjPtr<mirror::Class> klass,
4834                                 ObjPtr<mirror::Reference> ref) const
4835       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4836     visitor_(ref.Ptr());
4837   }
4838 
4839   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
4840       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4841     if (!root->IsNull()) {
4842       VisitRoot(root);
4843     }
4844   }
4845 
4846   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
4847       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4848     visitor_(root->AsMirrorPtr());
4849   }
4850 
4851  private:
4852   Visitor visitor_;
4853 };
4854 
4855 void MarkCompact::VerifyNoMissingCardMarks() {
4856   if (kVerifyNoMissingCardMarks) {
4857     accounting::CardTable* card_table = heap_->GetCardTable();
4858     auto obj_visitor = [&](mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) {
4859       bool found = false;
4860       VisitReferencesVisitor visitor(
4861           [begin = old_gen_end_, end = moving_space_end_, &found](mirror::Object* ref) {
4862             found |= ref >= reinterpret_cast<mirror::Object*>(begin) &&
4863                      ref < reinterpret_cast<mirror::Object*>(end);
4864           });
4865       obj->VisitReferences</*kVisitNativeRoots=*/true>(visitor, visitor);
4866       if (found) {
4867         size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
4868         if (!card_table->IsDirty(obj) &&
4869             reinterpret_cast<uint8_t*>(obj) + obj_size <= old_gen_end_) {
4870           std::ostringstream oss;
4871           obj->DumpReferences</*kDumpNativeRoots=*/true>(oss, /*dump_type_of=*/true);
4872           LOG(FATAL_WITHOUT_ABORT)
4873               << "Object " << obj << " (" << obj->PrettyTypeOf()
4874               << ") has references to mid-gen/young-gen:"
4875               << "\n obj-size = " << obj_size
4876               << "\n old-gen-end = " << static_cast<void*>(old_gen_end_)
4877               << "\n mid-gen-end = " << static_cast<void*>(mid_gen_end_) << "\n references =\n"
4878               << oss.str();
4879           heap_->GetVerification()->LogHeapCorruption(
4880               /*holder=*/nullptr, MemberOffset(0), obj, /*fatal=*/true);
4881         }
4882       }
4883     };
4884     moving_space_bitmap_->VisitMarkedRange(reinterpret_cast<uintptr_t>(moving_space_begin_),
4885                                            reinterpret_cast<uintptr_t>(old_gen_end_),
4886                                            obj_visitor);
4887   }
4888 }
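// Annotation: the check above enforces the generational invariant that any
// old-gen object holding a reference into mid-gen/young-gen must sit on a dirty
// card; otherwise a young GC that only scans dirty/aged old-gen cards could
// miss that reference and fail to mark or update its target.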
4889 
4890 void MarkCompact::VerifyPostGCObjects(bool performed_compaction, uint8_t* mark_bitmap_clear_end) {
4891   if (kVerifyPostGCObjects) {
4892     mirror::Object* last_visited_obj = nullptr;
4893     auto obj_visitor =
4894         [&](mirror::Object* obj, bool verify_bitmap = false) REQUIRES_SHARED(Locks::mutator_lock_) {
4895           std::vector<mirror::Object*> invalid_refs;
4896           if (verify_bitmap && !moving_space_bitmap_->Test(obj)) {
4897             LOG(FATAL) << "Obj " << obj << " (" << obj->PrettyTypeOf()
4898                        << ") doesn't have mark-bit set"
4899                        << "\n prev-black-dense-end = " << static_cast<void*>(prev_black_dense_end_)
4900                        << "\n old-gen-end = " << static_cast<void*>(old_gen_end_)
4901                        << "\n mid-gen-end = " << static_cast<void*>(mid_gen_end_);
4902           }
4903           VisitReferencesVisitor visitor(
4904               [verification = heap_->GetVerification(), &invalid_refs](mirror::Object* ref)
4905                   REQUIRES_SHARED(Locks::mutator_lock_) {
4906                     if (ref != nullptr && !verification->IsValidObject(ref)) {
4907                       invalid_refs.push_back(ref);
4908                     }
4909                   });
4910           obj->VisitReferences</*kVisitNativeRoots=*/true>(visitor, visitor);
4911           if (!invalid_refs.empty()) {
4912             std::ostringstream oss;
4913             for (mirror::Object* ref : invalid_refs) {
4914               oss << ref << " ";
4915             }
4916             LOG(FATAL_WITHOUT_ABORT)
4917                 << "Object " << obj << " (" << obj->PrettyTypeOf() << ") has invalid references:\n"
4918                 << oss.str() << "\ncard = " << static_cast<int>(heap_->GetCardTable()->GetCard(obj))
4919                 << "\n prev-black-dense-end = " << static_cast<void*>(prev_black_dense_end_)
4920                 << "\n old-gen-end = " << static_cast<void*>(old_gen_end_)
4921                 << "\n mid-gen-end = " << static_cast<void*>(mid_gen_end_)
4922                 << "\n black-allocations-begin = " << static_cast<void*>(black_allocations_begin_);
4923 
4924             // Calling PrettyTypeOf() on a stale reference mostly results in segfault.
4925             oss.str("");
4926             obj->DumpReferences</*kDumpNativeRoots=*/true>(oss, /*dump_type_of=*/false);
4927             LOG(FATAL_WITHOUT_ABORT) << "\n references =\n" << oss.str();
4928 
4929             heap_->GetVerification()->LogHeapCorruption(
4930                 /*holder=*/nullptr, MemberOffset(0), obj, /*fatal=*/true);
4931           }
4932           last_visited_obj = obj;
4933         };
4934     non_moving_space_bitmap_->VisitAllMarked(obj_visitor);
4935     last_visited_obj = nullptr;
4936     // We should verify all objects that have survived, which means old and mid-gen.
4937     // Objects that were promoted to old-gen and mid-gen in this GC cycle are tightly
4938     // packed, except if compaction was not performed. So we use object size to walk
4939     // the heap and also verify that the mark-bit is set in the tightly packed portion.
4940     moving_space_bitmap_->VisitMarkedRange(
4941         reinterpret_cast<uintptr_t>(moving_space_begin_),
4942         reinterpret_cast<uintptr_t>(performed_compaction ? prev_black_dense_end_
4943                                                          : mark_bitmap_clear_end),
4944         obj_visitor);
4945     if (performed_compaction) {
4946       mirror::Object* obj = last_visited_obj;
4947       if (obj == nullptr || AlignUp(reinterpret_cast<uint8_t*>(obj) + obj->SizeOf(), kAlignment) <
4948                                 prev_black_dense_end_) {
4949         obj = reinterpret_cast<mirror::Object*>(prev_black_dense_end_);
4950       }
4951       while (reinterpret_cast<uint8_t*>(obj) < mid_gen_end_ && obj->GetClass() != nullptr) {
4952         // Objects in mid-gen will not have their corresponding mark-bits set.
4953         obj_visitor(obj, reinterpret_cast<void*>(obj) < black_dense_end_);
4954         uintptr_t next = reinterpret_cast<uintptr_t>(obj) + obj->SizeOf();
4955         obj = reinterpret_cast<mirror::Object*>(RoundUp(next, kAlignment));
4956       }
4957     }
4958   }
4959 }
4960 
4961 void MarkCompact::FinishPhase(bool performed_compaction) {
4962   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4963   GetCurrentIteration()->SetScannedBytes(bytes_scanned_);
4964   bool is_zygote = Runtime::Current()->IsZygote();
4965   compacting_ = false;
4966   marking_done_ = false;
4967   uint8_t* mark_bitmap_clear_end = black_dense_end_;
4968   LOG(DEBUG) << "ART-GC black_dense_end:" << static_cast<void*>(black_dense_end_)
4969              << " mid_gen_end:" << static_cast<void*>(mid_gen_end_)
4970              << " post_compact_end:" << static_cast<void*>(post_compact_end_)
4971              << " black_allocations_begin:" << static_cast<void*>(black_allocations_begin_)
4972              << " young:" << young_gen_ << " performed_compaction:" << performed_compaction;
4973 
4974   // Retain values of some fields for logging in the next GC cycle, in case a
4975   // memory corruption is detected.
4976   prev_black_allocations_begin_ = static_cast<void*>(black_allocations_begin_);
4977   prev_black_dense_end_ = static_cast<void*>(black_dense_end_);
4978   prev_post_compact_end_ = static_cast<void*>(post_compact_end_);
4979   prev_gc_young_ = young_gen_;
4980   prev_gc_performed_compaction_ = performed_compaction;
4981 
4982   // Whether compaction is performed or not, we always set post_compact_end_
4983   // before reaching here.
4984   CHECK_NE(post_compact_end_, nullptr);
4985   if (use_generational_) {
4986     {
4987       ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
4988       // We need to retain and update the class-after-object map for old-gen as
4989       // it won't be re-created in the next young GC.
4990       // Jump to the first class which is getting promoted to old-gen. Since
4991       // it is not compacted, references into old-gen don't need to be updated.
4992       // All pairs in mid-gen will be updated with post-compact addresses and
4993       // retained, as mid-gen is getting consumed into old-gen now. All pairs
4994       // after mid-gen will be erased as they are not required in next GC cycle.
4995       auto iter = class_after_obj_map_.lower_bound(
4996           ObjReference::FromMirrorPtr(reinterpret_cast<mirror::Object*>(old_gen_end_)));
4997       while (iter != class_after_obj_map_.end()) {
4998         mirror::Object* klass = iter->first.AsMirrorPtr();
4999         mirror::Object* obj = iter->second.AsMirrorPtr();
5000         DCHECK_GT(klass, obj);
5001         // Black allocations begin after the marking-pause. Therefore, we cannot
5002         // have a situation wherein a class is allocated after the pause while
5003         // its object is allocated before it.
5004         if (reinterpret_cast<uint8_t*>(klass) >= black_allocations_begin_) {
5005           for (auto it = iter; it != class_after_obj_map_.end(); it++) {
5006             DCHECK_GE(reinterpret_cast<uint8_t*>(it->second.AsMirrorPtr()),
5007                       black_allocations_begin_);
5008           }
5009           class_after_obj_map_.erase(iter, class_after_obj_map_.end());
5010           break;
5011         }
5012 
5013         DCHECK(moving_space_bitmap_->Test(klass));
5014         DCHECK(moving_space_bitmap_->Test(obj));
5015         // As 'mid_gen_end_' is where our old-gen will end now, compute compacted
5016         // addresses of <class, object> for comparisons and updating in the map.
5017         mirror::Object* compacted_klass = klass;
5018         mirror::Object* compacted_obj = obj;
5019         if (performed_compaction) {
5020           compacted_klass = PostCompactAddress(klass, old_gen_end_, moving_space_end_);
5021           compacted_obj = PostCompactAddress(obj, old_gen_end_, moving_space_end_);
5022           DCHECK_GT(compacted_klass, compacted_obj);
5023         }
5024         if (reinterpret_cast<uint8_t*>(compacted_obj) >= mid_gen_end_) {
5025           iter = class_after_obj_map_.erase(iter);
5026           continue;
5027         } else if (mid_to_old_promo_bit_vec_.get() != nullptr) {
5028           if (reinterpret_cast<uint8_t*>(compacted_klass) >= old_gen_end_) {
5029             DCHECK(mid_to_old_promo_bit_vec_->IsBitSet(
5030                 (reinterpret_cast<uint8_t*>(compacted_obj) - old_gen_end_) / kAlignment));
5031           }
5032           if (reinterpret_cast<uint8_t*>(compacted_klass) < mid_gen_end_) {
5033             DCHECK(mid_to_old_promo_bit_vec_->IsBitSet(
5034                 (reinterpret_cast<uint8_t*>(compacted_klass) - old_gen_end_) / kAlignment));
5035           }
5036         }
5037         if (performed_compaction) {
5038           auto nh = class_after_obj_map_.extract(iter++);
5039           nh.key() = ObjReference::FromMirrorPtr(compacted_klass);
5040           nh.mapped() = ObjReference::FromMirrorPtr(compacted_obj);
5041           auto success = class_after_obj_map_.insert(iter, std::move(nh));
5042           CHECK_EQ(success->first.AsMirrorPtr(), compacted_klass);
5043         } else {
5044           iter++;
5045         }
5046       }
5047 
5048       // Dirty the cards for objects captured from native-roots during marking-phase.
5049       accounting::CardTable* card_table = heap_->GetCardTable();
5050       for (auto obj : dirty_cards_later_vec_) {
5051         // Only moving and non-moving spaces are relevant as the remaining
5052         // spaces are all immune-spaces, which use the card-table anyway.
5053         if (HasAddress(obj)) {
5054           // Objects in young-gen that refer to other young-gen objects don't
5055           // need to be tracked.
5056           // The vector contains pre-compact object references whereas
5057           // 'mid_gen_end_' is post-compact boundary. So compare against
5058           // post-compact object reference.
5059           mirror::Object* compacted_obj =
5060               performed_compaction ? PostCompactAddress(obj, black_dense_end_, moving_space_end_)
5061                                    : obj;
5062           if (reinterpret_cast<uint8_t*>(compacted_obj) < mid_gen_end_) {
5063             card_table->MarkCard(compacted_obj);
5064           }
5065         } else if (non_moving_space_->HasAddress(obj)) {
5066           card_table->MarkCard(obj);
5067         }
5068       }
5069     }
5070     dirty_cards_later_vec_.clear();
5071 
5072     // Copy mid-gen bitmap into moving-space's mark-bitmap
5073     if (mid_to_old_promo_bit_vec_.get() != nullptr) {
5074       DCHECK_EQ(mid_to_old_promo_bit_vec_->GetBitSizeOf(),
5075                 (mid_gen_end_ - old_gen_end_) / kObjectAlignment);
5076       uint32_t* bitmap_begin = reinterpret_cast<uint32_t*>(moving_space_bitmap_->Begin());
5077       DCHECK(IsAligned<kObjectAlignment * BitVector::kWordBits>(gPageSize));
5078       size_t index = (old_gen_end_ - moving_space_begin_) / kObjectAlignment / BitVector::kWordBits;
5079       mid_to_old_promo_bit_vec_->CopyTo(&bitmap_begin[index],
5080                                         mid_to_old_promo_bit_vec_->GetSizeOf());
5081       mid_to_old_promo_bit_vec_.reset(nullptr);
5082     } else if (!performed_compaction) {
5083       // We typically only retain the mark-bitmap for the old-generation as the
5084       // objects following it are expected to be contiguous. However, when
5085       // compaction is not performed, we may have decided to tolerate a few holes
5086       // here and there. So we have to retain the bitmap for the entire
5087       // 'compacted' portion of the heap, which is up to mid-gen-end.
5088       DCHECK_LE(old_gen_end_, post_compact_end_);
5089       mark_bitmap_clear_end = post_compact_end_;
5090     }
5091     // Promote all mid-gen objects to old-gen and young-gen objects to mid-gen
5092     // for next GC cycle.
5093     old_gen_end_ = mid_gen_end_;
5094     mid_gen_end_ = post_compact_end_;
5095     post_compact_end_ = nullptr;
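    // Annotation: the assignments above shift the generation boundaries for the
    // next cycle. Roughly (a sketch, not to scale):
    //
    //   before: [begin .. old_gen_end_) old | [.. mid_gen_end_) mid | [.. post_compact_end_) young/black
    //   after : [begin .. new old_gen_end_) old (old + former mid)  | [.. new mid_gen_end_) mid (former young)
    //
    // i.e. survivors of this cycle age by one generation, and the next young GC
    // treats everything below the new old_gen_end_ as not-to-be-collected.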
5096 
5097     // Verify (in debug builds), after updating the mark-bitmap, that the
5098     // class-after-object map is correct.
5099     for (auto iter : class_after_obj_map_) {
5100       DCHECK(moving_space_bitmap_->Test(iter.second.AsMirrorPtr()));
5101       mirror::Object* klass = iter.first.AsMirrorPtr();
5102       DCHECK_IMPLIES(!moving_space_bitmap_->Test(klass),
5103                      reinterpret_cast<uint8_t*>(klass) >= old_gen_end_);
5104     }
5105   } else {
5106     class_after_obj_map_.clear();
5107     if (!performed_compaction) {
5108       DCHECK_LE(old_gen_end_, post_compact_end_);
5109       mark_bitmap_clear_end = post_compact_end_;
5110     }
5111   }
5112   // The black-dense region, which requires the bitmap for object-walk, could be
5113   // larger than old-gen. Therefore, retain the bitmap for the entire black-dense
5114   // region until the next GC. At the beginning of the next cycle, we clear
5115   // [old_gen_end_, moving_space_end_).
5116   mark_bitmap_clear_end = std::max(black_dense_end_, mark_bitmap_clear_end);
5117   DCHECK_ALIGNED_PARAM(mark_bitmap_clear_end, gPageSize);
5118   if (moving_space_begin_ == mark_bitmap_clear_end) {
5119     moving_space_bitmap_->Clear();
5120   } else {
5121     DCHECK_LT(moving_space_begin_, mark_bitmap_clear_end);
5122     DCHECK_LE(mark_bitmap_clear_end, moving_space_end_);
5123     moving_space_bitmap_->ClearRange(reinterpret_cast<mirror::Object*>(mark_bitmap_clear_end),
5124                                      reinterpret_cast<mirror::Object*>(moving_space_end_));
5125   }
5126   bump_pointer_space_->SetBlackDenseRegionSize(mark_bitmap_clear_end - moving_space_begin_);
5127 
5128   if (UNLIKELY(is_zygote && IsValidFd(uffd_))) {
5129     // This unregisters all ranges as a side-effect.
5130     close(uffd_);
5131     uffd_ = kFdUnused;
5132     uffd_initialized_ = false;
5133   }
5134   CHECK(mark_stack_->IsEmpty());  // Ensure that the mark stack is empty.
5135   mark_stack_->Reset();
5136   ZeroAndReleaseMemory(compaction_buffers_map_.Begin(), compaction_buffers_map_.Size());
5137   info_map_.MadviseDontNeedAndZero();
5138   live_words_bitmap_->ClearBitmap();
5139   DCHECK_EQ(thread_running_gc_, Thread::Current());
5140   if (kIsDebugBuild) {
5141     MutexLock mu(thread_running_gc_, lock_);
5142     if (updated_roots_.get() != nullptr) {
5143       updated_roots_->clear();
5144     }
5145   }
5146   linear_alloc_arenas_.clear();
5147   {
5148     ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
5149     WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_);
5150     heap_->ClearMarkedObjects();
5151     if (use_generational_) {
5152       if (performed_compaction) {
5153         // Clear the bits set temporarily for black allocations in non-moving
5154         // space in UpdateNonMovingSpaceBlackAllocations(), which is called when
5155         // we perform compaction, so that objects are considered for GC in next cycle.
5156         accounting::ObjectStack* stack = heap_->GetAllocationStack();
5157         const StackReference<mirror::Object>* limit = stack->End();
5158         for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) {
5159           mirror::Object* obj = it->AsMirrorPtr();
5160           if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) {
5161             non_moving_space_bitmap_->Clear(obj);
5162           }
5163         }
5164       } else {
5165         // Since we didn't perform compaction, we need to identify old objects
5166         // referring to the mid-gen.
5167         auto obj_visitor = [this, card_table = heap_->GetCardTable()](mirror::Object* obj) {
5168           bool found = false;
5169           VisitReferencesVisitor visitor(
5170               [begin = old_gen_end_, end = mid_gen_end_, &found](mirror::Object* ref) {
5171                 found |= ref >= reinterpret_cast<mirror::Object*>(begin) &&
5172                          ref < reinterpret_cast<mirror::Object*>(end);
5173               });
5174           uint8_t* card = card_table->CardFromAddr(obj);
5175           if (*card == accounting::CardTable::kCardDirty) {
5176             return;
5177           }
5178           // Native-roots are captured during marking and the corresponding cards are already
5179           // dirtied above.
5180           obj->VisitReferences</*kVisitNativeRoots=*/false>(visitor, visitor);
5181           if (found) {
5182             *card = accounting::CardTable::kCardDirty;
5183           }
5184         };
5185         moving_space_bitmap_->VisitMarkedRange(reinterpret_cast<uintptr_t>(moving_space_begin_),
5186                                                reinterpret_cast<uintptr_t>(old_gen_end_),
5187                                                obj_visitor);
5188         non_moving_space_bitmap_->VisitAllMarked(obj_visitor);
5189       }
5190     }
5191   }
5192   GcVisitedArenaPool* arena_pool =
5193       static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
5194   arena_pool->DeleteUnusedArenas();
5195 
5196   if (kVerifyNoMissingCardMarks && use_generational_) {
5197     // This must be done in a pause, as otherwise verification racing between a
5198     // mutator's write and its card-dirtying will spuriously fail.
5199     ScopedPause pause(this);
5200     WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
5201     VerifyNoMissingCardMarks();
5202   }
5203   if (kVerifyPostGCObjects && use_generational_) {
5204     ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
5205     WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_);
5206     VerifyPostGCObjects(performed_compaction, mark_bitmap_clear_end);
5207   }
5208 }
5209 
5210 }  // namespace collector
5211 }  // namespace gc
5212 }  // namespace art
5213