1 /*
2  * Copyright 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 // Glibc v2.19 doesn't include these in fcntl.h so host builds will fail without.
19 #if !defined(FALLOC_FL_PUNCH_HOLE) || !defined(FALLOC_FL_KEEP_SIZE)
20 #include <linux/falloc.h>
21 #endif
22 #include <linux/userfaultfd.h>
23 #include <poll.h>
24 #include <sys/ioctl.h>
25 #include <sys/mman.h>
26 #include <sys/resource.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29 
30 #include <fstream>
31 #include <numeric>
32 #include <string>
33 #include <string_view>
34 #include <vector>
35 
36 #include "android-base/file.h"
37 #include "android-base/parsebool.h"
38 #include "android-base/parseint.h"
39 #include "android-base/properties.h"
40 #include "android-base/strings.h"
41 #include "base/file_utils.h"
42 #include "base/memfd.h"
43 #include "base/quasi_atomic.h"
44 #include "base/systrace.h"
45 #include "base/utils.h"
46 #include "gc/accounting/mod_union_table-inl.h"
47 #include "gc/collector_type.h"
48 #include "gc/reference_processor.h"
49 #include "gc/space/bump_pointer_space.h"
50 #include "gc/task_processor.h"
51 #include "gc/verification-inl.h"
52 #include "jit/jit_code_cache.h"
53 #include "mark_compact-inl.h"
54 #include "mirror/object-refvisitor-inl.h"
55 #include "read_barrier_config.h"
56 #include "scoped_thread_state_change-inl.h"
57 #include "sigchain.h"
58 #include "thread_list.h"
59 
60 #ifdef ART_TARGET_ANDROID
61 #include "com_android_art.h"
62 #endif
63 
64 #ifndef __BIONIC__
65 #ifndef MREMAP_DONTUNMAP
66 #define MREMAP_DONTUNMAP 4
67 #endif
68 #ifndef MAP_FIXED_NOREPLACE
69 #define MAP_FIXED_NOREPLACE 0x100000
70 #endif
71 #ifndef __NR_userfaultfd
72 #if defined(__x86_64__)
73 #define __NR_userfaultfd 323
74 #elif defined(__i386__)
75 #define __NR_userfaultfd 374
76 #elif defined(__aarch64__)
77 #define __NR_userfaultfd 282
78 #elif defined(__arm__)
79 #define __NR_userfaultfd 388
80 #else
81 #error "__NR_userfaultfd undefined"
82 #endif
83 #endif  // __NR_userfaultfd
84 #endif  // __BIONIC__
85 
86 namespace {
87 
88 using ::android::base::GetBoolProperty;
89 using ::android::base::ParseBool;
90 using ::android::base::ParseBoolResult;
91 
92 }  // namespace
93 
94 namespace art {
95 
96 static bool HaveMremapDontunmap() {
97   void* old = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
98   CHECK_NE(old, MAP_FAILED);
99   void* addr = mremap(old, kPageSize, kPageSize, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, nullptr);
100   CHECK_EQ(munmap(old, kPageSize), 0);
101   if (addr != MAP_FAILED) {
102     CHECK_EQ(munmap(addr, kPageSize), 0);
103     return true;
104   } else {
105     return false;
106   }
107 }
108 // We require MREMAP_DONTUNMAP functionality of the mremap syscall, which was
109 // introduced in 5.13 kernel version. But it was backported to GKI kernels.
110 static bool gHaveMremapDontunmap = IsKernelVersionAtLeast(5, 13) || HaveMremapDontunmap();
111 // Bitmap of features supported by userfaultfd. This is obtained via uffd API ioctl.
112 static uint64_t gUffdFeatures = 0;
113 // Both missing and minor faults on shmem are needed, but only for minor-fault mode.
114 static constexpr uint64_t kUffdFeaturesForMinorFault =
115     UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM;
116 static constexpr uint64_t kUffdFeaturesForSigbus = UFFD_FEATURE_SIGBUS;
117 // We consider the SIGBUS feature necessary to enable this GC as it is superior
118 // to the threading-based implementation with respect to janks. However, since
119 // we already have the latter implemented, for testing purposes we allow
120 // choosing either of the two at boot time in the constructor below.
121 // Note that having the minor-fault feature implies having the SIGBUS feature,
122 // as the latter was introduced in the kernel earlier than the former. We still
123 // want minor-fault to be available for making jit-code-cache updates
124 // concurrent, which uses shmem.
125 static constexpr uint64_t kUffdFeaturesRequired =
126     kUffdFeaturesForMinorFault | kUffdFeaturesForSigbus;
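
// A minimal illustrative sketch (not part of the original file) of how a
// features word returned by the UFFDIO_API ioctl would be tested against the
// masks above, mirroring the checks in KernelSupportsUffd() and
// GetUffdAndMinorFault() below; 'features' is a hypothetical local:
//   uint64_t features = ...;  // api.features from ioctl(fd, UFFDIO_API, &api)
//   bool has_sigbus = (features & kUffdFeaturesForSigbus) != 0;
//   bool has_minor_fault =
//       (features & kUffdFeaturesForMinorFault) == kUffdFeaturesForMinorFault;
//   bool gc_usable = (features & kUffdFeaturesRequired) == kUffdFeaturesRequired;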
127 
128 bool KernelSupportsUffd() {
129 #ifdef __linux__
130   if (gHaveMremapDontunmap) {
131     int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
132     // On non-android devices we may not have the kernel patches that restrict
133     // userfaultfd to user mode. But that is not a security concern as we are
134     // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
135     if (!kIsTargetAndroid && fd == -1 && errno == EINVAL) {
136       fd = syscall(__NR_userfaultfd, O_CLOEXEC);
137     }
138     if (fd >= 0) {
139       // We are only fetching the available features, which is returned by the
140       // ioctl.
141       struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
142       CHECK_EQ(ioctl(fd, UFFDIO_API, &api), 0) << "ioctl_userfaultfd : API:" << strerror(errno);
143       gUffdFeatures = api.features;
144       close(fd);
145       // Allow this GC to be used only if the minor-fault and sigbus features are available.
146       return (api.features & kUffdFeaturesRequired) == kUffdFeaturesRequired;
147     }
148   }
149 #endif
150   return false;
151 }
152 
153 // The other cases are defined as constexpr in runtime/read_barrier_config.h
154 #if !defined(ART_FORCE_USE_READ_BARRIER) && defined(ART_USE_READ_BARRIER)
155 // Returns collector type asked to be used on the cmdline.
156 static gc::CollectorType FetchCmdlineGcType() {
157   std::string argv;
158   gc::CollectorType gc_type = gc::CollectorType::kCollectorTypeNone;
159   if (android::base::ReadFileToString("/proc/self/cmdline", &argv)) {
160     if (argv.find("-Xgc:CMC") != std::string::npos) {
161       gc_type = gc::CollectorType::kCollectorTypeCMC;
162     } else if (argv.find("-Xgc:CC") != std::string::npos) {
163       gc_type = gc::CollectorType::kCollectorTypeCC;
164     }
165   }
166   return gc_type;
167 }
168 
169 #ifdef ART_TARGET_ANDROID
170 static int GetOverrideCacheInfoFd() {
171   std::string args_str;
172   if (!android::base::ReadFileToString("/proc/self/cmdline", &args_str)) {
173     LOG(WARNING) << "Failed to load /proc/self/cmdline";
174     return -1;
175   }
176   std::vector<std::string_view> args;
177   Split(std::string_view(args_str), /*separator=*/'\0', &args);
178   for (std::string_view arg : args) {
179     if (android::base::ConsumePrefix(&arg, "--cache-info-fd=")) {  // This is a dex2oat flag.
180       int fd;
181       if (!android::base::ParseInt(std::string(arg), &fd)) {
182         LOG(ERROR) << "Failed to parse --cache-info-fd (value: '" << arg << "')";
183         return -1;
184       }
185       return fd;
186     }
187   }
188   return -1;
189 }
190 
191 static bool GetCachedBoolProperty(const std::string& key, bool default_value) {
192   // For simplicity, we don't handle multiple calls because otherwise we would have to reset the fd.
193   static bool called = false;
194   CHECK(!called) << "GetCachedBoolProperty can be called only once";
195   called = true;
196 
197   std::string cache_info_contents;
198   int fd = GetOverrideCacheInfoFd();
199   if (fd >= 0) {
200     if (!android::base::ReadFdToString(fd, &cache_info_contents)) {
201       PLOG(ERROR) << "Failed to read cache-info from fd " << fd;
202       return default_value;
203     }
204   } else {
205     std::string path = GetApexDataDalvikCacheDirectory(InstructionSet::kNone) + "/cache-info.xml";
206     if (!android::base::ReadFileToString(path, &cache_info_contents)) {
207       // If the file is not found, then we are in chroot or in a standalone runtime process (e.g.,
208       // IncidentHelper), or odsign/odrefresh failed to generate and sign the cache info. There's
209       // nothing we can do.
210       if (errno != ENOENT) {
211         PLOG(ERROR) << "Failed to read cache-info from the default path";
212       }
213       return default_value;
214     }
215   }
216 
217   std::optional<com::android::art::CacheInfo> cache_info =
218       com::android::art::parse(cache_info_contents.c_str());
219   if (!cache_info.has_value()) {
220     // This should never happen.
221     LOG(ERROR) << "Failed to parse cache-info";
222     return default_value;
223   }
224   const com::android::art::KeyValuePairList* list = cache_info->getFirstSystemProperties();
225   if (list == nullptr) {
226     // This should never happen.
227     LOG(ERROR) << "Missing system properties from cache-info";
228     return default_value;
229   }
230   const std::vector<com::android::art::KeyValuePair>& properties = list->getItem();
231   for (const com::android::art::KeyValuePair& pair : properties) {
232     if (pair.getK() == key) {
233       ParseBoolResult result = ParseBool(pair.getV());
234       switch (result) {
235         case ParseBoolResult::kTrue:
236           return true;
237         case ParseBoolResult::kFalse:
238           return false;
239         case ParseBoolResult::kError:
240           return default_value;
241       }
242     }
243   }
244   return default_value;
245 }
246 
247 static bool SysPropSaysUffdGc() {
248   // The phenotype flag can change at any time after boot, but it shouldn't take effect until a
249   // reboot. Therefore, we read the phenotype flag from the cache info, which is generated on boot.
250   return GetCachedBoolProperty("persist.device_config.runtime_native_boot.enable_uffd_gc",
251                                GetBoolProperty("ro.dalvik.vm.enable_uffd_gc", false));
252 }
253 #else
254 // Never called.
255 static bool SysPropSaysUffdGc() { return false; }
256 #endif
257 
258 static bool ShouldUseUserfaultfd() {
259   static_assert(kUseBakerReadBarrier || kUseTableLookupReadBarrier);
260 #ifdef __linux__
261   // Use CMC/CC if that is being explicitly asked for on cmdline. Otherwise,
262   // always use CC on host. On target, use CMC only if the system properties say so
263   // and the kernel supports it.
264   gc::CollectorType gc_type = FetchCmdlineGcType();
265   return gc_type == gc::CollectorType::kCollectorTypeCMC ||
266          (gc_type == gc::CollectorType::kCollectorTypeNone &&
267           kIsTargetAndroid &&
268           SysPropSaysUffdGc() &&
269           KernelSupportsUffd());
270 #else
271   return false;
272 #endif
273 }
274 
275 const bool gUseUserfaultfd = ShouldUseUserfaultfd();
276 const bool gUseReadBarrier = !gUseUserfaultfd;
277 #endif
278 
279 namespace gc {
280 namespace collector {
281 
282 // Turn off kCheckLocks when profiling the GC as it slows down the GC
283 // significantly.
284 static constexpr bool kCheckLocks = kDebugLocking;
285 static constexpr bool kVerifyRootsMarked = kIsDebugBuild;
286 // Two threads should suffice on devices.
287 static constexpr size_t kMaxNumUffdWorkers = 2;
288 // Number of compaction buffers reserved for mutator threads in SIGBUS feature
289 // case. It's extremely unlikely that we will ever have more than this number
290 // of mutator threads trying to access the moving-space during one compaction
291 // phase. Using a lower number in debug builds to hopefully catch the issue
292 // before it becomes a problem on user builds.
293 static constexpr size_t kMutatorCompactionBufferCount = kIsDebugBuild ? 256 : 512;
294 // Minimum from-space chunk to be madvised (during concurrent compaction) in one go.
295 static constexpr ssize_t kMinFromSpaceMadviseSize = 1 * MB;
296 // Concurrent compaction termination logic is different (and slightly more efficient) if the
297 // kernel has the fault-retry feature (allowing repeated faults on the same page), which was
298 // introduced in 5.7 (https://android-review.git.corp.google.com/c/kernel/common/+/1540088).
299 // This allows a single page fault to be handled, in turn, by each worker thread, only waking
300 // up the GC thread at the end.
301 static const bool gKernelHasFaultRetry = IsKernelVersionAtLeast(5, 7);
302 
303 std::pair<bool, bool> MarkCompact::GetUffdAndMinorFault() {
304   bool uffd_available;
305   // In most cases the gUffdFeatures will already be initialized at boot time
306   // when libart is loaded. On very old kernels we may get '0' from the kernel,
307   // in which case we would be doing the syscalls each time this function is
308 // called. But that's a very unlikely case. There are no correctness issues as
309 // the response from the kernel never changes after boot.
310   if (UNLIKELY(gUffdFeatures == 0)) {
311     uffd_available = KernelSupportsUffd();
312   } else {
313     // We can have any uffd features only if uffd exists.
314     uffd_available = true;
315   }
316   bool minor_fault_available =
317       (gUffdFeatures & kUffdFeaturesForMinorFault) == kUffdFeaturesForMinorFault;
318   return std::pair<bool, bool>(uffd_available, minor_fault_available);
319 }
320 
321 bool MarkCompact::CreateUserfaultfd(bool post_fork) {
322   if (post_fork || uffd_ == kFdUnused) {
323     // Check if we have MREMAP_DONTUNMAP here for cases where
324     // 'ART_USE_READ_BARRIER=false' is used. Additionally, this check ensures
325     // that userfaultfd isn't used on old kernels, which cause random ioctl
326     // failures.
327     if (gHaveMremapDontunmap) {
328       // Don't use O_NONBLOCK as we rely on read waiting on uffd_ if there isn't
329       // any read event available. We don't use poll.
330       uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
331       // On non-android devices we may not have the kernel patches that restrict
332       // userfaultfd to user mode. But that is not a security concern as we are
333       // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
334       if (!kIsTargetAndroid && UNLIKELY(uffd_ == -1 && errno == EINVAL)) {
335         uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC);
336       }
337       if (UNLIKELY(uffd_ == -1)) {
338         uffd_ = kFallbackMode;
339         LOG(WARNING) << "Userfaultfd isn't supported (reason: " << strerror(errno)
340                      << ") and therefore falling back to stop-the-world compaction.";
341       } else {
342         DCHECK(IsValidFd(uffd_));
343         // Initialize uffd with the features which are required and available.
344         struct uffdio_api api = {.api = UFFD_API, .features = gUffdFeatures, .ioctls = 0};
345         api.features &= use_uffd_sigbus_ ? kUffdFeaturesRequired : kUffdFeaturesForMinorFault;
346         CHECK_EQ(ioctl(uffd_, UFFDIO_API, &api), 0)
347             << "ioctl_userfaultfd: API: " << strerror(errno);
348       }
349     } else {
350       uffd_ = kFallbackMode;
351     }
352   }
353   uffd_initialized_ = !post_fork || uffd_ == kFallbackMode;
354   return IsValidFd(uffd_);
355 }
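
// A minimal illustrative sketch (not from the ART sources) of the userfaultfd
// copy-mode flow that CreateUserfaultfd() above prepares for: register a range
// for missing faults, then resolve a fault by copying a fully prepared page in
// with UFFDIO_COPY. The function name and parameters are hypothetical.
static void UffdRegisterAndCopySketch(int uffd,
                                      void* range_begin,
                                      size_t range_len,
                                      void* fault_addr,
                                      void* src_page,
                                      size_t page_size) {
  // Register the range so accesses to its unmapped ("missing") pages raise
  // userfaults instead of being resolved by the kernel with zero-filled pages.
  struct uffdio_register reg;
  reg.range.start = reinterpret_cast<uintptr_t>(range_begin);
  reg.range.len = range_len;
  reg.mode = UFFDIO_REGISTER_MODE_MISSING;
  CHECK_EQ(ioctl(uffd, UFFDIO_REGISTER, &reg), 0)
      << "ioctl_userfaultfd: register: " << strerror(errno);

  // Atomically install a prepared page at the faulting address, waking any
  // threads waiting on that page.
  struct uffdio_copy copy;
  copy.src = reinterpret_cast<uintptr_t>(src_page);
  copy.dst = reinterpret_cast<uintptr_t>(fault_addr) & ~(page_size - 1);
  copy.len = page_size;
  copy.mode = 0;
  copy.copy = 0;
  CHECK_EQ(ioctl(uffd, UFFDIO_COPY, &copy), 0)
      << "ioctl_userfaultfd: copy: " << strerror(errno);
}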
356 
357 template <size_t kAlignment>
358 MarkCompact::LiveWordsBitmap<kAlignment>* MarkCompact::LiveWordsBitmap<kAlignment>::Create(
359     uintptr_t begin, uintptr_t end) {
360   return static_cast<LiveWordsBitmap<kAlignment>*>(
361           MemRangeBitmap::Create("Concurrent Mark Compact live words bitmap", begin, end));
362 }
363 
364 static bool IsSigbusFeatureAvailable() {
365   MarkCompact::GetUffdAndMinorFault();
366   return gUffdFeatures & UFFD_FEATURE_SIGBUS;
367 }
368 
369 MarkCompact::MarkCompact(Heap* heap)
370     : GarbageCollector(heap, "concurrent mark compact"),
371       gc_barrier_(0),
372       lock_("mark compact lock", kGenericBottomLock),
373       bump_pointer_space_(heap->GetBumpPointerSpace()),
374       moving_space_bitmap_(bump_pointer_space_->GetMarkBitmap()),
375       moving_to_space_fd_(kFdUnused),
376       moving_from_space_fd_(kFdUnused),
377       uffd_(kFdUnused),
378       sigbus_in_progress_count_(kSigbusCounterCompactionDoneMask),
379       compaction_in_progress_count_(0),
380       thread_pool_counter_(0),
381       compacting_(false),
382       uffd_initialized_(false),
383       uffd_minor_fault_supported_(false),
384       use_uffd_sigbus_(IsSigbusFeatureAvailable()),
385       minor_fault_initialized_(false),
386       map_linear_alloc_shared_(false) {
387   if (kIsDebugBuild) {
388     updated_roots_.reset(new std::unordered_set<void*>());
389   }
390   // TODO: When using minor-fault feature, the first GC after zygote-fork
391   // requires mapping the linear-alloc again with MAP_SHARED. This leaves a
392   // gap for suspended threads to access linear-alloc when it's empty (after
393   // mremap) and not yet userfaultfd registered. This cannot be fixed by merely
394   // doing uffd registration first. For now, just assert that we are not using
395   // minor-fault. Eventually, a cleanup of linear-alloc update logic to only
396   // use private anonymous would be ideal.
397   CHECK(!uffd_minor_fault_supported_);
398 
399   // TODO: Depending on how the bump-pointer space move is implemented: if we
400   // switch between two virtual memories each time, then we will have to
401   // initialize live_words_bitmap_ accordingly.
402   live_words_bitmap_.reset(LiveWordsBitmap<kAlignment>::Create(
403           reinterpret_cast<uintptr_t>(bump_pointer_space_->Begin()),
404           reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit())));
405 
406   // Create one MemMap for all the data structures
407   size_t moving_space_size = bump_pointer_space_->Capacity();
408   size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize;
409   size_t nr_moving_pages = moving_space_size / kPageSize;
410   size_t nr_non_moving_pages = heap->GetNonMovingSpace()->Capacity() / kPageSize;
411 
412   std::string err_msg;
413   info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector",
414                                    chunk_info_vec_size * sizeof(uint32_t)
415                                    + nr_non_moving_pages * sizeof(ObjReference)
416                                    + nr_moving_pages * sizeof(ObjReference)
417                                    + nr_moving_pages * sizeof(uint32_t),
418                                    PROT_READ | PROT_WRITE,
419                                    /*low_4gb=*/ false,
420                                    &err_msg);
421   if (UNLIKELY(!info_map_.IsValid())) {
422     LOG(FATAL) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg;
423   } else {
424     uint8_t* p = info_map_.Begin();
425     chunk_info_vec_ = reinterpret_cast<uint32_t*>(p);
426     vector_length_ = chunk_info_vec_size;
427 
428     p += chunk_info_vec_size * sizeof(uint32_t);
429     first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p);
430 
431     p += nr_non_moving_pages * sizeof(ObjReference);
432     first_objs_moving_space_ = reinterpret_cast<ObjReference*>(p);
433 
434     p += nr_moving_pages * sizeof(ObjReference);
435     pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p);
436   }
437 
438   size_t moving_space_alignment = BestPageTableAlignment(moving_space_size);
439   // The moving space is created at a fixed address, which is expected to be
440   // PMD-size aligned.
441   if (!IsAlignedParam(bump_pointer_space_->Begin(), moving_space_alignment)) {
442     LOG(WARNING) << "Bump pointer space is not aligned to " << PrettySize(moving_space_alignment)
443                  << ". This can lead to longer stop-the-world pauses for compaction";
444   }
445   // NOTE: PROT_NONE is used here as these mappings are for address space reservation
446   // only and will be used only after appropriately remapping them.
447   from_space_map_ = MemMap::MapAnonymousAligned("Concurrent mark-compact from-space",
448                                                 moving_space_size,
449                                                 PROT_NONE,
450                                                 /*low_4gb=*/kObjPtrPoisoning,
451                                                 moving_space_alignment,
452                                                 &err_msg);
453   if (UNLIKELY(!from_space_map_.IsValid())) {
454     LOG(FATAL) << "Failed to allocate concurrent mark-compact from-space" << err_msg;
455   } else {
456     from_space_begin_ = from_space_map_.Begin();
457   }
458 
459   // In some cases (32-bit or kObjPtrPoisoning) it's too much to ask for 3
460   // heap-sized mappings in low-4GB. So tolerate failure here by attempting to
461   // mmap again right before the compaction pause. And if even that fails, then
462   // run the GC cycle in copy-mode rather than minor-fault.
463   //
464   // This map doesn't have to be aligned to 2MB as we don't mremap on it.
465   if (!kObjPtrPoisoning && uffd_minor_fault_supported_) {
466     // We need this map only if minor-fault feature is supported. But in that case
467     // don't create the mapping if obj-ptr poisoning is enabled as then the mapping
468     // has to be created in low_4gb. Doing this here rather than later causes the
469     // Dex2oatImageTest.TestExtension gtest to fail in 64-bit platforms.
470     shadow_to_space_map_ = MemMap::MapAnonymous("Concurrent mark-compact moving-space shadow",
471                                                 moving_space_size,
472                                                 PROT_NONE,
473                                                 /*low_4gb=*/false,
474                                                 &err_msg);
475     if (!shadow_to_space_map_.IsValid()) {
476       LOG(WARNING) << "Failed to allocate concurrent mark-compact moving-space shadow: " << err_msg;
477     }
478   }
479   const size_t num_pages =
480       1 + (use_uffd_sigbus_ ? kMutatorCompactionBufferCount :
481                               std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers));
482   compaction_buffers_map_ = MemMap::MapAnonymous("Concurrent mark-compact compaction buffers",
483                                                  kPageSize * num_pages,
484                                                  PROT_READ | PROT_WRITE,
485                                                  /*low_4gb=*/kObjPtrPoisoning,
486                                                  &err_msg);
487   if (UNLIKELY(!compaction_buffers_map_.IsValid())) {
488     LOG(FATAL) << "Failed to allocate concurrent mark-compact compaction buffers" << err_msg;
489   }
490   // We also use the first page-sized buffer for the purpose of terminating concurrent compaction.
491   conc_compaction_termination_page_ = compaction_buffers_map_.Begin();
492   // Touch the page deliberately to avoid userfaults on it. We madvise it in
493   // CompactionPhase() before using it to terminate concurrent compaction.
494   ForceRead(conc_compaction_termination_page_);
495 
496   // In most of the cases, we don't expect more than one LinearAlloc space.
497   linear_alloc_spaces_data_.reserve(1);
498 
499   // Initialize GC metrics.
500   metrics::ArtMetrics* metrics = GetMetrics();
501   // The mark-compact collector supports only full-heap collections at the moment.
502   gc_time_histogram_ = metrics->FullGcCollectionTime();
503   metrics_gc_count_ = metrics->FullGcCount();
504   metrics_gc_count_delta_ = metrics->FullGcCountDelta();
505   gc_throughput_histogram_ = metrics->FullGcThroughput();
506   gc_tracing_throughput_hist_ = metrics->FullGcTracingThroughput();
507   gc_throughput_avg_ = metrics->FullGcThroughputAvg();
508   gc_tracing_throughput_avg_ = metrics->FullGcTracingThroughputAvg();
509   gc_scanned_bytes_ = metrics->FullGcScannedBytes();
510   gc_scanned_bytes_delta_ = metrics->FullGcScannedBytesDelta();
511   gc_freed_bytes_ = metrics->FullGcFreedBytes();
512   gc_freed_bytes_delta_ = metrics->FullGcFreedBytesDelta();
513   gc_duration_ = metrics->FullGcDuration();
514   gc_duration_delta_ = metrics->FullGcDurationDelta();
515   are_metrics_initialized_ = true;
516 }
517 
518 void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) {
519   DCHECK_ALIGNED(begin, kPageSize);
520   DCHECK_ALIGNED(len, kPageSize);
521   DCHECK_GE(len, kPMDSize);
522   size_t alignment = BestPageTableAlignment(len);
523   bool is_shared = false;
524   // We use MAP_SHARED on non-zygote processes for leveraging userfaultfd's minor-fault feature.
525   if (map_linear_alloc_shared_) {
526     void* ret = mmap(begin,
527                      len,
528                      PROT_READ | PROT_WRITE,
529                      MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
530                      /*fd=*/-1,
531                      /*offset=*/0);
532     CHECK_EQ(ret, begin) << "mmap failed: " << strerror(errno);
533     is_shared = true;
534   }
535   std::string err_msg;
536   MemMap shadow(MemMap::MapAnonymousAligned("linear-alloc shadow map",
537                                             len,
538                                             PROT_NONE,
539                                             /*low_4gb=*/false,
540                                             alignment,
541                                             &err_msg));
542   if (!shadow.IsValid()) {
543     LOG(FATAL) << "Failed to allocate linear-alloc shadow map: " << err_msg;
544     UNREACHABLE();
545   }
546 
547   MemMap page_status_map(MemMap::MapAnonymous("linear-alloc page-status map",
548                                               len / kPageSize,
549                                               PROT_READ | PROT_WRITE,
550                                               /*low_4gb=*/false,
551                                               &err_msg));
552   if (!page_status_map.IsValid()) {
553     LOG(FATAL) << "Failed to allocate linear-alloc page-status shadow map: " << err_msg;
554     UNREACHABLE();
555   }
556   linear_alloc_spaces_data_.emplace_back(std::forward<MemMap>(shadow),
557                                          std::forward<MemMap>(page_status_map),
558                                          begin,
559                                          begin + len,
560                                          is_shared);
561 }
562 
563 void MarkCompact::BindAndResetBitmaps() {
564   // TODO: We need to hold heap_bitmap_lock_ only for populating immune_spaces.
565   // The card-table and mod-union-table processing can be done without it. So
566   // change the logic below. Note that the bitmap clearing would require the
567   // lock.
568   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
569   accounting::CardTable* const card_table = heap_->GetCardTable();
570   // Mark all of the spaces we never collect as immune.
571   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
572     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
573         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
574       CHECK(space->IsZygoteSpace() || space->IsImageSpace());
575       immune_spaces_.AddSpace(space);
576       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
577       if (table != nullptr) {
578         table->ProcessCards();
579       } else {
580         // Keep cards aged if we don't have a mod-union table since we may need
581         // to scan them in future GCs. This case is for app images.
582         // TODO: We could probably scan the objects right here to avoid doing
583         // another scan through the card-table.
584         card_table->ModifyCardsAtomic(
585             space->Begin(),
586             space->End(),
587             [](uint8_t card) {
588               return (card == gc::accounting::CardTable::kCardClean)
589                   ? card
590                   : gc::accounting::CardTable::kCardAged;
591             },
592             /* card modified visitor */ VoidFunctor());
593       }
594     } else {
595       CHECK(!space->IsZygoteSpace());
596       CHECK(!space->IsImageSpace());
597       // The card-table corresponding to bump-pointer and non-moving space can
598       // be cleared, because we are going to traverse all the reachable objects
599       // in these spaces. This card-table will eventually be used to track
600       // mutations while concurrent marking is going on.
601       card_table->ClearCardRange(space->Begin(), space->Limit());
602       if (space != bump_pointer_space_) {
603         CHECK_EQ(space, heap_->GetNonMovingSpace());
604         non_moving_space_ = space;
605         non_moving_space_bitmap_ = space->GetMarkBitmap();
606       }
607     }
608   }
609 }
610 
611 void MarkCompact::MarkZygoteLargeObjects() {
612   Thread* self = thread_running_gc_;
613   DCHECK_EQ(self, Thread::Current());
614   space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace();
615   if (los != nullptr) {
616     // Pick the current live bitmap (mark bitmap if swapped).
617     accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
618     accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
619     // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
620     std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic();
621     live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first),
622                                   reinterpret_cast<uintptr_t>(range.second),
623                                   [mark_bitmap, los, self](mirror::Object* obj)
624                                       REQUIRES(Locks::heap_bitmap_lock_)
625                                           REQUIRES_SHARED(Locks::mutator_lock_) {
626                                             if (los->IsZygoteLargeObject(self, obj)) {
627                                               mark_bitmap->Set(obj);
628                                             }
629                                           });
630   }
631 }
632 
633 void MarkCompact::InitializePhase() {
634   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
635   mark_stack_ = heap_->GetMarkStack();
636   CHECK(mark_stack_->IsEmpty());
637   immune_spaces_.Reset();
638   moving_first_objs_count_ = 0;
639   non_moving_first_objs_count_ = 0;
640   black_page_count_ = 0;
641   bytes_scanned_ = 0;
642   freed_objects_ = 0;
643   // The first buffer is used by gc-thread.
644   compaction_buffer_counter_.store(1, std::memory_order_relaxed);
645   from_space_slide_diff_ = from_space_begin_ - bump_pointer_space_->Begin();
646   black_allocations_begin_ = bump_pointer_space_->Limit();
647   walk_super_class_cache_ = nullptr;
648   // TODO: Would it suffice to read it once in the constructor, which is called
649   // in zygote process?
650   pointer_size_ = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
651 }
652 
653 class MarkCompact::ThreadFlipVisitor : public Closure {
654  public:
655   explicit ThreadFlipVisitor(MarkCompact* collector) : collector_(collector) {}
656 
657   void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) {
658     // Note: self is not necessarily equal to thread since thread may be suspended.
659     Thread* self = Thread::Current();
660     CHECK(thread == self || thread->GetState() != ThreadState::kRunnable)
661         << thread->GetState() << " thread " << thread << " self " << self;
662     thread->VisitRoots(collector_, kVisitRootFlagAllRoots);
663     // Interpreter cache is thread-local so it needs to be swept either in a
664     // flip, or a stop-the-world pause.
665     CHECK(collector_->compacting_);
666     thread->SweepInterpreterCache(collector_);
667     thread->AdjustTlab(collector_->black_objs_slide_diff_);
668     collector_->GetBarrier().Pass(self);
669   }
670 
671  private:
672   MarkCompact* const collector_;
673 };
674 
675 class MarkCompact::FlipCallback : public Closure {
676  public:
677   explicit FlipCallback(MarkCompact* collector) : collector_(collector) {}
678 
679   void Run(Thread* thread ATTRIBUTE_UNUSED) override REQUIRES(Locks::mutator_lock_) {
680     collector_->CompactionPause();
681   }
682 
683  private:
684   MarkCompact* const collector_;
685 };
686 
687 void MarkCompact::RunPhases() {
688   Thread* self = Thread::Current();
689   thread_running_gc_ = self;
690   Runtime* runtime = Runtime::Current();
691   InitializePhase();
692   GetHeap()->PreGcVerification(this);
693   {
694     ReaderMutexLock mu(self, *Locks::mutator_lock_);
695     MarkingPhase();
696   }
697   {
698     // Marking pause
699     ScopedPause pause(this);
700     MarkingPause();
701     if (kIsDebugBuild) {
702       bump_pointer_space_->AssertAllThreadLocalBuffersAreRevoked();
703     }
704   }
705   // To increase likelihood of black allocations. For testing purposes only.
706   if (kIsDebugBuild && heap_->GetTaskProcessor()->GetRunningThread() == thread_running_gc_) {
707     usleep(500'000);
708   }
709   {
710     ReaderMutexLock mu(self, *Locks::mutator_lock_);
711     ReclaimPhase();
712     PrepareForCompaction();
713   }
714   if (uffd_ != kFallbackMode && !use_uffd_sigbus_) {
715     heap_->GetThreadPool()->WaitForWorkersToBeCreated();
716   }
717 
718   {
719     // Compaction pause
720     gc_barrier_.Init(self, 0);
721     ThreadFlipVisitor visitor(this);
722     FlipCallback callback(this);
723     size_t barrier_count = runtime->GetThreadList()->FlipThreadRoots(
724         &visitor, &callback, this, GetHeap()->GetGcPauseListener());
725     {
726       ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
727       gc_barrier_.Increment(self, barrier_count);
728     }
729   }
730 
731   if (IsValidFd(uffd_)) {
732     ReaderMutexLock mu(self, *Locks::mutator_lock_);
733     CompactionPhase();
734   }
735 
736   FinishPhase();
737   thread_running_gc_ = nullptr;
738   GetHeap()->PostGcVerification(this);
739 }
740 
741 void MarkCompact::InitMovingSpaceFirstObjects(const size_t vec_len) {
742   // First, find the first live word.
743   size_t to_space_page_idx = 0;
744   uint32_t offset_in_chunk_word;
745   uint32_t offset;
746   mirror::Object* obj;
747   const uintptr_t heap_begin = moving_space_bitmap_->HeapBegin();
748 
749   size_t chunk_idx;
750   // Find the first live word in the space
751   for (chunk_idx = 0; chunk_info_vec_[chunk_idx] == 0; chunk_idx++) {
752     if (chunk_idx > vec_len) {
753       // We don't have any live data on the moving-space.
754       return;
755     }
756   }
757   // Use live-words bitmap to find the first word
758   offset_in_chunk_word = live_words_bitmap_->FindNthLiveWordOffset(chunk_idx, /*n*/ 0);
759   offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
760   DCHECK(live_words_bitmap_->Test(offset)) << "offset=" << offset
761                                            << " chunk_idx=" << chunk_idx
762                                            << " N=0"
763                                            << " offset_in_word=" << offset_in_chunk_word
764                                            << " word=" << std::hex
765                                            << live_words_bitmap_->GetWord(chunk_idx);
766   // The first object doesn't require using FindPrecedingObject().
767   obj = reinterpret_cast<mirror::Object*>(heap_begin + offset * kAlignment);
768   // TODO: add a check to validate the object.
769 
770   pre_compact_offset_moving_space_[to_space_page_idx] = offset;
771   first_objs_moving_space_[to_space_page_idx].Assign(obj);
772   to_space_page_idx++;
773 
774   uint32_t page_live_bytes = 0;
775   while (true) {
776     for (; page_live_bytes <= kPageSize; chunk_idx++) {
777       if (chunk_idx > vec_len) {
778         moving_first_objs_count_ = to_space_page_idx;
779         return;
780       }
781       page_live_bytes += chunk_info_vec_[chunk_idx];
782     }
783     chunk_idx--;
784     page_live_bytes -= kPageSize;
785     DCHECK_LE(page_live_bytes, kOffsetChunkSize);
786     DCHECK_LE(page_live_bytes, chunk_info_vec_[chunk_idx])
787         << " chunk_idx=" << chunk_idx
788         << " to_space_page_idx=" << to_space_page_idx
789         << " vec_len=" << vec_len;
790     DCHECK(IsAligned<kAlignment>(chunk_info_vec_[chunk_idx] - page_live_bytes));
791     offset_in_chunk_word =
792             live_words_bitmap_->FindNthLiveWordOffset(
793                 chunk_idx, (chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment);
794     offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
795     DCHECK(live_words_bitmap_->Test(offset))
796         << "offset=" << offset
797         << " chunk_idx=" << chunk_idx
798         << " N=" << ((chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment)
799         << " offset_in_word=" << offset_in_chunk_word
800         << " word=" << std::hex << live_words_bitmap_->GetWord(chunk_idx);
801     // TODO: Can we optimize this for large objects? If we are continuing a
802     // large object that spans multiple pages, then we may be able to do without
803     // calling FindPrecedingObject().
804     //
805     // Find the object which encapsulates offset in it, which could be
806     // starting at offset itself.
807     obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
808     // TODO: add a check to validate the object.
809     pre_compact_offset_moving_space_[to_space_page_idx] = offset;
810     first_objs_moving_space_[to_space_page_idx].Assign(obj);
811     to_space_page_idx++;
812     chunk_idx++;
813   }
814 }
815 
816 void MarkCompact::InitNonMovingSpaceFirstObjects() {
817   accounting::ContinuousSpaceBitmap* bitmap = non_moving_space_->GetLiveBitmap();
818   uintptr_t begin = reinterpret_cast<uintptr_t>(non_moving_space_->Begin());
819   const uintptr_t end = reinterpret_cast<uintptr_t>(non_moving_space_->End());
820   mirror::Object* prev_obj;
821   size_t page_idx;
822   {
823     // Find first live object
824     mirror::Object* obj = nullptr;
825     bitmap->VisitMarkedRange</*kVisitOnce*/ true>(begin,
826                                                   end,
827                                                   [&obj] (mirror::Object* o) {
828                                                     obj = o;
829                                                   });
830     if (obj == nullptr) {
831       // There are no live objects in the non-moving space
832       return;
833     }
834     page_idx = (reinterpret_cast<uintptr_t>(obj) - begin) / kPageSize;
835     first_objs_non_moving_space_[page_idx++].Assign(obj);
836     prev_obj = obj;
837   }
838   // TODO: check obj is valid
839   uintptr_t prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
840                            + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
841   // For every page find the object starting from which we need to call
842   // VisitReferences. It could either be an object that started on some
843   // preceding page, or some object starting within this page.
844   begin = RoundDown(reinterpret_cast<uintptr_t>(prev_obj) + kPageSize, kPageSize);
845   while (begin < end) {
846     // Utilize, if any, large object that started in some preceding page, but
847     // overlaps with this page as well.
848     if (prev_obj != nullptr && prev_obj_end > begin) {
849       DCHECK_LT(prev_obj, reinterpret_cast<mirror::Object*>(begin));
850       first_objs_non_moving_space_[page_idx].Assign(prev_obj);
851       mirror::Class* klass = prev_obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
852       if (bump_pointer_space_->HasAddress(klass)) {
853         LOG(WARNING) << "found inter-page object " << prev_obj
854                      << " in non-moving space with klass " << klass
855                      << " in moving space";
856       }
857     } else {
858       prev_obj_end = 0;
859       // It's sufficient to only search for previous object in the preceding page.
860       // If no live object started in that page and some object had started in
861       // the page preceding that page, which was big enough to overlap with
862       // the current page, then we wouldn't be in the else part.
863       prev_obj = bitmap->FindPrecedingObject(begin, begin - kPageSize);
864       if (prev_obj != nullptr) {
865         prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
866                         + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
867       }
868       if (prev_obj_end > begin) {
869         mirror::Class* klass = prev_obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
870         if (bump_pointer_space_->HasAddress(klass)) {
871           LOG(WARNING) << "found inter-page object " << prev_obj
872                        << " in non-moving space with klass " << klass
873                        << " in moving space";
874         }
875         first_objs_non_moving_space_[page_idx].Assign(prev_obj);
876       } else {
877         // Find the first live object in this page
878         bitmap->VisitMarkedRange</*kVisitOnce*/ true>(
879                 begin,
880                 begin + kPageSize,
881                 [this, page_idx] (mirror::Object* obj) {
882                   first_objs_non_moving_space_[page_idx].Assign(obj);
883                 });
884       }
885       // An empty entry indicates that the page has no live objects and hence
886       // can be skipped.
887     }
888     begin += kPageSize;
889     page_idx++;
890   }
891   non_moving_first_objs_count_ = page_idx;
892 }
893 
894 bool MarkCompact::CanCompactMovingSpaceWithMinorFault() {
895   size_t min_size = (moving_first_objs_count_ + black_page_count_) * kPageSize;
896   return minor_fault_initialized_ && shadow_to_space_map_.IsValid() &&
897          shadow_to_space_map_.Size() >= min_size;
898 }
899 
900 class MarkCompact::ConcurrentCompactionGcTask : public SelfDeletingTask {
901  public:
902   explicit ConcurrentCompactionGcTask(MarkCompact* collector, size_t idx)
903       : collector_(collector), index_(idx) {}
904 
905   void Run(Thread* self ATTRIBUTE_UNUSED) override REQUIRES_SHARED(Locks::mutator_lock_) {
906     if (collector_->CanCompactMovingSpaceWithMinorFault()) {
907       collector_->ConcurrentCompaction<MarkCompact::kMinorFaultMode>(/*buf=*/nullptr);
908     } else {
909       // The page/buf passed to ConcurrentCompaction is used by the thread as a
910       // kPageSize buffer into which objects are compacted and updated, and which
911       // is then passed to uffd ioctls.
912       uint8_t* buf = collector_->compaction_buffers_map_.Begin() + index_ * kPageSize;
913       collector_->ConcurrentCompaction<MarkCompact::kCopyMode>(buf);
914     }
915   }
916 
917  private:
918   MarkCompact* const collector_;
919   size_t index_;
920 };
921 
922 void MarkCompact::PrepareForCompaction() {
923   uint8_t* space_begin = bump_pointer_space_->Begin();
924   size_t vector_len = (black_allocations_begin_ - space_begin) / kOffsetChunkSize;
925   DCHECK_LE(vector_len, vector_length_);
926   for (size_t i = 0; i < vector_len; i++) {
927     DCHECK_LE(chunk_info_vec_[i], kOffsetChunkSize);
928     DCHECK_EQ(chunk_info_vec_[i], live_words_bitmap_->LiveBytesInBitmapWord(i));
929   }
930   InitMovingSpaceFirstObjects(vector_len);
931   InitNonMovingSpaceFirstObjects();
932 
933   // TODO: We can do a lot of neat tricks with this offset vector to tune the
934   // compaction as we wish. Originally, the compaction algorithm slides all
935   // live objects towards the beginning of the heap. This is nice because it
936   // keeps the spatial locality of objects intact.
937   // However, sometimes it's desired to compact objects in certain portions
938   // of the heap. For instance, it is expected that, over time,
939   // objects towards the beginning of the heap are long lived and are always
940   // densely packed. In this case, it makes sense to only update references in
941   // there and not try to compact it.
942   // Furthermore, we might have some large objects and may not want to move such
943   // objects.
944   // We can adjust, without too much effort, the values in the chunk_info_vec_ such
945   // that the objects in the dense beginning area aren't moved. OTOH, large
946   // objects, which could be anywhere in the heap, could also be kept from
947   // moving by using a similar trick. The only issue is that by doing this we will
948   // leave an unused hole in the middle of the heap which can't be used for
949   // allocations until we do a *full* compaction.
950   //
951   // At this point every element in the chunk_info_vec_ contains the live-bytes
952   // of the corresponding chunk. For old-to-new address computation we need
953   // every element to reflect the total live-bytes of all preceding chunks.
954 
955   // Live-bytes count is required to compute post_compact_end_ below.
956   uint32_t total;
957   // Update the vector one past the heap usage as it is required for black
958   // allocated objects' post-compact address computation.
959   if (vector_len < vector_length_) {
960     vector_len++;
961     total = 0;
962   } else {
963     // Fetch the value stored in the last element before it gets overwritten by
964     // std::exclusive_scan().
965     total = chunk_info_vec_[vector_len - 1];
966   }
967   std::exclusive_scan(chunk_info_vec_, chunk_info_vec_ + vector_len, chunk_info_vec_, 0);
968   total += chunk_info_vec_[vector_len - 1];
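  // Worked example (illustrative, made-up numbers): if four consecutive entries
  // of chunk_info_vec_ held live-byte counts {16, 0, 32, 8}, the
  // std::exclusive_scan above rewrites them to {0, 16, 16, 48}: entry i now
  // holds the live bytes of all chunks preceding chunk i, i.e. the
  // post-compaction byte offset at which chunk i's first live word will land.
  // 'total' ends up holding the overall live-byte count, which determines
  // post_compact_end_ below.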
969 
970   for (size_t i = vector_len; i < vector_length_; i++) {
971     DCHECK_EQ(chunk_info_vec_[i], 0u);
972   }
973   post_compact_end_ = AlignUp(space_begin + total, kPageSize);
974   CHECK_EQ(post_compact_end_, space_begin + moving_first_objs_count_ * kPageSize);
975   black_objs_slide_diff_ = black_allocations_begin_ - post_compact_end_;
976   // How do we handle compaction of heap portion used for allocations after the
977   // marking-pause?
978   // All allocations after the marking-pause are considered black (reachable)
979   // for this GC cycle. However, they need not be allocated contiguously as
980   // different mutators use TLABs. So we will compact the heap till the point
981   // where allocations took place before the marking-pause. And everything after
982   // that will be slid with TLAB holes, and then TLAB info in TLS will be
983   // appropriately updated in the pre-compaction pause.
984   // The chunk-info vector entries for the post marking-pause allocations will be
985   // also updated in the pre-compaction pause.
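  // Illustrative arithmetic (made-up numbers): if black_allocations_begin_ is
  // 10 MB past the space begin and the live data computed above rounds up to
  // post_compact_end_ = space_begin + 6 MB, then black_objs_slide_diff_ is
  // 4 MB, and a black (post marking-pause) allocation at pre-compact address A
  // is expected to end up at A - black_objs_slide_diff_ after compaction.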
986 
987   bool is_zygote = Runtime::Current()->IsZygote();
988   if (!uffd_initialized_ && CreateUserfaultfd(/*post_fork*/false)) {
989     if (!use_uffd_sigbus_) {
990       // Register the buffer that we use for terminating concurrent compaction
991       struct uffdio_register uffd_register;
992       uffd_register.range.start = reinterpret_cast<uintptr_t>(conc_compaction_termination_page_);
993       uffd_register.range.len = kPageSize;
994       uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
995       CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
996           << "ioctl_userfaultfd: register compaction termination page: " << strerror(errno);
997     }
998     if (!uffd_minor_fault_supported_ && shadow_to_space_map_.IsValid()) {
999       // A valid shadow-map for moving space is only possible if we
1000       // were able to map it in the constructor. That also means that its size
1001       // matches the moving-space.
1002       CHECK_EQ(shadow_to_space_map_.Size(), bump_pointer_space_->Capacity());
1003       // Release the shadow map for moving-space if we don't support minor-fault
1004       // as it's not required.
1005       shadow_to_space_map_.Reset();
1006     }
1007   }
1008   // For zygote we create the thread pool each time before starting compaction,
1009   // and get rid of it when finished. This is expected to happen rarely as
1010   // zygote spends most of the time in native fork loop.
1011   if (uffd_ != kFallbackMode) {
1012     if (!use_uffd_sigbus_) {
1013       ThreadPool* pool = heap_->GetThreadPool();
1014       if (UNLIKELY(pool == nullptr)) {
1015         // On devices with 2 cores, GetParallelGCThreadCount() will return 1,
1016         // which is the desired number of workers on such devices.
1017         heap_->CreateThreadPool(std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers));
1018         pool = heap_->GetThreadPool();
1019       }
1020       size_t num_threads = pool->GetThreadCount();
1021       thread_pool_counter_ = num_threads;
1022       for (size_t i = 0; i < num_threads; i++) {
1023         pool->AddTask(thread_running_gc_, new ConcurrentCompactionGcTask(this, i + 1));
1024       }
1025       CHECK_EQ(pool->GetTaskCount(thread_running_gc_), num_threads);
1026     }
1027     /*
1028      * Possible scenarios for mappings:
1029      * A) All zygote GCs (or if minor-fault feature isn't available): uses
1030      * uffd's copy mode
1031      *  1) For moving-space ('to' space is same as the moving-space):
1032      *    a) Private-anonymous mappings for 'to' and 'from' space are created in
1033      *    the constructor.
1034      *    b) In the compaction pause, we mremap(dontunmap) from 'to' space to
1035      *    'from' space. This results in moving all pages to 'from' space and
1036      *    emptying the 'to' space, thereby preparing it for userfaultfd
1037      *    registration.
1038      *
1039      *  2) For linear-alloc space:
1040      *    a) Private-anonymous mappings for the linear-alloc and its 'shadow'
1041      *    are created by the arena-pool.
1042      *    b) In the compaction pause, we mremap(dontumap) with similar effect as
1043      *    (A.1.b) above.
1044      *
1045      * B) First GC after zygote: uses uffd's copy-mode
1046      *  1) For moving-space:
1047      *    a) If the mmap for shadow-map has been successful in the constructor,
1048      *    then we remap it (mmap with MAP_FIXED) to get a shared-anonymous
1049      *    mapping.
1050      *    b) Else, we create two memfd and ftruncate them to the moving-space
1051      *    size.
1052      *    c) Same as (A.1.b)
1053      *    d) If (B.1.a), then mremap(dontunmap) from shadow-map to
1054      *    'to' space. This will make both of them map to the same pages
1055      *    e) If (B.1.b), then mmap with the first memfd in shared mode on the
1056      *    'to' space.
1057      *    f) At the end of compaction, we will have moved the moving-space
1058      *    objects to a MAP_SHARED mapping, readying it for minor-fault from next
1059      *    GC cycle.
1060      *
1061      *  2) For linear-alloc space:
1062      *    a) Same as (A.2.b)
1063      *    b) mmap a shared-anonymous mapping onto the linear-alloc space.
1064      *    c) Same as (B.1.f)
1065      *
1066      * C) All subsequent GCs: preferable minor-fault mode. But may also require
1067      * using copy-mode.
1068      *  1) For moving-space:
1069      *    a) If the shadow-map is created and no memfd was used, then that means
1070      *    we are using shared-anonymous. Therefore, mmap a shared-anonymous on
1071      *    the shadow-space.
1072      *    b) If the shadow-map is not mapped yet, then mmap one with a size
1073      *    big enough to hold the compacted moving space. This may fail, in which
1074      *    case we will use uffd's copy-mode.
1075      *    c) If (b) is successful, then mmap the free memfd onto shadow-map.
1076      *    d) Same as (A.1.b)
1077      *    e) In compaction pause, if the shadow-map was not created, then use
1078      *    copy-mode.
1079      *    f) Else, if the created map is smaller than the required-size, then
1080      *    use mremap (without dontunmap) to expand the size. If failed, then use
1081      *    copy-mode.
1082      *    g) Otherwise, same as (B.1.d) and use minor-fault mode.
1083      *
1084      *  2) For linear-alloc space:
1085      *    a) Same as (A.2.b)
1086      *    b) Use minor-fault mode
1087      */
1088     auto mmap_shadow_map = [this](int flags, int fd) {
1089       void* ret = mmap(shadow_to_space_map_.Begin(),
1090                        shadow_to_space_map_.Size(),
1091                        PROT_READ | PROT_WRITE,
1092                        flags,
1093                        fd,
1094                        /*offset=*/0);
1095       DCHECK_NE(ret, MAP_FAILED) << "mmap for moving-space shadow failed:" << strerror(errno);
1096     };
1097     // Setup all the virtual memory ranges required for concurrent compaction.
1098     if (minor_fault_initialized_) {
1099       DCHECK(!is_zygote);
1100       if (UNLIKELY(!shadow_to_space_map_.IsValid())) {
1101         // This case happens only once on the first GC in minor-fault mode, if
1102         // we were unable to reserve shadow-map for moving-space in the
1103         // beginning.
1104         DCHECK_GE(moving_to_space_fd_, 0);
1105         // Take extra 4MB to reduce the likelihood of requiring resizing this
1106         // map in the pause due to black allocations.
1107         size_t reqd_size = std::min(moving_first_objs_count_ * kPageSize + 4 * MB,
1108                                     bump_pointer_space_->Capacity());
1109         // We cannot support memory-tool with shadow-map (as it requires
1110         // appending a redzone) in this case because the mapping may have to be expanded
1111         // using mremap (in KernelPreparation()), which would ignore the redzone.
1112         // MemMap::MapFile() appends a redzone, but MemMap::MapAnonymous() doesn't.
1113         std::string err_msg;
1114         shadow_to_space_map_ = MemMap::MapAnonymous("moving-space-shadow",
1115                                                     reqd_size,
1116                                                     PROT_NONE,
1117                                                     /*low_4gb=*/kObjPtrPoisoning,
1118                                                     &err_msg);
1119 
1120         if (shadow_to_space_map_.IsValid()) {
1121           CHECK(!kMemoryToolAddsRedzones || shadow_to_space_map_.GetRedzoneSize() == 0u);
1122           // We want to use MemMap to get low-4GB mapping, if required, but then also
1123           // want to have its ownership as we may grow it (in
1124           // KernelPreparation()). If the ownership is not taken and we try to
1125           // resize MemMap, then it unmaps the virtual range.
1126           MemMap temp = shadow_to_space_map_.TakeReservedMemory(shadow_to_space_map_.Size(),
1127                                                                 /*reuse*/ true);
1128           std::swap(temp, shadow_to_space_map_);
1129           DCHECK(!temp.IsValid());
1130         } else {
1131           LOG(WARNING) << "Failed to create moving space's shadow map of " << PrettySize(reqd_size)
1132                        << " size. " << err_msg;
1133         }
1134       }
1135 
1136       if (LIKELY(shadow_to_space_map_.IsValid())) {
1137         int fd = moving_to_space_fd_;
1138         int mmap_flags = MAP_SHARED | MAP_FIXED;
1139         if (fd == kFdUnused) {
1140           // Unused moving-to-space fd means we are using anonymous shared
1141           // mapping.
1142           DCHECK_EQ(shadow_to_space_map_.Size(), bump_pointer_space_->Capacity());
1143           mmap_flags |= MAP_ANONYMOUS;
1144           fd = -1;
1145         }
1146         // If the map is smaller than required, then we'll do mremap in the
1147         // compaction pause to increase the size.
1148         mmap_shadow_map(mmap_flags, fd);
1149       }
1150 
1151       for (auto& data : linear_alloc_spaces_data_) {
1152         DCHECK_EQ(mprotect(data.shadow_.Begin(), data.shadow_.Size(), PROT_READ | PROT_WRITE), 0)
1153             << "mprotect failed: " << strerror(errno);
1154       }
1155     } else if (!is_zygote && uffd_minor_fault_supported_) {
1156       // First GC after zygote-fork. We will still use uffd's copy mode but will
1157       // use it to move objects to MAP_SHARED (to prepare for subsequent GCs, which
1158       // will use uffd's minor-fault feature).
1159       if (shadow_to_space_map_.IsValid() &&
1160           shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) {
1161         mmap_shadow_map(MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS, /*fd=*/-1);
1162       } else {
1163         size_t size = bump_pointer_space_->Capacity();
1164         DCHECK_EQ(moving_to_space_fd_, kFdUnused);
1165         DCHECK_EQ(moving_from_space_fd_, kFdUnused);
1166         const char* name = bump_pointer_space_->GetName();
1167         moving_to_space_fd_ = memfd_create(name, MFD_CLOEXEC);
1168         CHECK_NE(moving_to_space_fd_, -1)
1169             << "memfd_create: failed for " << name << ": " << strerror(errno);
1170         moving_from_space_fd_ = memfd_create(name, MFD_CLOEXEC);
1171         CHECK_NE(moving_from_space_fd_, -1)
1172             << "memfd_create: failed for " << name << ": " << strerror(errno);
1173 
1174         // memfds are treated as files for resource-limit purposes, and the
1175         // moving space could be several hundred MBs. So increase the limit
1176         // if it's lower than the moving-space size.
1177         bool rlimit_changed = false;
1178         rlimit rlim_read;
1179         CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlim_read), 0) << "getrlimit failed: " << strerror(errno);
1180         if (rlim_read.rlim_cur < size) {
1181           rlimit_changed = true;
1182           rlimit rlim = rlim_read;
1183           rlim.rlim_cur = size;
1184           CHECK_EQ(setrlimit(RLIMIT_FSIZE, &rlim), 0) << "setrlimit failed: " << strerror(errno);
1185         }
1186 
1187         // moving-space will map this fd so that we compact objects into it.
1188         int ret = ftruncate(moving_to_space_fd_, size);
1189         CHECK_EQ(ret, 0) << "ftruncate failed for moving-space:" << strerror(errno);
1190         ret = ftruncate(moving_from_space_fd_, size);
1191         CHECK_EQ(ret, 0) << "ftruncate failed for moving-space:" << strerror(errno);
1192 
1193         if (rlimit_changed) {
1194           // Reset the rlimit to its original value.
1195           CHECK_EQ(setrlimit(RLIMIT_FSIZE, &rlim_read), 0)
1196               << "setrlimit failed: " << strerror(errno);
1197         }
1198       }
1199     }
1200   }
1201 }
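
// The block above temporarily raises RLIMIT_FSIZE so that ftruncate() on the
// moving-space memfds doesn't hit the soft file-size limit. A minimal,
// standalone sketch of that pattern follows (hypothetical helper name,
// CHECK-style error handling reduced to abort() for brevity):

#include <sys/mman.h>      // memfd_create() (exposed here by newer glibc/bionic)
#include <sys/resource.h>  // getrlimit(), setrlimit()
#include <unistd.h>        // ftruncate()
#include <cstdlib>         // abort()

static int CreateSizedMemfd(const char* name, size_t size) {
  int fd = memfd_create(name, MFD_CLOEXEC);
  if (fd < 0) abort();
  rlimit old_limit;
  if (getrlimit(RLIMIT_FSIZE, &old_limit) != 0) abort();
  bool raised = false;
  if (old_limit.rlim_cur < size) {
    rlimit new_limit = old_limit;
    new_limit.rlim_cur = size;
    if (setrlimit(RLIMIT_FSIZE, &new_limit) != 0) abort();
    raised = true;
  }
  // memfd sizes are charged against RLIMIT_FSIZE, hence the temporary raise.
  if (ftruncate(fd, size) != 0) abort();
  if (raised) {
    // Restore the original limit once the file has its final size.
    if (setrlimit(RLIMIT_FSIZE, &old_limit) != 0) abort();
  }
  return fd;
}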
1202 
1203 class MarkCompact::VerifyRootMarkedVisitor : public SingleRootVisitor {
1204  public:
1205   explicit VerifyRootMarkedVisitor(MarkCompact* collector) : collector_(collector) { }
1206 
1207   void VisitRoot(mirror::Object* root, const RootInfo& info) override
1208       REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
1209     CHECK(collector_->IsMarked(root) != nullptr) << info.ToString();
1210   }
1211 
1212  private:
1213   MarkCompact* const collector_;
1214 };
1215 
1216 void MarkCompact::ReMarkRoots(Runtime* runtime) {
1217   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1218   DCHECK_EQ(thread_running_gc_, Thread::Current());
1219   Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1220   MarkNonThreadRoots(runtime);
1221   MarkConcurrentRoots(static_cast<VisitRootFlags>(kVisitRootFlagNewRoots
1222                                                   | kVisitRootFlagStopLoggingNewRoots
1223                                                   | kVisitRootFlagClearRootLog),
1224                       runtime);
1225 
1226   if (kVerifyRootsMarked) {
1227     TimingLogger::ScopedTiming t2("(Paused)VerifyRoots", GetTimings());
1228     VerifyRootMarkedVisitor visitor(this);
1229     runtime->VisitRoots(&visitor);
1230   }
1231 }
1232 
1233 void MarkCompact::MarkingPause() {
1234   TimingLogger::ScopedTiming t("(Paused)MarkingPause", GetTimings());
1235   Runtime* runtime = Runtime::Current();
1236   Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1237   {
1238     // Handle the dirty objects as we are a concurrent GC
1239     WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1240     {
1241       MutexLock mu2(thread_running_gc_, *Locks::runtime_shutdown_lock_);
1242       MutexLock mu3(thread_running_gc_, *Locks::thread_list_lock_);
1243       std::list<Thread*> thread_list = runtime->GetThreadList()->GetList();
1244       for (Thread* thread : thread_list) {
1245         thread->VisitRoots(this, static_cast<VisitRootFlags>(0));
1246         DCHECK_EQ(thread->GetThreadLocalGcBuffer(), nullptr);
1247         // Need to revoke all the thread-local allocation stacks since we will
1248         // swap the allocation stacks (below) and don't want anybody to allocate
1249         // into the live stack.
1250         thread->RevokeThreadLocalAllocationStack();
1251         bump_pointer_space_->RevokeThreadLocalBuffers(thread);
1252       }
1253     }
1254     // Fetch only the accumulated objects-allocated count as it is guaranteed to
1255     // be up-to-date after the TLAB revocation above.
1256     freed_objects_ += bump_pointer_space_->GetAccumulatedObjectsAllocated();
1257     // Capture 'end' of moving-space at this point. Every allocation beyond this
1258     // point will be considered as black.
1259     // Align-up to page boundary so that black allocations happen from next page
1260     // onwards. Also, it ensures that 'end' is aligned for card-table's
1261     // ClearCardRange().
1262     black_allocations_begin_ = bump_pointer_space_->AlignEnd(thread_running_gc_, kPageSize);
1263     DCHECK(IsAligned<kAlignment>(black_allocations_begin_));
1264     black_allocations_begin_ = AlignUp(black_allocations_begin_, kPageSize);
1265 
1266     // Re-mark root set. Doesn't include thread-roots as they are already marked
1267     // above.
1268     ReMarkRoots(runtime);
1269     // Scan dirty objects.
1270     RecursiveMarkDirtyObjects(/*paused*/ true, accounting::CardTable::kCardDirty);
1271     {
1272       TimingLogger::ScopedTiming t2("SwapStacks", GetTimings());
1273       heap_->SwapStacks();
1274       live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
1275     }
1276   }
1277   // TODO: For PreSweepingGcVerification(), find correct strategy to visit/walk
1278   // objects in bump-pointer space when we have a mark-bitmap to indicate live
1279   // objects. At the same time we also need to be able to visit black allocations,
1280   // even though they are not marked in the bitmap. Without both of these we fail
1281   // pre-sweeping verification. Moreover, we leave windows open wherein a
1282   // VisitObjects/Walk on the space would either miss some objects or visit
1283   // unreachable ones. These windows occur while we are switching from the shared
1284   // mutator-lock to the exclusive one and vice-versa, from here until the compaction pause.
1285   // heap_->PreSweepingGcVerification(this);
1286 
1287   // Disallow new system weaks to prevent a race which occurs when someone adds
1288   // a new system weak before we sweep them. Since this new system weak may not
1289   // be marked, the GC may incorrectly sweep it. This also fixes a race where
1290   // interning may attempt to return a strong reference to a string that is
1291   // about to be swept.
1292   runtime->DisallowNewSystemWeaks();
1293   // Enable the reference processing slow path, needs to be done with mutators
1294   // paused since there is no lock in the GetReferent fast path.
1295   heap_->GetReferenceProcessor()->EnableSlowPath();
1296 }
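
// The AlignEnd/AlignUp calls in MarkingPause() are plain round-up-to-page
// arithmetic; a minimal sketch of the computation, assuming the page size is a
// power of two (hypothetical helper, not the ART/MemMap utility):

#include <cstdint>

static inline uint8_t* AlignUpToPage(uint8_t* addr, uintptr_t page_size) {
  uintptr_t p = reinterpret_cast<uintptr_t>(addr);
  // E.g. with 4K pages: 0x1001 -> 0x2000, while 0x2000 stays 0x2000.
  return reinterpret_cast<uint8_t*>((p + page_size - 1) & ~(page_size - 1));
}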
1297 
1298 void MarkCompact::SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) {
1299   TimingLogger::ScopedTiming t(paused ? "(Paused)SweepSystemWeaks" : "SweepSystemWeaks",
1300                                GetTimings());
1301   ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
1302   runtime->SweepSystemWeaks(this);
1303 }
1304 
1305 void MarkCompact::ProcessReferences(Thread* self) {
1306   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
1307   GetHeap()->GetReferenceProcessor()->ProcessReferences(self, GetTimings());
1308 }
1309 
1310 void MarkCompact::Sweep(bool swap_bitmaps) {
1311   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1312   // Ensure that nobody inserted objects in the live stack after we swapped the
1313   // stacks.
1314   CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size());
1315   {
1316     TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings());
1317     // Mark everything allocated since the last GC as live so that we can sweep
1318     // concurrently, knowing that new allocations won't be marked as live.
1319     accounting::ObjectStack* live_stack = heap_->GetLiveStack();
1320     heap_->MarkAllocStackAsLive(live_stack);
1321     live_stack->Reset();
1322     DCHECK(mark_stack_->IsEmpty());
1323   }
1324   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
1325     if (space->IsContinuousMemMapAllocSpace() && space != bump_pointer_space_) {
1326       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
1327       TimingLogger::ScopedTiming split(
1328           alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace",
1329           GetTimings());
1330       RecordFree(alloc_space->Sweep(swap_bitmaps));
1331     }
1332   }
1333   SweepLargeObjects(swap_bitmaps);
1334 }
1335 
1336 void MarkCompact::SweepLargeObjects(bool swap_bitmaps) {
1337   space::LargeObjectSpace* los = heap_->GetLargeObjectsSpace();
1338   if (los != nullptr) {
1339     TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
1340     RecordFreeLOS(los->Sweep(swap_bitmaps));
1341   }
1342 }
1343 
1344 void MarkCompact::ReclaimPhase() {
1345   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1346   DCHECK(thread_running_gc_ == Thread::Current());
1347   Runtime* const runtime = Runtime::Current();
1348   // Process the references concurrently.
1349   ProcessReferences(thread_running_gc_);
1350   // TODO: Try to merge this system-weak sweeping with the one while updating
1351   // references during the compaction pause.
1352   SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ false);
1353   runtime->AllowNewSystemWeaks();
1354   // Clean up class loaders after system weaks are swept since that is how we know if class
1355   // unloading occurred.
1356   runtime->GetClassLinker()->CleanupClassLoaders();
1357   {
1358     WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1359     // Reclaim unmarked objects.
1360     Sweep(false);
1361     // Swap the live and mark bitmaps for each space that we modified. This is an
1362     // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
1363     // bitmaps.
1364     SwapBitmaps();
1365     // Unbind the live and mark bitmaps.
1366     GetHeap()->UnBindBitmaps();
1367   }
1368 }
1369 
1370 // We want to avoid checking, for every reference, whether it's within the page
1371 // or not. This can be done if we know where in the page the holder object lies.
1372 // If it doesn't overlap either boundary then we can skip the checks.
1373 template <bool kCheckBegin, bool kCheckEnd>
1374 class MarkCompact::RefsUpdateVisitor {
1375  public:
1376   explicit RefsUpdateVisitor(MarkCompact* collector,
1377                              mirror::Object* obj,
1378                              uint8_t* begin,
1379                              uint8_t* end)
1380       : collector_(collector), obj_(obj), begin_(begin), end_(end) {
1381     DCHECK(!kCheckBegin || begin != nullptr);
1382     DCHECK(!kCheckEnd || end != nullptr);
1383   }
1384 
1385   void operator()(mirror::Object* old ATTRIBUTE_UNUSED, MemberOffset offset, bool /* is_static */)
1386       const ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_)
1387       REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1388     bool update = true;
1389     if (kCheckBegin || kCheckEnd) {
1390       uint8_t* ref = reinterpret_cast<uint8_t*>(obj_) + offset.Int32Value();
1391       update = (!kCheckBegin || ref >= begin_) && (!kCheckEnd || ref < end_);
1392     }
1393     if (update) {
1394       collector_->UpdateRef(obj_, offset);
1395     }
1396   }
1397 
1398   // For object arrays we don't need to check boundaries here as it's done in
1399   // VisitReferences().
1400   // TODO: Optimize reference updating using SIMD instructions. Object arrays
1401   // are perfect as all references are tightly packed.
1402   void operator()(mirror::Object* old ATTRIBUTE_UNUSED,
1403                   MemberOffset offset,
1404                   bool /*is_static*/,
1405                   bool /*is_obj_array*/)
1406       const ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_)
1407       REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1408     collector_->UpdateRef(obj_, offset);
1409   }
1410 
1411   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
1412       ALWAYS_INLINE
1413       REQUIRES_SHARED(Locks::mutator_lock_) {
1414     if (!root->IsNull()) {
1415       VisitRoot(root);
1416     }
1417   }
1418 
1419   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
1420       ALWAYS_INLINE
1421       REQUIRES_SHARED(Locks::mutator_lock_) {
1422     collector_->UpdateRoot(root);
1423   }
1424 
1425  private:
1426   MarkCompact* const collector_;
1427   mirror::Object* const obj_;
1428   uint8_t* const begin_;
1429   uint8_t* const end_;
1430 };
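
// A quick reference for how the call sites below pick the two flags (this is a
// summary of the logic in CompactPage()/SlideBlackPage(), not additional API):
//   - first object of a page that started on an earlier page:
//       RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false>
//   - object fully contained within the page:
//       RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
//   - object in the last stride that may spill past the page end:
//       RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true>
//   - first object that may also extend past the page end (single-stride page):
//       RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true>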
1431 
1432 bool MarkCompact::IsValidObject(mirror::Object* obj) const {
1433   mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
1434   if (!heap_->GetVerification()->IsValidHeapObjectAddress(klass)) {
1435     return false;
1436   }
1437   return heap_->GetVerification()->IsValidClassUnchecked<kWithFromSpaceBarrier>(
1438           obj->GetClass<kVerifyNone, kWithFromSpaceBarrier>());
1439 }
1440 
1441 template <typename Callback>
1442 void MarkCompact::VerifyObject(mirror::Object* ref, Callback& callback) const {
1443   if (kIsDebugBuild) {
1444     mirror::Class* klass = ref->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1445     mirror::Class* pre_compact_klass = ref->GetClass<kVerifyNone, kWithoutReadBarrier>();
1446     mirror::Class* klass_klass = klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1447     mirror::Class* klass_klass_klass = klass_klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1448     if (bump_pointer_space_->HasAddress(pre_compact_klass) &&
1449         reinterpret_cast<uint8_t*>(pre_compact_klass) < black_allocations_begin_) {
1450       CHECK(moving_space_bitmap_->Test(pre_compact_klass))
1451           << "ref=" << ref
1452           << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1453           << " pre_compact_klass=" << pre_compact_klass
1454           << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1455       CHECK(live_words_bitmap_->Test(pre_compact_klass));
1456     }
1457     if (!IsValidObject(ref)) {
1458       std::ostringstream oss;
1459       oss << "Invalid object: "
1460           << "ref=" << ref
1461           << " klass=" << klass
1462           << " klass_klass=" << klass_klass
1463           << " klass_klass_klass=" << klass_klass_klass
1464           << " pre_compact_klass=" << pre_compact_klass
1465           << " from_space_begin=" << static_cast<void*>(from_space_begin_)
1466           << " pre_compact_begin=" << static_cast<void*>(bump_pointer_space_->Begin())
1467           << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1468           << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1469 
1470       // Call callback before dumping larger data like RAM and space dumps.
1471       callback(oss);
1472 
1473       oss << " \nobject="
1474           << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(ref), 128)
1475           << " \nklass(from)="
1476           << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(klass), 128)
1477           << "spaces:\n";
1478       heap_->DumpSpaces(oss);
1479       LOG(FATAL) << oss.str();
1480     }
1481   }
1482 }
1483 
1484 void MarkCompact::CompactPage(mirror::Object* obj,
1485                               uint32_t offset,
1486                               uint8_t* addr,
1487                               bool needs_memset_zero) {
1488   DCHECK(moving_space_bitmap_->Test(obj)
1489          && live_words_bitmap_->Test(obj));
1490   DCHECK(live_words_bitmap_->Test(offset)) << "obj=" << obj
1491                                            << " offset=" << offset
1492                                            << " addr=" << static_cast<void*>(addr)
1493                                            << " black_allocs_begin="
1494                                            << static_cast<void*>(black_allocations_begin_)
1495                                            << " post_compact_addr="
1496                                            << static_cast<void*>(post_compact_end_);
1497   uint8_t* const start_addr = addr;
1498   // How many distinct live-strides do we have.
1499   size_t stride_count = 0;
1500   uint8_t* last_stride = addr;
1501   uint32_t last_stride_begin = 0;
1502   auto verify_obj_callback = [&] (std::ostream& os) {
1503                                os << " stride_count=" << stride_count
1504                                   << " last_stride=" << static_cast<void*>(last_stride)
1505                                   << " offset=" << offset
1506                                   << " start_addr=" << static_cast<void*>(start_addr);
1507                              };
1508   obj = GetFromSpaceAddr(obj);
1509   live_words_bitmap_->VisitLiveStrides(offset,
1510                                        black_allocations_begin_,
1511                                        kPageSize,
1512                                        [&addr,
1513                                         &last_stride,
1514                                         &stride_count,
1515                                         &last_stride_begin,
1516                                         verify_obj_callback,
1517                                         this] (uint32_t stride_begin,
1518                                                size_t stride_size,
1519                                                bool /*is_last*/)
1520                                         REQUIRES_SHARED(Locks::mutator_lock_) {
1521                                          const size_t stride_in_bytes = stride_size * kAlignment;
1522                                          DCHECK_LE(stride_in_bytes, kPageSize);
1523                                          last_stride_begin = stride_begin;
1524                                          DCHECK(IsAligned<kAlignment>(addr));
1525                                          memcpy(addr,
1526                                                 from_space_begin_ + stride_begin * kAlignment,
1527                                                 stride_in_bytes);
1528                                          if (kIsDebugBuild) {
1529                                            uint8_t* space_begin = bump_pointer_space_->Begin();
1530                                            // We can interpret the first word of the stride as an
1531                                            // obj only from the second stride onwards, as the first
1532                                            // stride's first-object may have started on the previous
1533                                            // page. The only exception is the first page of the
1534                                            // moving space.
1535                                            if (stride_count > 0
1536                                                || stride_begin * kAlignment < kPageSize) {
1537                                              mirror::Object* o =
1538                                                 reinterpret_cast<mirror::Object*>(space_begin
1539                                                                                   + stride_begin
1540                                                                                   * kAlignment);
1541                                              CHECK(live_words_bitmap_->Test(o)) << "ref=" << o;
1542                                              CHECK(moving_space_bitmap_->Test(o))
1543                                                  << "ref=" << o
1544                                                  << " bitmap: "
1545                                                  << moving_space_bitmap_->DumpMemAround(o);
1546                                              VerifyObject(reinterpret_cast<mirror::Object*>(addr),
1547                                                           verify_obj_callback);
1548                                            }
1549                                          }
1550                                          last_stride = addr;
1551                                          addr += stride_in_bytes;
1552                                          stride_count++;
1553                                        });
1554   DCHECK_LT(last_stride, start_addr + kPageSize);
1555   DCHECK_GT(stride_count, 0u);
1556   size_t obj_size = 0;
1557   uint32_t offset_within_obj = offset * kAlignment
1558                                - (reinterpret_cast<uint8_t*>(obj) - from_space_begin_);
1559   // First object
1560   if (offset_within_obj > 0) {
1561     mirror::Object* to_ref = reinterpret_cast<mirror::Object*>(start_addr - offset_within_obj);
1562     if (stride_count > 1) {
1563       RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1564                                                                          to_ref,
1565                                                                          start_addr,
1566                                                                          nullptr);
1567       obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(
1568               visitor, MemberOffset(offset_within_obj), MemberOffset(-1));
1569     } else {
1570       RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1571                                                                         to_ref,
1572                                                                         start_addr,
1573                                                                         start_addr + kPageSize);
1574       obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(
1575               visitor, MemberOffset(offset_within_obj), MemberOffset(offset_within_obj
1576                                                                      + kPageSize));
1577     }
1578     obj_size = RoundUp(obj_size, kAlignment);
1579     DCHECK_GT(obj_size, offset_within_obj)
1580         << "obj:" << obj
1581         << " class:"
1582         << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1583         << " to_addr:" << to_ref
1584         << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1585         << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1586         << " offset:" << offset * kAlignment
1587         << " class-after-obj-iter:"
1588         << (class_after_obj_iter_ != class_after_obj_ordered_map_.rend() ?
1589             class_after_obj_iter_->first.AsMirrorPtr() : nullptr)
1590         << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1591         << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1592         << " offset-of-last-idx:"
1593         << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1594         << " first-obj-of-last-idx:"
1595         << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1596 
1597     obj_size -= offset_within_obj;
1598     // If there is only one stride, then adjust last_stride_begin to the
1599     // end of the first object.
1600     if (stride_count == 1) {
1601       last_stride_begin += obj_size / kAlignment;
1602     }
1603   }
1604 
1605   // Except for the last page being compacted, the pages will have addr ==
1606   // start_addr + kPageSize.
1607   uint8_t* const end_addr = addr;
1608   addr = start_addr;
1609   size_t bytes_done = obj_size;
1610   // All strides except the last one can be updated without any boundary
1611   // checks.
1612   DCHECK_LE(addr, last_stride);
1613   size_t bytes_to_visit = last_stride - addr;
1614   DCHECK_LE(bytes_to_visit, kPageSize);
1615   while (bytes_to_visit > bytes_done) {
1616     mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1617     VerifyObject(ref, verify_obj_callback);
1618     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
1619             visitor(this, ref, nullptr, nullptr);
1620     obj_size = ref->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1621     obj_size = RoundUp(obj_size, kAlignment);
1622     bytes_done += obj_size;
1623   }
1624   // The last stride may have multiple objects in it, and we don't know where the
1625   // last object crossing the page boundary starts; therefore, check
1626   // page-end in all of these objects. Also, we need to call
1627   // VisitRefsForCompaction() with from-space object as we fetch object size,
1628   // which in case of klass requires 'class_size_'.
1629   uint8_t* from_addr = from_space_begin_ + last_stride_begin * kAlignment;
1630   bytes_to_visit = end_addr - addr;
1631   DCHECK_LE(bytes_to_visit, kPageSize);
1632   while (bytes_to_visit > bytes_done) {
1633     mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1634     obj = reinterpret_cast<mirror::Object*>(from_addr);
1635     VerifyObject(ref, verify_obj_callback);
1636     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true>
1637             visitor(this, ref, nullptr, start_addr + kPageSize);
1638     obj_size = obj->VisitRefsForCompaction(visitor,
1639                                            MemberOffset(0),
1640                                            MemberOffset(end_addr - (addr + bytes_done)));
1641     obj_size = RoundUp(obj_size, kAlignment);
1642     DCHECK_GT(obj_size, 0u)
1643         << "from_addr:" << obj
1644         << " from-space-class:"
1645         << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1646         << " to_addr:" << ref
1647         << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1648         << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1649         << " offset:" << offset * kAlignment
1650         << " bytes_done:" << bytes_done
1651         << " class-after-obj-iter:"
1652         << (class_after_obj_iter_ != class_after_obj_ordered_map_.rend() ?
1653             class_after_obj_iter_->first.AsMirrorPtr() : nullptr)
1654         << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1655         << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1656         << " offset-of-last-idx:"
1657         << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1658         << " first-obj-of-last-idx:"
1659         << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1660 
1661     from_addr += obj_size;
1662     bytes_done += obj_size;
1663   }
1664   // The last page that we compact may have some bytes left untouched in the
1665   // end, we should zero them as the kernel copies at page granularity.
1666   if (needs_memset_zero && UNLIKELY(bytes_done < kPageSize)) {
1667     std::memset(addr + bytes_done, 0x0, kPageSize - bytes_done);
1668   }
1669 }
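
// The core of CompactPage() above is stride packing: contiguous runs of live
// words are copied back-to-back into the destination page, and references are
// fixed up afterwards. A simplified, standalone sketch of just the packing
// step (hypothetical types; the real stride source is live_words_bitmap_):

#include <cstdint>
#include <cstring>
#include <vector>

struct LiveStride {
  const uint8_t* begin;  // start of the live run in the from-space copy
  size_t size;           // length of the run in bytes
};

// Packs the strides into 'page' (page_size bytes) and returns bytes written.
static size_t PackLiveStrides(const std::vector<LiveStride>& strides,
                              uint8_t* page,
                              size_t page_size) {
  size_t written = 0;
  for (const LiveStride& s : strides) {
    if (written + s.size > page_size) {
      break;  // the rest belongs to the next destination page
    }
    std::memcpy(page + written, s.begin, s.size);
    written += s.size;
  }
  // Zero any tail so a page-granularity copy (e.g. UFFDIO_COPY) doesn't carry
  // stale bytes, mirroring the needs_memset_zero handling above.
  std::memset(page + written, 0, page_size - written);
  return written;
}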
1670 
1671 // We store the starting point (pre_compact_page - first_obj) and first-chunk's
1672 // size. If more TLABs started in this page, then those chunks are identified
1673 // using the mark bitmap. All this info is prepared in UpdateMovingSpaceBlackAllocations().
1674 // If we find a set bit in the bitmap, then we copy the remaining page and then
1675 // use the bitmap to visit each object for updating references.
1676 void MarkCompact::SlideBlackPage(mirror::Object* first_obj,
1677                                  const size_t page_idx,
1678                                  uint8_t* const pre_compact_page,
1679                                  uint8_t* dest,
1680                                  bool needs_memset_zero) {
1681   DCHECK(IsAligned<kPageSize>(pre_compact_page));
1682   size_t bytes_copied;
1683   const uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx];
1684   mirror::Object* next_page_first_obj = first_objs_moving_space_[page_idx + 1].AsMirrorPtr();
1685   uint8_t* src_addr = reinterpret_cast<uint8_t*>(GetFromSpaceAddr(first_obj));
1686   uint8_t* pre_compact_addr = reinterpret_cast<uint8_t*>(first_obj);
1687   uint8_t* const pre_compact_page_end = pre_compact_page + kPageSize;
1688   uint8_t* const dest_page_end = dest + kPageSize;
1689 
1690   auto verify_obj_callback = [&] (std::ostream& os) {
1691                                os << " first_obj=" << first_obj
1692                                   << " next_page_first_obj=" << next_page_first_obj
1693                                   << " first_chunk_size=" << first_chunk_size
1694                                   << " dest=" << static_cast<void*>(dest)
1695                                   << " pre_compact_page="
1696                                   << static_cast<void* const>(pre_compact_page);
1697                              };
1698   // We have an empty portion at the beginning of the page. Zero it.
1699   if (pre_compact_addr > pre_compact_page) {
1700     bytes_copied = pre_compact_addr - pre_compact_page;
1701     DCHECK_LT(bytes_copied, kPageSize);
1702     if (needs_memset_zero) {
1703       std::memset(dest, 0x0, bytes_copied);
1704     }
1705     dest += bytes_copied;
1706   } else {
1707     bytes_copied = 0;
1708     size_t offset = pre_compact_page - pre_compact_addr;
1709     pre_compact_addr = pre_compact_page;
1710     src_addr += offset;
1711     DCHECK(IsAligned<kPageSize>(src_addr));
1712   }
1713   // Copy the first chunk of live words
1714   std::memcpy(dest, src_addr, first_chunk_size);
1715   // Update references in the first chunk. Use object size to find next object.
1716   {
1717     size_t bytes_to_visit = first_chunk_size;
1718     size_t obj_size;
1719     // The first object started in some previous page. So we need to check the
1720     // beginning.
1721     DCHECK_LE(reinterpret_cast<uint8_t*>(first_obj), pre_compact_addr);
1722     size_t offset = pre_compact_addr - reinterpret_cast<uint8_t*>(first_obj);
1723     if (bytes_copied == 0 && offset > 0) {
1724       mirror::Object* to_obj = reinterpret_cast<mirror::Object*>(dest - offset);
1725       mirror::Object* from_obj = reinterpret_cast<mirror::Object*>(src_addr - offset);
1726       // If the next page's first-obj is in this page or is nullptr, then we don't
1727       // need to check the end boundary.
1728       if (next_page_first_obj == nullptr
1729           || (first_obj != next_page_first_obj
1730               && reinterpret_cast<uint8_t*>(next_page_first_obj) <= pre_compact_page_end)) {
1731         RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1732                                                                            to_obj,
1733                                                                            dest,
1734                                                                            nullptr);
1735         obj_size = from_obj->VisitRefsForCompaction<
1736                 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1737                                                                    MemberOffset(offset),
1738                                                                    MemberOffset(-1));
1739       } else {
1740         RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1741                                                                           to_obj,
1742                                                                           dest,
1743                                                                           dest_page_end);
1744         obj_size = from_obj->VisitRefsForCompaction<
1745                 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1746                                                                    MemberOffset(offset),
1747                                                                    MemberOffset(offset
1748                                                                                 + kPageSize));
1749         if (first_obj == next_page_first_obj) {
1750           // First object is the only object on this page. So there's nothing else left to do.
1751           return;
1752         }
1753       }
1754       obj_size = RoundUp(obj_size, kAlignment);
1755       obj_size -= offset;
1756       dest += obj_size;
1757       bytes_to_visit -= obj_size;
1758     }
1759     bytes_copied += first_chunk_size;
1760     // If the last object in this page is next_page_first_obj, then we need to check the end boundary.
1761     bool check_last_obj = false;
1762     if (next_page_first_obj != nullptr
1763         && reinterpret_cast<uint8_t*>(next_page_first_obj) < pre_compact_page_end
1764         && bytes_copied == kPageSize) {
1765       size_t diff = pre_compact_page_end - reinterpret_cast<uint8_t*>(next_page_first_obj);
1766       DCHECK_LE(diff, kPageSize);
1767       DCHECK_LE(diff, bytes_to_visit);
1768       bytes_to_visit -= diff;
1769       check_last_obj = true;
1770     }
1771     while (bytes_to_visit > 0) {
1772       mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
1773       VerifyObject(dest_obj, verify_obj_callback);
1774       RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(this,
1775                                                                           dest_obj,
1776                                                                           nullptr,
1777                                                                           nullptr);
1778       obj_size = dest_obj->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1779       obj_size = RoundUp(obj_size, kAlignment);
1780       bytes_to_visit -= obj_size;
1781       dest += obj_size;
1782     }
1783     DCHECK_EQ(bytes_to_visit, 0u);
1784     if (check_last_obj) {
1785       mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
1786       VerifyObject(dest_obj, verify_obj_callback);
1787       RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this,
1788                                                                          dest_obj,
1789                                                                          nullptr,
1790                                                                          dest_page_end);
1791       mirror::Object* obj = GetFromSpaceAddr(next_page_first_obj);
1792       obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
1793                                                           MemberOffset(0),
1794                                                           MemberOffset(dest_page_end - dest));
1795       return;
1796     }
1797   }
1798 
1799   // Probably a TLAB finished on this page and/or a new TLAB started as well.
1800   if (bytes_copied < kPageSize) {
1801     src_addr += first_chunk_size;
1802     pre_compact_addr += first_chunk_size;
1803     // Use mark-bitmap to identify where objects are. First call
1804     // VisitMarkedRange for only the first marked bit. If found, zero all bytes
1805     // until that object and then call memcpy on the rest of the page.
1806     // Then call VisitMarkedRange for all marked bits *after* the one found in
1807     // this invocation. This time to visit references.
1808     uintptr_t start_visit = reinterpret_cast<uintptr_t>(pre_compact_addr);
1809     uintptr_t page_end = reinterpret_cast<uintptr_t>(pre_compact_page_end);
1810     mirror::Object* found_obj = nullptr;
1811     moving_space_bitmap_->VisitMarkedRange</*kVisitOnce*/true>(start_visit,
1812                                                                 page_end,
1813                                                                 [&found_obj](mirror::Object* obj) {
1814                                                                   found_obj = obj;
1815                                                                 });
1816     size_t remaining_bytes = kPageSize - bytes_copied;
1817     if (found_obj == nullptr) {
1818       if (needs_memset_zero) {
1819         // No more black objects in this page. Zero the remaining bytes and return.
1820         std::memset(dest, 0x0, remaining_bytes);
1821       }
1822       return;
1823     }
1824     // Copy everything in this page, which includes any zeroed regions
1825     // in-between.
1826     std::memcpy(dest, src_addr, remaining_bytes);
1827     DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
1828     moving_space_bitmap_->VisitMarkedRange(
1829             reinterpret_cast<uintptr_t>(found_obj) + mirror::kObjectHeaderSize,
1830             page_end,
1831             [&found_obj, pre_compact_addr, dest, this, verify_obj_callback] (mirror::Object* obj)
1832             REQUIRES_SHARED(Locks::mutator_lock_) {
1833               ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
1834               mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff);
1835               VerifyObject(ref, verify_obj_callback);
1836               RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
1837                       visitor(this, ref, nullptr, nullptr);
1838               ref->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
1839                                                                   MemberOffset(0),
1840                                                                   MemberOffset(-1));
1841               // Remember for next round.
1842               found_obj = obj;
1843             });
1844     // found_obj may have been updated in VisitMarkedRange. Visit the last found
1845     // object.
1846     DCHECK_GT(reinterpret_cast<uint8_t*>(found_obj), pre_compact_addr);
1847     DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
1848     ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
1849     mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff);
1850     VerifyObject(ref, verify_obj_callback);
1851     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this,
1852                                                                        ref,
1853                                                                        nullptr,
1854                                                                        dest_page_end);
1855     ref->VisitRefsForCompaction</*kFetchObjSize*/false>(
1856             visitor, MemberOffset(0), MemberOffset(page_end -
1857                                                    reinterpret_cast<uintptr_t>(found_obj)));
1858   }
1859 }
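
// SlideBlackPage() moves black pages by a page-granular shift, so an object
// keeps its offset within its page. A minimal sketch of that address mapping
// (hypothetical helper; the real code works with the diffs computed above):

#include <cstddef>
#include <cstdint>

static inline uint8_t* SlideAddress(uint8_t* pre_compact_obj,
                                    uint8_t* pre_compact_page,
                                    uint8_t* dest_page) {
  // The offset within the page is unchanged; only the page base moves.
  ptrdiff_t offset_in_page = pre_compact_obj - pre_compact_page;
  return dest_page + offset_in_page;
}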
1860 
1861 template <bool kFirstPageMapping>
1862 void MarkCompact::MapProcessedPages(uint8_t* to_space_start,
1863                                     Atomic<PageState>* state_arr,
1864                                     size_t arr_idx,
1865                                     size_t arr_len) {
1866   DCHECK(minor_fault_initialized_);
1867   DCHECK_LT(arr_idx, arr_len);
1868   DCHECK_ALIGNED(to_space_start, kPageSize);
1869   // Claim all the contiguous pages that are ready to be mapped, and then map
1870   // them in a single ioctl. This avoids the overhead of invoking the syscall
1871   // several times and also maps the already-processed pages, avoiding
1872   // unnecessary faults on them.
1873   size_t length = kFirstPageMapping ? kPageSize : 0;
1874   if (kFirstPageMapping) {
1875     arr_idx++;
1876   }
1877   // We need to guarantee that we don't end up successfully marking a later
1878   // page as 'mapping' and then fail to mark an earlier page. To guarantee that,
1879   // we use acq_rel order.
1880   for (; arr_idx < arr_len; arr_idx++, length += kPageSize) {
1881     PageState expected_state = PageState::kProcessed;
1882     if (!state_arr[arr_idx].compare_exchange_strong(
1883             expected_state, PageState::kProcessedAndMapping, std::memory_order_acq_rel)) {
1884       break;
1885     }
1886   }
1887   if (length > 0) {
1888     // Note: We need the first page to be attempted (to be mapped) by the ioctl
1889     // as this function is called due to some mutator thread waiting on the
1890     // 'to_space_start' page. Therefore, the ioctl must always be called
1891     // with 'to_space_start' as the 'start' address because it can bail out in
1892     // the middle (not attempting to map the subsequent pages) if it finds any
1893     // page either already mapped in between, or missing on the shadow-map.
1894     struct uffdio_continue uffd_continue;
1895     uffd_continue.range.start = reinterpret_cast<uintptr_t>(to_space_start);
1896     uffd_continue.range.len = length;
1897     uffd_continue.mode = 0;
1898     int ret = ioctl(uffd_, UFFDIO_CONTINUE, &uffd_continue);
1899     if (UNLIKELY(ret == -1 && errno == EAGAIN)) {
1900       // This can happen only in linear-alloc.
1901       DCHECK(linear_alloc_spaces_data_.end() !=
1902              std::find_if(linear_alloc_spaces_data_.begin(),
1903                           linear_alloc_spaces_data_.end(),
1904                           [to_space_start](const LinearAllocSpaceData& data) {
1905                             return data.begin_ <= to_space_start && to_space_start < data.end_;
1906                           }));
1907 
1908       // This could happen if userfaultfd couldn't find any pages mapped in the
1909       // shadow map. For instance, if there are certain (contiguous) pages on
1910       // linear-alloc which are allocated and have first-object set-up but have
1911       // not been accessed yet.
1912       // Bail out by setting the remaining pages' state back to kProcessed and
1913       // then waking up any waiting threads.
1914       DCHECK_GE(uffd_continue.mapped, 0);
1915       DCHECK_ALIGNED(uffd_continue.mapped, kPageSize);
1916       DCHECK_LT(uffd_continue.mapped, static_cast<ssize_t>(length));
1917       if (kFirstPageMapping) {
1918         // In this case the first page must be mapped.
1919         DCHECK_GE(uffd_continue.mapped, static_cast<ssize_t>(kPageSize));
1920       }
1921       // Nobody would modify these pages' state simultaneously so only atomic
1922       // store is sufficient. Use 'release' order to ensure that all states are
1923       // modified sequentially.
1924       for (size_t remaining_len = length - uffd_continue.mapped; remaining_len > 0;
1925            remaining_len -= kPageSize) {
1926         arr_idx--;
1927         DCHECK_EQ(state_arr[arr_idx].load(std::memory_order_relaxed),
1928                   PageState::kProcessedAndMapping);
1929         state_arr[arr_idx].store(PageState::kProcessed, std::memory_order_release);
1930       }
1931       uffd_continue.range.start =
1932           reinterpret_cast<uintptr_t>(to_space_start) + uffd_continue.mapped;
1933       uffd_continue.range.len = length - uffd_continue.mapped;
1934       ret = ioctl(uffd_, UFFDIO_WAKE, &uffd_continue.range);
1935       CHECK_EQ(ret, 0) << "ioctl_userfaultfd: wake failed: " << strerror(errno);
1936     } else {
1937       // We may receive ENOENT if gc-thread unregisters the
1938       // range behind our back, which is fine because that
1939       // happens only when it knows compaction is done.
1940       CHECK(ret == 0 || !kFirstPageMapping || errno == ENOENT)
1941           << "ioctl_userfaultfd: continue failed: " << strerror(errno);
1942       if (ret == 0) {
1943         DCHECK_EQ(uffd_continue.mapped, static_cast<ssize_t>(length));
1944       }
1945     }
1946     if (use_uffd_sigbus_) {
1947       // Nobody else would modify these pages' state simultaneously so atomic
1948       // store is sufficient.
1949       for (; uffd_continue.mapped > 0; uffd_continue.mapped -= kPageSize) {
1950         arr_idx--;
1951         DCHECK_EQ(state_arr[arr_idx].load(std::memory_order_relaxed),
1952                   PageState::kProcessedAndMapping);
1953         state_arr[arr_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
1954       }
1955     }
1956   }
1957 }
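
// A standalone sketch of the claim-then-map pattern used above: flip every
// contiguous 'processed' page to 'mapping' with a CAS, then issue one ioctl
// covering the whole run. Simplified with a plain enum, std::atomic, and a
// callback standing in for UFFDIO_CONTINUE (hypothetical names throughout):

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <functional>

enum class DemoPageState : uint8_t { kUnprocessed, kProcessed, kMapping };

static void ClaimAndMapContiguous(std::atomic<DemoPageState>* states,
                                  size_t idx,
                                  size_t len,
                                  const std::function<void(size_t first, size_t count)>& map_fn) {
  size_t count = 0;
  for (size_t i = idx; i < len; i++, count++) {
    DemoPageState expected = DemoPageState::kProcessed;
    // acq_rel so a later page can never be observed as claimed while an
    // earlier claim failed, matching the ordering argument above.
    if (!states[i].compare_exchange_strong(expected,
                                           DemoPageState::kMapping,
                                           std::memory_order_acq_rel)) {
      break;
    }
  }
  if (count > 0) {
    map_fn(idx, count);  // e.g. one UFFDIO_CONTINUE spanning count pages
  }
}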
1958 
1959 void MarkCompact::ZeropageIoctl(void* addr, bool tolerate_eexist, bool tolerate_enoent) {
1960   struct uffdio_zeropage uffd_zeropage;
1961   DCHECK(IsAligned<kPageSize>(addr));
1962   uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr);
1963   uffd_zeropage.range.len = kPageSize;
1964   uffd_zeropage.mode = 0;
1965   int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage);
1966   if (LIKELY(ret == 0)) {
1967     DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(kPageSize));
1968   } else {
1969     CHECK((tolerate_enoent && errno == ENOENT) || (tolerate_eexist && errno == EEXIST))
1970         << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr;
1971   }
1972 }
1973 
1974 void MarkCompact::CopyIoctl(void* dst, void* buffer) {
1975   struct uffdio_copy uffd_copy;
1976   uffd_copy.src = reinterpret_cast<uintptr_t>(buffer);
1977   uffd_copy.dst = reinterpret_cast<uintptr_t>(dst);
1978   uffd_copy.len = kPageSize;
1979   uffd_copy.mode = 0;
1980   CHECK_EQ(ioctl(uffd_, UFFDIO_COPY, &uffd_copy), 0)
1981       << "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". src:" << buffer
1982       << " dst:" << dst;
1983   DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize));
1984 }
1985 
1986 template <int kMode, typename CompactionFn>
1987 void MarkCompact::DoPageCompactionWithStateChange(size_t page_idx,
1988                                                   size_t status_arr_len,
1989                                                   uint8_t* to_space_page,
1990                                                   uint8_t* page,
1991                                                   CompactionFn func) {
1992   PageState expected_state = PageState::kUnprocessed;
1993   PageState desired_state =
1994       kMode == kCopyMode ? PageState::kProcessingAndMapping : PageState::kProcessing;
1995   // In the concurrent case (kMode != kFallbackMode) we need to ensure that the update
1996   // to moving_pages_status_[page_idx] is released before the contents of the page are
1997   // made accessible to other threads.
1998   //
1999   // We need acquire ordering here to ensure that when the CAS fails, another thread
2000   // has completed processing the page, which is guaranteed by the release below.
2001   if (kMode == kFallbackMode || moving_pages_status_[page_idx].compare_exchange_strong(
2002                                     expected_state, desired_state, std::memory_order_acquire)) {
2003     func();
2004     if (kMode == kCopyMode) {
2005       CopyIoctl(to_space_page, page);
2006       if (use_uffd_sigbus_) {
2007         // Store is sufficient as no other thread would modify the status at this point.
2008         moving_pages_status_[page_idx].store(PageState::kProcessedAndMapped,
2009                                              std::memory_order_release);
2010       }
2011     } else if (kMode == kMinorFaultMode) {
2012       expected_state = PageState::kProcessing;
2013       desired_state = PageState::kProcessed;
2014       // The CAS needs to be done with release order to ensure that stores to the
2015       // page make it to memory *before* other threads observe that it's
2016       // ready to be mapped.
2017       if (!moving_pages_status_[page_idx].compare_exchange_strong(
2018               expected_state, desired_state, std::memory_order_release)) {
2019         // Some mutator has requested to map the page after processing it.
2020         DCHECK_EQ(expected_state, PageState::kProcessingAndMapping);
2021         MapProcessedPages</*kFirstPageMapping=*/true>(
2022             to_space_page, moving_pages_status_, page_idx, status_arr_len);
2023       }
2024     }
2025   } else {
2026     DCHECK_GT(expected_state, PageState::kProcessed);
2027   }
2028 }
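
// The ordering argument above boils down to a standard release/acquire
// publication handshake. A minimal, self-contained sketch with one page and a
// boolean flag standing in for the per-page state word (illustrative only):

#include <atomic>
#include <cstring>

struct DemoPageSlot {
  std::atomic<bool> ready{false};
  unsigned char data[4096];
};

// Producer (GC thread): fill the page, then publish with release so the
// contents are visible to any thread that sees ready == true.
static void PublishPage(DemoPageSlot* slot, const unsigned char* src) {
  std::memcpy(slot->data, src, sizeof(slot->data));
  slot->ready.store(true, std::memory_order_release);
}

// Consumer (mutator): the acquire load pairs with the release store above, so
// the memcpy below never reads a partially written page.
static bool TryConsumePage(const DemoPageSlot* slot, unsigned char* dst) {
  if (!slot->ready.load(std::memory_order_acquire)) {
    return false;
  }
  std::memcpy(dst, slot->data, sizeof(slot->data));
  return true;
}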
2029 
2030 void MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode) {
2031   // Thanks to sliding compaction, bump-pointer allocations, and reverse
2032   // compaction (see CompactMovingSpace) the logic here is pretty simple: find
2033   // the to-space page up to which compaction has finished; all the from-space
2034   // pages corresponding to it onwards can be freed. There are some corner
2035   // cases to be taken care of, which are described below.
2036   size_t idx = last_checked_reclaim_page_idx_;
2037   // Find the to-space page up to which the corresponding from-space pages can be
2038   // freed.
2039   for (; idx > cur_page_idx; idx--) {
2040     PageState state = moving_pages_status_[idx - 1].load(std::memory_order_acquire);
2041     if (state == PageState::kMutatorProcessing) {
2042       // Some mutator is working on the page.
2043       break;
2044     }
2045     DCHECK(state >= PageState::kProcessed ||
2046            (state == PageState::kUnprocessed &&
2047             (mode == kFallbackMode || idx > moving_first_objs_count_)));
2048   }
2049   DCHECK_LE(idx, last_checked_reclaim_page_idx_);
2050   if (idx == last_checked_reclaim_page_idx_) {
2051     // Nothing to do.
2052     return;
2053   }
2054 
2055   uint8_t* reclaim_begin;
2056   uint8_t* idx_addr;
2057   // Calculate the first from-space page to be freed using 'idx'. If the
2058   // first-object of the idx'th to-space page started before the corresponding
2059   // from-space page, which is almost always the case in the compaction portion
2060   // of the moving-space, then it indicates that the subsequent pages that are
2061   // yet to be compacted will need the from-space pages. Therefore, find the page
2062   // (from the already compacted pages) whose first-object is different from
2063   // ours. All the from-space pages starting from that one are safe to be
2064   // removed. Please note that this iteration is not expected to be long in
2065   // normal cases as objects are smaller than page size.
2066   if (idx >= moving_first_objs_count_) {
2067     // black-allocated portion of the moving-space
2068     idx_addr = black_allocations_begin_ + (idx - moving_first_objs_count_) * kPageSize;
2069     reclaim_begin = idx_addr;
2070     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2071     if (first_obj != nullptr && reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
2072       size_t idx_len = moving_first_objs_count_ + black_page_count_;
2073       for (size_t i = idx + 1; i < idx_len; i++) {
2074         mirror::Object* obj = first_objs_moving_space_[i].AsMirrorPtr();
2075         // A null first-object indicates that the corresponding to-space page is
2076         // not used yet. So we can compute its from-space page and use that.
2077         if (obj != first_obj) {
2078           reclaim_begin = obj != nullptr
2079                           ? AlignUp(reinterpret_cast<uint8_t*>(obj), kPageSize)
2080                           : (black_allocations_begin_ + (i - moving_first_objs_count_) * kPageSize);
2081           break;
2082         }
2083       }
2084     }
2085   } else {
2086     DCHECK_GE(pre_compact_offset_moving_space_[idx], 0u);
2087     idx_addr = bump_pointer_space_->Begin() + pre_compact_offset_moving_space_[idx] * kAlignment;
2088     reclaim_begin = idx_addr;
2089     DCHECK_LE(reclaim_begin, black_allocations_begin_);
2090     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2091     if (reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
2092       DCHECK_LT(idx, moving_first_objs_count_);
2093       mirror::Object* obj = first_obj;
2094       for (size_t i = idx + 1; i < moving_first_objs_count_; i++) {
2095         obj = first_objs_moving_space_[i].AsMirrorPtr();
2096         if (first_obj != obj) {
2097           DCHECK_LT(first_obj, obj);
2098           DCHECK_LT(reclaim_begin, reinterpret_cast<uint8_t*>(obj));
2099           reclaim_begin = reinterpret_cast<uint8_t*>(obj);
2100           break;
2101         }
2102       }
2103       if (obj == first_obj) {
2104         reclaim_begin = black_allocations_begin_;
2105       }
2106     }
2107     reclaim_begin = AlignUp(reclaim_begin, kPageSize);
2108   }
2109 
2110   DCHECK_NE(reclaim_begin, nullptr);
2111   DCHECK_ALIGNED(reclaim_begin, kPageSize);
2112   DCHECK_ALIGNED(last_reclaimed_page_, kPageSize);
2113   // Check if the 'class_after_obj_map_' map allows pages to be freed.
2114   for (; class_after_obj_iter_ != class_after_obj_ordered_map_.rend(); class_after_obj_iter_++) {
2115     mirror::Object* klass = class_after_obj_iter_->first.AsMirrorPtr();
2116     mirror::Class* from_klass = static_cast<mirror::Class*>(GetFromSpaceAddr(klass));
2117     // Check with class' end to ensure that, if required, the entire class survives.
2118     uint8_t* klass_end = reinterpret_cast<uint8_t*>(klass) + from_klass->SizeOf<kVerifyNone>();
2119     DCHECK_LE(klass_end, last_reclaimed_page_);
2120     if (reinterpret_cast<uint8_t*>(klass_end) >= reclaim_begin) {
2121       // Found a class which is in the reclaim range.
2122       uint8_t* obj_addr = reinterpret_cast<uint8_t*>(class_after_obj_iter_->second.AsMirrorPtr());
2123       // NOTE: Don't assert that obj is of 'klass' type as klass could instead
2124       // be its super-class.
2125       if (obj_addr < idx_addr) {
2126         // Its lowest-address object is not compacted yet. Reclaim starting from
2127         // the end of this class.
2128         reclaim_begin = AlignUp(klass_end, kPageSize);
2129       } else {
2130         // Continue consuming pairs wherein the lowest address object has already
2131         // been compacted.
2132         continue;
2133       }
2134     }
2135     // All the remaining class (and thereby corresponding object) addresses are
2136     // lower than the reclaim range.
2137     break;
2138   }
2139 
2140   ssize_t size = last_reclaimed_page_ - reclaim_begin;
2141   if (size >= kMinFromSpaceMadviseSize) {
2142     int behavior = minor_fault_initialized_ ? MADV_REMOVE : MADV_DONTNEED;
2143     CHECK_EQ(madvise(reclaim_begin + from_space_slide_diff_, size, behavior), 0)
2144         << "madvise of from-space failed: " << strerror(errno);
2145     last_reclaimed_page_ = reclaim_begin;
2146   }
2147   last_checked_reclaim_page_idx_ = idx;
2148 }
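
// A minimal sketch of the reclamation step above: once a from-space range is
// known to be fully compacted, its physical pages are handed back via
// madvise(). MADV_DONTNEED is enough for private mappings, while MADV_REMOVE
// also drops the backing store of shared (e.g. memfd-backed) mappings, which
// matches the minor_fault_initialized_ distinction above (hypothetical helper):

#include <sys/mman.h>
#include <cstddef>
#include <cstdint>

static bool ReleaseDeadRange(uint8_t* begin, size_t size, bool shared_mapping) {
  // Both begin and size are expected to be page-aligned.
  int advice = shared_mapping ? MADV_REMOVE : MADV_DONTNEED;
  return madvise(begin, size, advice) == 0;
}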
2149 
2150 void MarkCompact::UpdateClassAfterObjMap() {
2151   CHECK(class_after_obj_ordered_map_.empty());
2152   for (const auto& pair : class_after_obj_hash_map_) {
2153     auto super_class_iter = super_class_after_class_hash_map_.find(pair.first);
2154     ObjReference key = super_class_iter != super_class_after_class_hash_map_.end()
2155                        ? super_class_iter->second
2156                        : pair.first;
2157     if (std::less<mirror::Object*>{}(pair.second.AsMirrorPtr(), key.AsMirrorPtr()) &&
2158         bump_pointer_space_->HasAddress(key.AsMirrorPtr())) {
2159       auto [ret_iter, success] = class_after_obj_ordered_map_.try_emplace(key, pair.second);
2160       // It could fail only if the class 'key' has objects of its own that are lower in
2161       // address order, as well as objects of some of its derived classes. In this case
2162       // choose the lowest-address object.
2163       if (!success &&
2164           std::less<mirror::Object*>{}(pair.second.AsMirrorPtr(), ret_iter->second.AsMirrorPtr())) {
2165         ret_iter->second = pair.second;
2166       }
2167     }
2168   }
2169   class_after_obj_hash_map_.clear();
2170   super_class_after_class_hash_map_.clear();
2171 }
2172 
2173 template <int kMode>
2174 void MarkCompact::CompactMovingSpace(uint8_t* page) {
2175   // For every page we have a starting object, which may have started in some
2176   // preceding page, and an offset within that object from where we must start
2177   // copying.
2178   // Consult the live-words bitmap to copy all contiguously live words at a
2179   // time. These words may constitute multiple objects. To avoid the need for
2180   // consulting the mark-bitmap to find where the next live object starts, we
2181   // use the object-size returned by VisitRefsForCompaction.
2182   //
2183   // We do the compaction in reverse direction so that the pages containing
2184   // TLAB and latest allocations are processed first.
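  // Page indices in [moving_first_objs_count_, moving_first_objs_count_ + black_page_count_)
  // are black-allocation pages and are slid (SlideBlackPage) in the first loop below;
  // indices below moving_first_objs_count_ are compacted (CompactPage) in the second loop.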
2185   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2186   size_t page_status_arr_len = moving_first_objs_count_ + black_page_count_;
2187   size_t idx = page_status_arr_len;
2188   uint8_t* to_space_end = bump_pointer_space_->Begin() + page_status_arr_len * kPageSize;
2189   uint8_t* shadow_space_end = nullptr;
2190   if (kMode == kMinorFaultMode) {
2191     shadow_space_end = shadow_to_space_map_.Begin() + page_status_arr_len * kPageSize;
2192   }
2193   uint8_t* pre_compact_page = black_allocations_begin_ + (black_page_count_ * kPageSize);
2194 
2195   DCHECK(IsAligned<kPageSize>(pre_compact_page));
2196 
2197   UpdateClassAfterObjMap();
2198   // These variables are maintained by FreeFromSpacePages().
2199   last_reclaimed_page_ = pre_compact_page;
2200   last_checked_reclaim_page_idx_ = idx;
2201   class_after_obj_iter_ = class_after_obj_ordered_map_.rbegin();
2202   // Allocated-black pages
2203   while (idx > moving_first_objs_count_) {
2204     idx--;
2205     pre_compact_page -= kPageSize;
2206     to_space_end -= kPageSize;
2207     if (kMode == kMinorFaultMode) {
2208       shadow_space_end -= kPageSize;
2209       page = shadow_space_end;
2210     } else if (kMode == kFallbackMode) {
2211       page = to_space_end;
2212     }
2213     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2214     if (first_obj != nullptr) {
2215       DoPageCompactionWithStateChange<kMode>(
2216           idx,
2217           page_status_arr_len,
2218           to_space_end,
2219           page,
2220           [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2221             SlideBlackPage(first_obj, idx, pre_compact_page, page, kMode == kCopyMode);
2222           });
2223       // We are sliding here, so no point attempting to madvise for every
2224       // page. Wait for enough pages to be done.
2225       if (idx % (kMinFromSpaceMadviseSize / kPageSize) == 0) {
2226         FreeFromSpacePages(idx, kMode);
2227       }
2228     }
2229   }
2230   DCHECK_EQ(pre_compact_page, black_allocations_begin_);
2231 
2232   while (idx > 0) {
2233     idx--;
2234     to_space_end -= kPageSize;
2235     if (kMode == kMinorFaultMode) {
2236       shadow_space_end -= kPageSize;
2237       page = shadow_space_end;
2238     } else if (kMode == kFallbackMode) {
2239       page = to_space_end;
2240     }
2241     mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2242     DoPageCompactionWithStateChange<kMode>(
2243         idx, page_status_arr_len, to_space_end, page, [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2244           CompactPage(first_obj, pre_compact_offset_moving_space_[idx], page, kMode == kCopyMode);
2245         });
2246     FreeFromSpacePages(idx, kMode);
2247   }
2248   DCHECK_EQ(to_space_end, bump_pointer_space_->Begin());
2249 }
2250 
2251 void MarkCompact::UpdateNonMovingPage(mirror::Object* first, uint8_t* page) {
2252   DCHECK_LT(reinterpret_cast<uint8_t*>(first), page + kPageSize);
2253   // For every object found in the page, visit the previous object. This ensures
2254   // that we can visit without checking the page-end boundary.
2255   // Call VisitRefsForCompaction with the from-space read-barrier, as the klass object
2256   // and super-class loads require it.
2257   // TODO: Set kVisitNativeRoots to false once we implement concurrent
2258   // compaction
2259   mirror::Object* curr_obj = first;
2260   non_moving_space_bitmap_->VisitMarkedRange(
2261           reinterpret_cast<uintptr_t>(first) + mirror::kObjectHeaderSize,
2262           reinterpret_cast<uintptr_t>(page + kPageSize),
2263           [&](mirror::Object* next_obj) {
2264             // TODO: Once non-moving space update becomes concurrent, we'll
2265             // require fetching the from-space address of 'curr_obj' and then call
2266             // visitor on that.
2267             if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2268               RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false>
2269                       visitor(this, curr_obj, page, page + kPageSize);
2270               MemberOffset begin_offset(page - reinterpret_cast<uint8_t*>(curr_obj));
2271               // Native roots shouldn't be visited as they are done when this
2272               // object's beginning was visited in the preceding page.
2273               curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false, /*kVisitNativeRoots*/false>(
2274                       visitor, begin_offset, MemberOffset(-1));
2275             } else {
2276               RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
2277                       visitor(this, curr_obj, page, page + kPageSize);
2278               curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
2279                                                                        MemberOffset(0),
2280                                                                        MemberOffset(-1));
2281             }
2282             curr_obj = next_obj;
2283           });
2284 
2285   MemberOffset end_offset(page + kPageSize - reinterpret_cast<uint8_t*>(curr_obj));
2286   if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2287     RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true>
2288             visitor(this, curr_obj, page, page + kPageSize);
2289     curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false, /*kVisitNativeRoots*/false>(
2290             visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset);
2291   } else {
2292     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true>
2293             visitor(this, curr_obj, page, page + kPageSize);
2294     curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor, MemberOffset(0), end_offset);
2295   }
2296 }
2297 
2298 void MarkCompact::UpdateNonMovingSpace() {
2299   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2300   // Iterating in reverse ensures that the class pointer in objects which span
2301   // across more than one page gets updated last. This is necessary for
2302   // VisitRefsForCompaction() to work correctly.
2303   // TODO: If and when we make non-moving space update concurrent, implement a
2304   // mechanism to remember class pointers for such objects off-heap and pass it
2305   // to VisitRefsForCompaction().
2306   uint8_t* page = non_moving_space_->Begin() + non_moving_first_objs_count_ * kPageSize;
2307   for (ssize_t i = non_moving_first_objs_count_ - 1; i >= 0; i--) {
2308     mirror::Object* obj = first_objs_non_moving_space_[i].AsMirrorPtr();
2309     page -= kPageSize;
2310     // null means there is no object on this page whose references need updating.
2311     if (obj != nullptr) {
2312       UpdateNonMovingPage(obj, page);
2313     }
2314   }
2315 }
2316 
2317 void MarkCompact::UpdateMovingSpaceBlackAllocations() {
2318   // For sliding black pages, we need the first-object, which overlaps with the
2319   // first byte of the page. Additionally, we compute the size of the first chunk
2320   // of black objects. This suffices for most black pages. Unlike compaction
2321   // pages, here we don't need to pre-compute the offset within the first-obj from
2322   // where sliding has to start. That can be calculated using the pre-compact
2323   // address of the page. Therefore, to save space, we store the first chunk's
2324   // size in black_alloc_pages_first_chunk_size_ array.
2325   // For the pages which may have holes after the first chunk, which could happen
2326   // if a new TLAB starts in the middle of the page, we mark the objects in
2327   // the mark-bitmap. So, if the first-chunk size is smaller than kPageSize,
2328   // then we use the mark-bitmap for the remainder of the page.
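  // Illustration (hypothetical sizes, 4 KiB page assumed, sizes already kAlignment-rounded):
  // if a fresh page starts with a 3000-byte object A followed by a 2000-byte object B, then
  // processing A leaves first_chunk_size == 3000; B crosses the page boundary, so the page
  // records <first_obj = A, first_chunk_size = 4096> and the next page starts over with
  // first_obj = B and first_chunk_size == 904 (the tail of B that spills over).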
2329   uint8_t* const begin = bump_pointer_space_->Begin();
2330   uint8_t* black_allocs = black_allocations_begin_;
2331   DCHECK_LE(begin, black_allocs);
2332   size_t consumed_blocks_count = 0;
2333   size_t first_block_size;
2334   // Get the list of all blocks allocated in the bump-pointer space.
2335   std::vector<size_t>* block_sizes = bump_pointer_space_->GetBlockSizes(thread_running_gc_,
2336                                                                         &first_block_size);
2337   DCHECK_LE(first_block_size, (size_t)(black_allocs - begin));
2338   if (block_sizes != nullptr) {
2339     size_t black_page_idx = moving_first_objs_count_;
2340     uint8_t* block_end = begin + first_block_size;
2341     uint32_t remaining_chunk_size = 0;
2342     uint32_t first_chunk_size = 0;
2343     mirror::Object* first_obj = nullptr;
2344     for (size_t block_size : *block_sizes) {
2345       block_end += block_size;
2346       // Skip the blocks that are prior to the black allocations. These will be
2347       // merged with the main-block later.
2348       if (black_allocs >= block_end) {
2349         consumed_blocks_count++;
2350         continue;
2351       }
2352       mirror::Object* obj = reinterpret_cast<mirror::Object*>(black_allocs);
2353       bool set_mark_bit = remaining_chunk_size > 0;
2354       // We don't know how many objects are allocated in the current block. When we hit
2355       // a null class pointer, assume it's the end. This works as every block is expected to
2356       // have objects allocated linearly using bump-pointer.
2357       // BumpPointerSpace::Walk() also works similarly.
2358       while (black_allocs < block_end
2359              && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
2360         // Try to keep instructions which access the class instance together to
2361         // avoid reloading the class pointer from the object.
2362         size_t obj_size = obj->SizeOf();
2363         bytes_scanned_ += obj_size;
2364         obj_size = RoundUp(obj_size, kAlignment);
2365         UpdateClassAfterObjectMap(obj);
2366         if (first_obj == nullptr) {
2367           first_obj = obj;
2368         }
2369         // We only need the mark-bitmap in the pages wherein a new TLAB starts in
2370         // the middle of the page.
2371         if (set_mark_bit) {
2372           moving_space_bitmap_->Set(obj);
2373         }
2374         // Handle objects which cross page boundary, including objects larger
2375         // than page size.
2376         if (remaining_chunk_size + obj_size >= kPageSize) {
2377           set_mark_bit = false;
2378           first_chunk_size += kPageSize - remaining_chunk_size;
2379           remaining_chunk_size += obj_size;
2380           // We should not store first-object and remaining_chunk_size if there were
2381           // unused bytes before this TLAB, in which case we must have already
2382           // stored the values (below).
2383           if (black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2384             black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2385             first_objs_moving_space_[black_page_idx].Assign(first_obj);
2386           }
2387           black_page_idx++;
2388           remaining_chunk_size -= kPageSize;
2389           // Consume an object larger than page size.
2390           while (remaining_chunk_size >= kPageSize) {
2391             black_alloc_pages_first_chunk_size_[black_page_idx] = kPageSize;
2392             first_objs_moving_space_[black_page_idx].Assign(obj);
2393             black_page_idx++;
2394             remaining_chunk_size -= kPageSize;
2395           }
2396           first_obj = remaining_chunk_size > 0 ? obj : nullptr;
2397           first_chunk_size = remaining_chunk_size;
2398         } else {
2399           DCHECK_LE(first_chunk_size, remaining_chunk_size);
2400           first_chunk_size += obj_size;
2401           remaining_chunk_size += obj_size;
2402         }
2403         black_allocs += obj_size;
2404         obj = reinterpret_cast<mirror::Object*>(black_allocs);
2405       }
2406       DCHECK_LE(black_allocs, block_end);
2407       DCHECK_LT(remaining_chunk_size, kPageSize);
2408       // Consume the unallocated portion of the block.
2409       if (black_allocs < block_end) {
2410         // first-chunk of the current page ends here. Store it.
2411         if (first_chunk_size > 0 && black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2412           black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2413           first_objs_moving_space_[black_page_idx].Assign(first_obj);
2414         }
2415         first_chunk_size = 0;
2416         first_obj = nullptr;
2417         size_t page_remaining = kPageSize - remaining_chunk_size;
2418         size_t block_remaining = block_end - black_allocs;
2419         if (page_remaining <= block_remaining) {
2420           block_remaining -= page_remaining;
2421           // current page and the subsequent empty pages in the block
2422           black_page_idx += 1 + block_remaining / kPageSize;
2423           remaining_chunk_size = block_remaining % kPageSize;
2424         } else {
2425           remaining_chunk_size += block_remaining;
2426         }
2427         black_allocs = block_end;
2428       }
2429     }
2430     if (black_page_idx < bump_pointer_space_->Size() / kPageSize) {
2431       // Store the leftover first-chunk, if any, and update page index.
2432       if (black_alloc_pages_first_chunk_size_[black_page_idx] > 0) {
2433         black_page_idx++;
2434       } else if (first_chunk_size > 0) {
2435         black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2436         first_objs_moving_space_[black_page_idx].Assign(first_obj);
2437         black_page_idx++;
2438       }
2439     }
2440     black_page_count_ = black_page_idx - moving_first_objs_count_;
2441     delete block_sizes;
2442   }
2443   // Update bump-pointer space by consuming all the pre-black blocks into the
2444   // main one.
2445   bump_pointer_space_->SetBlockSizes(thread_running_gc_,
2446                                      post_compact_end_ - begin,
2447                                      consumed_blocks_count);
2448 }
2449 
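// Walks the allocation stack and, for every black-allocated object residing in the
// non-moving space, marks it in non_moving_space_bitmap_, clears its stack slot, and
// records it as the first object of its page (and of any following pages it spans) so
// that UpdateNonMovingPage() can later visit those pages.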
2450 void MarkCompact::UpdateNonMovingSpaceBlackAllocations() {
2451   accounting::ObjectStack* stack = heap_->GetAllocationStack();
2452   const StackReference<mirror::Object>* limit = stack->End();
2453   uint8_t* const space_begin = non_moving_space_->Begin();
2454   for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) {
2455     mirror::Object* obj = it->AsMirrorPtr();
2456     if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) {
2457       non_moving_space_bitmap_->Set(obj);
2458       // Clear so that we don't try to set the bit again in the next GC-cycle.
2459       it->Clear();
2460       size_t idx = (reinterpret_cast<uint8_t*>(obj) - space_begin) / kPageSize;
2461       uint8_t* page_begin = AlignDown(reinterpret_cast<uint8_t*>(obj), kPageSize);
2462       mirror::Object* first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr();
2463       if (first_obj == nullptr
2464           || (obj < first_obj && reinterpret_cast<uint8_t*>(first_obj) > page_begin)) {
2465         first_objs_non_moving_space_[idx].Assign(obj);
2466       }
2467       mirror::Object* next_page_first_obj = first_objs_non_moving_space_[++idx].AsMirrorPtr();
2468       uint8_t* next_page_begin = page_begin + kPageSize;
2469       if (next_page_first_obj == nullptr
2470           || reinterpret_cast<uint8_t*>(next_page_first_obj) > next_page_begin) {
2471         size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
2472         uint8_t* obj_end = reinterpret_cast<uint8_t*>(obj) + obj_size;
2473         while (next_page_begin < obj_end) {
2474           first_objs_non_moving_space_[idx++].Assign(obj);
2475           next_page_begin += kPageSize;
2476         }
2477       }
2478       // Update the first-objs count in case we went past non_moving_first_objs_count_.
2479       non_moving_first_objs_count_ = std::max(non_moving_first_objs_count_, idx);
2480     }
2481   }
2482 }
2483 
2484 class MarkCompact::ImmuneSpaceUpdateObjVisitor {
2485  public:
2486   ImmuneSpaceUpdateObjVisitor(MarkCompact* collector, bool visit_native_roots)
2487       : collector_(collector), visit_native_roots_(visit_native_roots) {}
2488 
2489   ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES(Locks::mutator_lock_) {
2490     RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(collector_,
2491                                                                         obj,
2492                                                                         /*begin_*/nullptr,
2493                                                                         /*end_*/nullptr);
2494     if (visit_native_roots_) {
2495       obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ true>(
2496           visitor, MemberOffset(0), MemberOffset(-1));
2497     } else {
2498       obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2499           visitor, MemberOffset(0), MemberOffset(-1));
2500     }
2501   }
2502 
2503   static void Callback(mirror::Object* obj, void* arg) REQUIRES(Locks::mutator_lock_) {
2504     reinterpret_cast<ImmuneSpaceUpdateObjVisitor*>(arg)->operator()(obj);
2505   }
2506 
2507  private:
2508   MarkCompact* const collector_;
2509   const bool visit_native_roots_;
2510 };
2511 
2512 class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor {
2513  public:
2514   explicit ClassLoaderRootsUpdater(MarkCompact* collector) : collector_(collector) {}
2515 
2516   void Visit(ObjPtr<mirror::ClassLoader> class_loader) override
2517       REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) {
2518     ClassTable* const class_table = class_loader->GetClassTable();
2519     if (class_table != nullptr) {
2520       class_table->VisitRoots(*this);
2521     }
2522   }
2523 
2524   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
2525       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
2526     if (!root->IsNull()) {
2527       VisitRoot(root);
2528     }
2529   }
2530 
2531   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
2532       REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
2533     collector_->VisitRoots(&root, 1, RootInfo(RootType::kRootVMInternal));
2534   }
2535 
2536  private:
2537   MarkCompact* collector_;
2538 };
2539 
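// Updates GC roots embedded in a single linear-alloc page: the page is walked via the
// TrackingHeader preceding each allocation, and depending on the allocation kind (GcRoot
// array, ArtMethod/ArtField arrays, dex-cache pairs) references into the moving space are
// rewritten to their post-compaction addresses.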
2540 class MarkCompact::LinearAllocPageUpdater {
2541  public:
2542   explicit LinearAllocPageUpdater(MarkCompact* collector) : collector_(collector) {}
2543 
2544   void operator()(uint8_t* page_begin, uint8_t* first_obj) ALWAYS_INLINE
2545       REQUIRES_SHARED(Locks::mutator_lock_) {
2546     DCHECK_ALIGNED(page_begin, kPageSize);
2547     uint8_t* page_end = page_begin + kPageSize;
2548     uint32_t obj_size;
2549     for (uint8_t* byte = first_obj; byte < page_end;) {
2550       TrackingHeader* header = reinterpret_cast<TrackingHeader*>(byte);
2551       obj_size = header->GetSize();
2552       if (UNLIKELY(obj_size == 0)) {
2553         // No more objects in this page to visit.
2554         last_page_touched_ = byte >= page_begin;
2555         return;
2556       }
2557       uint8_t* obj = byte + sizeof(TrackingHeader);
2558       uint8_t* obj_end = byte + obj_size;
2559       if (header->Is16Aligned()) {
2560         obj = AlignUp(obj, 16);
2561       }
2562       uint8_t* begin_boundary = std::max(obj, page_begin);
2563       uint8_t* end_boundary = std::min(obj_end, page_end);
2564       if (begin_boundary < end_boundary) {
2565         VisitObject(header->GetKind(), obj, begin_boundary, end_boundary);
2566       }
2567       if (ArenaAllocator::IsRunningOnMemoryTool()) {
2568         obj_size += ArenaAllocator::kMemoryToolRedZoneBytes;
2569       }
2570       byte += RoundUp(obj_size, LinearAlloc::kAlignment);
2571     }
2572     last_page_touched_ = true;
2573   }
2574 
2575   bool WasLastPageTouched() const { return last_page_touched_; }
2576 
2577   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
2578       ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
2579     if (!root->IsNull()) {
2580       VisitRoot(root);
2581     }
2582   }
2583 
2584   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
2585       ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
2586     mirror::Object* old_ref = root->AsMirrorPtr();
2587     DCHECK_NE(old_ref, nullptr);
2588     if (collector_->live_words_bitmap_->HasAddress(old_ref)) {
2589       mirror::Object* new_ref = old_ref;
2590       if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) {
2591         new_ref = collector_->PostCompactBlackObjAddr(old_ref);
2592       } else if (collector_->live_words_bitmap_->Test(old_ref)) {
2593         DCHECK(collector_->moving_space_bitmap_->Test(old_ref)) << old_ref;
2594         new_ref = collector_->PostCompactOldObjAddr(old_ref);
2595       }
2596       if (old_ref != new_ref) {
2597         root->Assign(new_ref);
2598       }
2599     }
2600   }
2601 
2602  private:
2603   void VisitObject(LinearAllocKind kind,
2604                    void* obj,
2605                    uint8_t* start_boundary,
2606                    uint8_t* end_boundary) const REQUIRES_SHARED(Locks::mutator_lock_) {
2607     switch (kind) {
2608       case LinearAllocKind::kNoGCRoots:
2609         break;
2610       case LinearAllocKind::kGCRootArray:
2611         {
2612           GcRoot<mirror::Object>* root = reinterpret_cast<GcRoot<mirror::Object>*>(start_boundary);
2613           GcRoot<mirror::Object>* last = reinterpret_cast<GcRoot<mirror::Object>*>(end_boundary);
2614           for (; root < last; root++) {
2615             VisitRootIfNonNull(root->AddressWithoutBarrier());
2616           }
2617         }
2618         break;
2619       case LinearAllocKind::kArtMethodArray:
2620         {
2621           LengthPrefixedArray<ArtMethod>* array = static_cast<LengthPrefixedArray<ArtMethod>*>(obj);
2622           // Old methods are clobbered in debug builds. Check size to confirm if the array
2623           // has any GC roots to visit. See ClassLinker::LinkMethodsHelper::ClobberOldMethods()
2624           if (array->size() > 0) {
2625             if (collector_->pointer_size_ == PointerSize::k64) {
2626               ArtMethod::VisitArrayRoots<PointerSize::k64>(
2627                   *this, start_boundary, end_boundary, array);
2628             } else {
2629               DCHECK_EQ(collector_->pointer_size_, PointerSize::k32);
2630               ArtMethod::VisitArrayRoots<PointerSize::k32>(
2631                   *this, start_boundary, end_boundary, array);
2632             }
2633           }
2634         }
2635         break;
2636       case LinearAllocKind::kArtMethod:
2637         ArtMethod::VisitRoots(*this, start_boundary, end_boundary, static_cast<ArtMethod*>(obj));
2638         break;
2639       case LinearAllocKind::kArtFieldArray:
2640         ArtField::VisitArrayRoots(*this,
2641                                   start_boundary,
2642                                   end_boundary,
2643                                   static_cast<LengthPrefixedArray<ArtField>*>(obj));
2644         break;
2645       case LinearAllocKind::kDexCacheArray:
2646         {
2647           mirror::DexCachePair<mirror::Object>* first =
2648               reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(start_boundary);
2649           mirror::DexCachePair<mirror::Object>* last =
2650               reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(end_boundary);
2651           mirror::DexCache::VisitDexCachePairRoots(*this, first, last);
2652       }
2653     }
2654   }
2655 
2656   MarkCompact* const collector_;
2657   // Whether the last page was touched or not.
2658   bool last_page_touched_;
2659 };
2660 
2661 void MarkCompact::CompactionPause() {
2662   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2663   Runtime* runtime = Runtime::Current();
2664   non_moving_space_bitmap_ = non_moving_space_->GetLiveBitmap();
2665   if (kIsDebugBuild) {
2666     DCHECK_EQ(thread_running_gc_, Thread::Current());
2667     stack_low_addr_ = thread_running_gc_->GetStackEnd();
2668     stack_high_addr_ =
2669         reinterpret_cast<char*>(stack_low_addr_) + thread_running_gc_->GetStackSize();
2670   }
2671   {
2672     TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings());
2673     ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
2674     // Refresh data-structures to catch up on allocations that may have
2675     // happened since the marking-phase pause.
2676     // There could be several TLABs that got allocated since marking pause. We
2677     // don't want to compact them and instead update the TLAB info in TLS and
2678     // let mutators continue to use the TLABs.
2679     // We need to set all the bits in live-words bitmap corresponding to allocated
2680     // objects. Also, we need to find the objects that are overlapping with
2681     // page-begin boundaries. Unlike objects allocated before
2682     // black_allocations_begin_, which can be identified via mark-bitmap, we can get
2683     // this info only by walking the space past black_allocations_begin_, which
2684     // involves fetching object size.
2685     // TODO: We can reduce the time spent on this in a pause by performing one
2686     // round of this concurrently prior to the pause.
2687     UpdateMovingSpaceBlackAllocations();
2688     // TODO: If we want to avoid this allocation in a pause then we will have to
2689     // allocate an array for the entire moving-space size, which can be made
2690     // part of info_map_.
2691     moving_pages_status_ = new Atomic<PageState>[moving_first_objs_count_ + black_page_count_];
2692     if (kIsDebugBuild) {
2693       size_t len = moving_first_objs_count_ + black_page_count_;
2694       for (size_t i = 0; i < len; i++) {
2695           CHECK_EQ(moving_pages_status_[i].load(std::memory_order_relaxed),
2696                    PageState::kUnprocessed);
2697       }
2698     }
2699     // Iterate over the allocation_stack_, for every object in the non-moving
2700     // space:
2701     // 1. Mark the object in live bitmap
2702     // 2. Erase the object from allocation stack
2703     // 3. In the corresponding page, if the first-object vector needs updating
2704     // then do so.
2705     UpdateNonMovingSpaceBlackAllocations();
2706 
2707     // This store is visible to mutator (or uffd worker threads) as the mutator
2708     // lock's unlock guarantees that.
2709     compacting_ = true;
2710     // Start updating roots and system weaks now.
2711     heap_->GetReferenceProcessor()->UpdateRoots(this);
2712   }
2713   {
2714     TimingLogger::ScopedTiming t2("(Paused)UpdateClassLoaderRoots", GetTimings());
2715     ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
2716     {
2717       ClassLoaderRootsUpdater updater(this);
2718       runtime->GetClassLinker()->VisitClassLoaders(&updater);
2719     }
2720   }
2721 
2722   bool has_zygote_space = heap_->HasZygoteSpace();
2723   // TODO: Find out why it's not sufficient to visit native roots of immune
2724   // spaces, and why all the pre-zygote fork arenas have to be linearly updated.
2725   // Is it possible that some native root starts getting pointed to by some object
2726   // in moving space after fork? Or are we missing a write-barrier somewhere
2727   // when a native root is updated?
2728   GcVisitedArenaPool* arena_pool =
2729       static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
2730   if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) {
2731     // Besides fallback-mode, visit linear-alloc space in the pause for zygote
2732     // processes prior to first fork (that's when zygote space gets created).
2733     if (kIsDebugBuild && IsValidFd(uffd_)) {
2734       // All arenas allocated so far are expected to be pre-zygote fork.
2735       arena_pool->ForEachAllocatedArena(
2736           [](const TrackedArena& arena)
2737               REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); });
2738     }
2739     LinearAllocPageUpdater updater(this);
2740     arena_pool->VisitRoots(updater);
2741   } else {
2742     // Clear the flag as we care about this only if arenas are freed during
2743     // concurrent compaction.
2744     arena_pool->ClearArenasFreed();
2745     arena_pool->ForEachAllocatedArena(
2746         [this](const TrackedArena& arena) REQUIRES_SHARED(Locks::mutator_lock_) {
2747           // The pre-zygote fork arenas are not visited concurrently in the
2748           // zygote child processes. The native roots of the dirty objects
2749           // are visited during immune space visit below.
2750           if (!arena.IsPreZygoteForkArena()) {
2751             uint8_t* last_byte = arena.GetLastUsedByte();
2752             CHECK(linear_alloc_arenas_.insert({&arena, last_byte}).second);
2753           } else {
2754             LinearAllocPageUpdater updater(this);
2755             arena.VisitRoots(updater);
2756           }
2757         });
2758   }
2759 
2760   SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ true);
2761 
2762   {
2763     TimingLogger::ScopedTiming t2("(Paused)UpdateConcurrentRoots", GetTimings());
2764     runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
2765   }
2766   {
2767     // TODO: don't visit the transaction roots if it's not active.
2768     TimingLogger::ScopedTiming t2("(Paused)UpdateNonThreadRoots", GetTimings());
2769     runtime->VisitNonThreadRoots(this);
2770   }
2771 
2772   {
2773     // TODO: Immune space updating has to happen either before or after
2774     // remapping pre-compact pages to from-space. And depending on when it's
2775     // done, we have to invoke VisitRefsForCompaction() with or without
2776     // read-barrier.
2777     TimingLogger::ScopedTiming t2("(Paused)UpdateImmuneSpaces", GetTimings());
2778     accounting::CardTable* const card_table = heap_->GetCardTable();
2779     for (auto& space : immune_spaces_.GetSpaces()) {
2780       DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
2781       accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
2782       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
2783       // Having zygote-space indicates that the first zygote fork has taken
2784       // place and that the classes/dex-caches in immune-spaces may have allocations
2785       // (ArtMethod/ArtField arrays, dex-cache arrays, etc.) in
2786       // private-anonymous mappings that are not visited via userfaultfd. Visit them here.
2787       ImmuneSpaceUpdateObjVisitor visitor(this, /*visit_native_roots=*/false);
2788       if (table != nullptr) {
2789         table->ProcessCards();
2790         table->VisitObjects(ImmuneSpaceUpdateObjVisitor::Callback, &visitor);
2791       } else {
2792         WriterMutexLock wmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
2793         card_table->Scan<false>(
2794             live_bitmap,
2795             space->Begin(),
2796             space->Limit(),
2797             visitor,
2798             accounting::CardTable::kCardDirty - 1);
2799       }
2800     }
2801   }
2802 
2803   if (use_uffd_sigbus_) {
2804     // Release order w.r.t. the mutator threads' SIGBUS handler load.
2805     sigbus_in_progress_count_.store(0, std::memory_order_release);
2806   }
2807   KernelPreparation();
2808   UpdateNonMovingSpace();
2809   // fallback mode
2810   if (uffd_ == kFallbackMode) {
2811     CompactMovingSpace<kFallbackMode>(nullptr);
2812 
2813     int32_t freed_bytes = black_objs_slide_diff_;
2814     bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
2815     RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
2816   } else {
2817     DCHECK_EQ(compaction_in_progress_count_.load(std::memory_order_relaxed), 0u);
2818     DCHECK_EQ(compaction_buffer_counter_.load(std::memory_order_relaxed), 1);
2819     if (!use_uffd_sigbus_) {
2820       // We must start worker threads before resuming mutators to avoid deadlocks.
2821       heap_->GetThreadPool()->StartWorkers(thread_running_gc_);
2822     }
2823   }
2824   stack_low_addr_ = nullptr;
2825 }
2826 
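// Moves the pages of [to_addr, to_addr + map_size) over to 'from_addr' (keeping the
// original mapping in place when MREMAP_DONTUNMAP is available) and then, if needed,
// restores a mapping at 'to_addr': either by sliding in the shadow mapping, or by
// mmap'ing afresh with the backing selected via 'fd' (not needed when MREMAP_DONTUNMAP
// already left an empty mapping behind). The range at 'to_addr' is then ready to be
// registered with userfaultfd by the caller.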
2827 void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr,
2828                                             uint8_t* from_addr,
2829                                             size_t map_size,
2830                                             int fd,
2831                                             uint8_t* shadow_addr) {
2832   int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED;
2833   if (gHaveMremapDontunmap) {
2834     mremap_flags |= MREMAP_DONTUNMAP;
2835   }
2836 
2837   void* ret = mremap(to_addr, map_size, map_size, mremap_flags, from_addr);
2838   CHECK_EQ(ret, static_cast<void*>(from_addr))
2839       << "mremap to move pages failed: " << strerror(errno)
2840       << ". space-addr=" << reinterpret_cast<void*>(to_addr) << " size=" << PrettySize(map_size);
2841 
2842   if (shadow_addr != nullptr) {
2843     DCHECK_EQ(fd, kFdUnused);
2844     DCHECK(gHaveMremapDontunmap);
2845     ret = mremap(shadow_addr, map_size, map_size, mremap_flags, to_addr);
2846     CHECK_EQ(ret, static_cast<void*>(to_addr))
2847         << "mremap from shadow to to-space map failed: " << strerror(errno);
2848   } else if (!gHaveMremapDontunmap || fd > kFdUnused) {
2849     // Without MREMAP_DONTUNMAP the source mapping is unmapped by mremap. So mmap
2850     // the moving space again.
2851     int mmap_flags = MAP_FIXED;
2852     if (fd == kFdUnused) {
2853       // Use MAP_FIXED_NOREPLACE so that if someone else reserves the 'to_addr'
2854       // range in the meantime (which can happen when MREMAP_DONTUNMAP isn't
2855       // available), we fail instead of unmapping their mapping and causing
2856       // crashes elsewhere.
2857       mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
2858       // On some platforms MAP_ANONYMOUS expects fd to be -1.
2859       fd = -1;
2860     } else if (IsValidFd(fd)) {
2861       mmap_flags |= MAP_SHARED;
2862     } else {
2863       DCHECK_EQ(fd, kFdSharedAnon);
2864       mmap_flags |= MAP_SHARED | MAP_ANONYMOUS;
2865     }
2866     ret = mmap(to_addr, map_size, PROT_READ | PROT_WRITE, mmap_flags, fd, 0);
2867     CHECK_EQ(ret, static_cast<void*>(to_addr))
2868         << "mmap for moving space failed: " << strerror(errno);
2869   }
2870 }
2871 
2872 void MarkCompact::KernelPreparation() {
2873   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2874   uint8_t* moving_space_begin = bump_pointer_space_->Begin();
2875   size_t moving_space_size = bump_pointer_space_->Capacity();
2876   int mode = kCopyMode;
2877   size_t moving_space_register_sz;
2878   if (minor_fault_initialized_) {
2879     moving_space_register_sz = (moving_first_objs_count_ + black_page_count_) * kPageSize;
2880     if (shadow_to_space_map_.IsValid()) {
2881       size_t shadow_size = shadow_to_space_map_.Size();
2882       void* addr = shadow_to_space_map_.Begin();
2883       if (shadow_size < moving_space_register_sz) {
2884         addr = mremap(addr,
2885                       shadow_size,
2886                       moving_space_register_sz,
2887                       // Don't allow moving with obj-ptr poisoning as the
2888                       // mapping needs to be in <4GB address space.
2889                       kObjPtrPoisoning ? 0 : MREMAP_MAYMOVE,
2890                       /*new_address=*/nullptr);
2891         if (addr != MAP_FAILED) {
2892           // Succeeded in expanding the mapping. Update the MemMap entry for shadow map.
2893           MemMap temp = MemMap::MapPlaceholder(
2894               "moving-space-shadow", static_cast<uint8_t*>(addr), moving_space_register_sz);
2895           std::swap(shadow_to_space_map_, temp);
2896         }
2897       }
2898       if (addr != MAP_FAILED) {
2899         mode = kMinorFaultMode;
2900       } else {
2901         // We are not going to use the shadow map, so protect it to catch any
2902         // potential bugs.
2903         DCHECK_EQ(mprotect(shadow_to_space_map_.Begin(), shadow_to_space_map_.Size(), PROT_NONE), 0)
2904             << "mprotect failed: " << strerror(errno);
2905       }
2906     }
2907   } else {
2908     moving_space_register_sz = moving_space_size;
2909   }
2910 
2911   bool map_shared =
2912       minor_fault_initialized_ || (!Runtime::Current()->IsZygote() && uffd_minor_fault_supported_);
2913   uint8_t* shadow_addr = nullptr;
2914   if (moving_to_space_fd_ == kFdUnused && map_shared) {
2915     DCHECK(gHaveMremapDontunmap);
2916     DCHECK(shadow_to_space_map_.IsValid());
2917     DCHECK_EQ(shadow_to_space_map_.Size(), moving_space_size);
2918     shadow_addr = shadow_to_space_map_.Begin();
2919   }
2920 
2921   KernelPrepareRangeForUffd(moving_space_begin,
2922                             from_space_begin_,
2923                             moving_space_size,
2924                             moving_to_space_fd_,
2925                             shadow_addr);
2926 
2927   if (IsValidFd(uffd_)) {
2928     // Register the moving space with userfaultfd.
2929     RegisterUffd(moving_space_begin, moving_space_register_sz, mode);
2930     // Prepare linear-alloc for concurrent compaction.
2931     for (auto& data : linear_alloc_spaces_data_) {
2932       bool mmap_again = map_shared && !data.already_shared_;
2933       DCHECK_EQ(static_cast<ssize_t>(data.shadow_.Size()), data.end_ - data.begin_);
2934       // There could be threads running in suspended mode when the compaction
2935       // pause is being executed. In order to make the userfaultfd setup atomic,
2936       // the registration has to be done *before* moving the pages to shadow map.
2937       if (!mmap_again) {
2938         // See the comment in the constructor as to why it's conditionally done.
2939         RegisterUffd(data.begin_,
2940                      data.shadow_.Size(),
2941                      minor_fault_initialized_ ? kMinorFaultMode : kCopyMode);
2942       }
2943       KernelPrepareRangeForUffd(data.begin_,
2944                                 data.shadow_.Begin(),
2945                                 data.shadow_.Size(),
2946                                 mmap_again ? kFdSharedAnon : kFdUnused);
2947       if (mmap_again) {
2948         data.already_shared_ = true;
2949         RegisterUffd(data.begin_,
2950                      data.shadow_.Size(),
2951                      minor_fault_initialized_ ? kMinorFaultMode : kCopyMode);
2952       }
2953     }
2954   }
2955   if (map_shared) {
2956     // Start mapping linear-alloc MAP_SHARED only after the compaction pause of
2957     // the first GC in non-zygote processes. This is the GC which sets up
2958     // mappings for using minor-fault in future. Up to this point we run
2959     // userfaultfd in copy-mode, which requires the mappings (of linear-alloc)
2960     // to be MAP_PRIVATE.
2961     map_linear_alloc_shared_ = true;
2962   }
2963 }
2964 
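// Worker loop for the uffd read (non-SIGBUS) path: blocks in read(uffd_) and, for each
// page-fault event, either compacts/slides the faulted moving-space page or updates the
// faulted linear-alloc page. The loop exits once a fault on the special
// conc_compaction_termination_page_ is observed, which the GC thread triggers to signal
// the end of compaction.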
2965 template <int kMode>
2966 void MarkCompact::ConcurrentCompaction(uint8_t* buf) {
2967   DCHECK_NE(kMode, kFallbackMode);
2968   DCHECK(kMode != kCopyMode || buf != nullptr);
2969   size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
2970   while (true) {
2971     struct uffd_msg msg;
2972     ssize_t nread = read(uffd_, &msg, sizeof(msg));
2973     CHECK_GT(nread, 0);
2974     CHECK_EQ(msg.event, UFFD_EVENT_PAGEFAULT);
2975     DCHECK_EQ(nread, static_cast<ssize_t>(sizeof(msg)));
2976     uint8_t* fault_addr = reinterpret_cast<uint8_t*>(msg.arg.pagefault.address);
2977     if (fault_addr == conc_compaction_termination_page_) {
2978       // The counter doesn't need to be updated atomically as only one thread
2979       // would wake up against the gc-thread's load to this fault_addr. In fact,
2980       // the other threads would wake up serially because every exiting thread
2981       // will wake up gc-thread, which would retry load but again would find the
2982       // page missing. Also, the value will be flushed to caches due to the ioctl
2983       // syscall below.
2984       uint8_t ret = thread_pool_counter_--;
2985       // If 'gKernelHasFaultRetry == true' then only the last thread should map the
2986       // zeropage so that the gc-thread can proceed. Otherwise, each thread does
2987       // it and the gc-thread will repeat this fault until thread_pool_counter == 0.
2988       if (!gKernelHasFaultRetry || ret == 1) {
2989         ZeropageIoctl(fault_addr, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
2990       } else {
2991         struct uffdio_range uffd_range;
2992         uffd_range.start = msg.arg.pagefault.address;
2993         uffd_range.len = kPageSize;
2994         CHECK_EQ(ioctl(uffd_, UFFDIO_WAKE, &uffd_range), 0)
2995             << "ioctl_userfaultfd: wake failed for concurrent-compaction termination page: "
2996             << strerror(errno);
2997       }
2998       break;
2999     }
3000     uint8_t* fault_page = AlignDown(fault_addr, kPageSize);
3001     if (bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_addr))) {
3002       ConcurrentlyProcessMovingPage<kMode>(fault_page, buf, nr_moving_space_used_pages);
3003     } else if (minor_fault_initialized_) {
3004       ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>(
3005           fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0);
3006     } else {
3007       ConcurrentlyProcessLinearAllocPage<kCopyMode>(
3008           fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0);
3009     }
3010   }
3011 }
3012 
3013 bool MarkCompact::SigbusHandler(siginfo_t* info) {
3014   class ScopedInProgressCount {
3015    public:
3016     explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
3017       // Increment the count only if compaction is not done yet.
3018       SigbusCounterType prev =
3019           collector_->sigbus_in_progress_count_.load(std::memory_order_relaxed);
3020       while ((prev & kSigbusCounterCompactionDoneMask) == 0) {
3021         if (collector_->sigbus_in_progress_count_.compare_exchange_strong(
3022                 prev, prev + 1, std::memory_order_acquire)) {
3023           DCHECK_LT(prev, kSigbusCounterCompactionDoneMask - 1);
3024           compaction_done_ = false;
3025           return;
3026         }
3027       }
3028       compaction_done_ = true;
3029     }
3030 
3031     bool IsCompactionDone() const {
3032       return compaction_done_;
3033     }
3034 
3035     ~ScopedInProgressCount() {
3036       if (!IsCompactionDone()) {
3037         collector_->sigbus_in_progress_count_.fetch_sub(1, std::memory_order_release);
3038       }
3039     }
3040 
3041    private:
3042     MarkCompact* const collector_;
3043     bool compaction_done_;
3044   };
3045 
3046   DCHECK(use_uffd_sigbus_);
3047   if (info->si_code != BUS_ADRERR) {
3048     // Userfaultfd raises SIGBUS with BUS_ADRERR. All other causes can't be
3049     // handled here.
3050     return false;
3051   }
3052 
3053   ScopedInProgressCount spc(this);
3054   uint8_t* fault_page = AlignDown(reinterpret_cast<uint8_t*>(info->si_addr), kPageSize);
3055   if (!spc.IsCompactionDone()) {
3056     if (bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_page))) {
3057       Thread* self = Thread::Current();
3058       Locks::mutator_lock_->AssertSharedHeld(self);
3059       size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
3060       if (minor_fault_initialized_) {
3061         ConcurrentlyProcessMovingPage<kMinorFaultMode>(
3062             fault_page, nullptr, nr_moving_space_used_pages);
3063       } else {
3064         ConcurrentlyProcessMovingPage<kCopyMode>(
3065             fault_page, self->GetThreadLocalGcBuffer(), nr_moving_space_used_pages);
3066       }
3067       return true;
3068     } else {
3069       // Find the linear-alloc space containing fault-addr
3070       for (auto& data : linear_alloc_spaces_data_) {
3071         if (data.begin_ <= fault_page && data.end_ > fault_page) {
3072           if (minor_fault_initialized_) {
3073             ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>(fault_page, false);
3074           } else {
3075             ConcurrentlyProcessLinearAllocPage<kCopyMode>(fault_page, false);
3076           }
3077           return true;
3078         }
3079       }
3080       // Fault address doesn't belong to either moving-space or linear-alloc.
3081       return false;
3082     }
3083   } else {
3084     // We may get a spurious SIGBUS fault that was initiated before the
3085     // compaction finished but is delivered here afterwards. In that case, if the fault
3086     // address is valid then consider it handled.
3087     return bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_page)) ||
3088            linear_alloc_spaces_data_.end() !=
3089                std::find_if(linear_alloc_spaces_data_.begin(),
3090                             linear_alloc_spaces_data_.end(),
3091                             [fault_page](const LinearAllocSpaceData& data) {
3092                               return data.begin_ <= fault_page && data.end_ > fault_page;
3093                             });
3094   }
3095 }
3096 
3097 static void BackOff(uint32_t i) {
3098   static constexpr uint32_t kYieldMax = 5;
3099   // TODO: Consider adding x86 PAUSE and/or ARM YIELD here.
3100   if (i <= kYieldMax) {
3101     sched_yield();
3102   } else {
3103     // nanosleep is not in the async-signal-safe list, but bionic implements it
3104     // with a pure system call, so it should be fine.
3105     NanoSleep(10000ull * (i - kYieldMax));
3106   }
3107 }
3108 
3109 template <int kMode>
3110 void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page,
3111                                                 uint8_t* buf,
3112                                                 size_t nr_moving_space_used_pages) {
3113   class ScopedInProgressCount {
3114    public:
3115     explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
3116       collector_->compaction_in_progress_count_.fetch_add(1, std::memory_order_relaxed);
3117     }
3118 
3119     ~ScopedInProgressCount() {
3120       collector_->compaction_in_progress_count_.fetch_sub(1, std::memory_order_relaxed);
3121     }
3122 
3123    private:
3124     MarkCompact* collector_;
3125   };
3126 
3127   uint8_t* unused_space_begin =
3128       bump_pointer_space_->Begin() + nr_moving_space_used_pages * kPageSize;
3129   DCHECK(IsAligned<kPageSize>(unused_space_begin));
3130   DCHECK(kMode == kCopyMode || fault_page < unused_space_begin);
3131   if (kMode == kCopyMode && fault_page >= unused_space_begin) {
3132     // There is a race which allows more than one thread to install a
3133     // zero-page. But we can tolerate that. So absorb the EEXIST returned by
3134     // the ioctl and move on.
3135     ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true);
3136     return;
3137   }
3138   size_t page_idx = (fault_page - bump_pointer_space_->Begin()) / kPageSize;
3139   mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr();
3140   if (first_obj == nullptr) {
3141     // We should never have a case where two workers are trying to install a
3142     // zeropage in this range as we synchronize using moving_pages_status_[page_idx].
3143     PageState expected_state = PageState::kUnprocessed;
3144     if (moving_pages_status_[page_idx].compare_exchange_strong(
3145             expected_state, PageState::kProcessedAndMapping, std::memory_order_relaxed)) {
3146       // Note: ioctl acts as an acquire fence.
3147       ZeropageIoctl(fault_page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/true);
3148     } else {
3149       DCHECK_EQ(expected_state, PageState::kProcessedAndMapping);
3150     }
3151     return;
3152   }
3153 
3154   PageState state = moving_pages_status_[page_idx].load(
3155       use_uffd_sigbus_ ? std::memory_order_acquire : std::memory_order_relaxed);
3156   uint32_t backoff_count = 0;
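  // Page-state transitions handled by the switch below:
  //   kUnprocessed -> kMutatorProcessing   (the winning thread compacts/slides into 'buf')
  //   kMutatorProcessing -> kProcessedAndMapping   (contents ready; copy/zero ioctl maps them)
  //   kProcessedAndMapping -> kProcessedAndMapped  (copy mode with SIGBUS, after the ioctl)
  // kProcessing/kProcessed occur only in minor-fault mode, where mapping is finished via
  // MapProcessedPages() at the end of this function.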
3157   while (true) {
3158     switch (state) {
3159       case PageState::kUnprocessed: {
3160         // The increment to the in-progress counter must be done before updating
3161         // the page's state. Otherwise, we will end up leaving a window wherein
3162         // the GC-thread could observe that no worker is working on compaction
3163         // and could end up unregistering the moving space from userfaultfd.
3164         ScopedInProgressCount spc(this);
3165         // Acquire order to ensure we don't start writing to the shadow map, which is
3166         // shared, before the CAS is successful. Release order to ensure that the
3167         // increment to compaction_in_progress_count_ above is not re-ordered
3168         // after the CAS.
3169         if (moving_pages_status_[page_idx].compare_exchange_strong(
3170                 state, PageState::kMutatorProcessing, std::memory_order_acq_rel)) {
3171           if (kMode == kMinorFaultMode) {
3172             DCHECK_EQ(buf, nullptr);
3173             buf = shadow_to_space_map_.Begin() + page_idx * kPageSize;
3174           } else if (UNLIKELY(buf == nullptr)) {
3175             DCHECK_EQ(kMode, kCopyMode);
3176             uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed);
3177             // The buffer-map is one page bigger as the first buffer is used by GC-thread.
3178             CHECK_LE(idx, kMutatorCompactionBufferCount);
3179             buf = compaction_buffers_map_.Begin() + idx * kPageSize;
3180             DCHECK(compaction_buffers_map_.HasAddress(buf));
3181             Thread::Current()->SetThreadLocalGcBuffer(buf);
3182           }
3183 
3184           if (fault_page < post_compact_end_) {
3185             // The page has to be compacted.
3186             CompactPage(
3187                 first_obj, pre_compact_offset_moving_space_[page_idx], buf, kMode == kCopyMode);
3188           } else {
3189             DCHECK_NE(first_obj, nullptr);
3190             DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u);
3191             uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_);
3192             DCHECK(IsAligned<kPageSize>(pre_compact_page));
3193             SlideBlackPage(first_obj, page_idx, pre_compact_page, buf, kMode == kCopyMode);
3194           }
3195           // Nobody else would simultaneously modify this page's state so an
3196           // atomic store is sufficient. Use 'release' order to guarantee that
3197           // loads/stores to the page are finished before this store.
3198           moving_pages_status_[page_idx].store(PageState::kProcessedAndMapping,
3199                                                std::memory_order_release);
3200           if (kMode == kCopyMode) {
3201             CopyIoctl(fault_page, buf);
3202             if (use_uffd_sigbus_) {
3203               // Store is sufficient as no other thread modifies the status at this stage.
3204               moving_pages_status_[page_idx].store(PageState::kProcessedAndMapped,
3205                                                    std::memory_order_release);
3206             }
3207             return;
3208           } else {
3209             break;
3210           }
3211         }
3212       }
3213         continue;
3214       case PageState::kProcessing:
3215         DCHECK_EQ(kMode, kMinorFaultMode);
3216         if (moving_pages_status_[page_idx].compare_exchange_strong(
3217                 state, PageState::kProcessingAndMapping, std::memory_order_relaxed) &&
3218             !use_uffd_sigbus_) {
3219           // Somebody else took or will take care of finishing the compaction and
3220           // then mapping the page.
3221           return;
3222         }
3223         continue;
3224       case PageState::kProcessed:
3225         // The page is processed but not mapped. We should map it.
3226         break;
3227       case PageState::kProcessingAndMapping:
3228       case PageState::kMutatorProcessing:
3229       case PageState::kProcessedAndMapping:
3230         if (use_uffd_sigbus_) {
3231           // Wait for the page to be mapped before returning.
3232           BackOff(backoff_count++);
3233           state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3234           continue;
3235         }
3236         return;
3237       case PageState::kProcessedAndMapped:
3238         // Somebody else took care of the page.
3239         return;
3240     }
3241     break;
3242   }
3243 
3244   DCHECK_EQ(kMode, kMinorFaultMode);
3245   if (state == PageState::kUnprocessed) {
3246     MapProcessedPages</*kFirstPageMapping=*/true>(
3247         fault_page, moving_pages_status_, page_idx, nr_moving_space_used_pages);
3248   } else {
3249     DCHECK_EQ(state, PageState::kProcessed);
3250     MapProcessedPages</*kFirstPageMapping=*/false>(
3251         fault_page, moving_pages_status_, page_idx, nr_moving_space_used_pages);
3252   }
3253 }
3254 
3255 void MarkCompact::MapUpdatedLinearAllocPage(uint8_t* page,
3256                                             uint8_t* shadow_page,
3257                                             Atomic<PageState>& state,
3258                                             bool page_touched) {
3259   DCHECK(!minor_fault_initialized_);
3260   if (page_touched) {
3261     CopyIoctl(page, shadow_page);
3262   } else {
3263     // If the page wasn't touched, then it means it is empty and
3264     // is most likely not present on the shadow-side. Furthermore,
3265     // since the shadow is also registered with userfaultfd, a copy
3266     // ioctl would fail as the copy-from-user in the kernel would itself
3267     // cause a userfault. Instead, just map a zeropage, which is not only
3268     // correct but also efficient as it avoids unnecessary memcpy
3269     // in the kernel.
3270     ZeropageIoctl(page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
3271   }
3272   if (use_uffd_sigbus_) {
3273     // Store is sufficient as no other thread can modify the
3274     // status of this page at this point.
3275     state.store(PageState::kProcessedAndMapped, std::memory_order_release);
3276   }
3277 }
3278 
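// Userfault handler for linear-alloc pages: locates the arena and space containing
// fault_page, claims the page's state via CAS, updates its GC-roots through the
// shadow mapping (or backs off if another thread is already doing so), and then
// maps the processed page.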
3279 template <int kMode>
3280 void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool is_minor_fault) {
3281   DCHECK(!is_minor_fault || kMode == kMinorFaultMode);
3282   auto arena_iter = linear_alloc_arenas_.end();
3283   {
3284     TrackedArena temp_arena(fault_page);
3285     arena_iter = linear_alloc_arenas_.upper_bound(&temp_arena);
3286     arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter)
3287                                                             : linear_alloc_arenas_.end();
3288   }
3289   if (arena_iter == linear_alloc_arenas_.end() || arena_iter->second <= fault_page) {
3290     // Fault page isn't in any of the arenas that existed before we started
3291     // compaction. So map zeropage and return.
3292     ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false);
3293   } else {
3294     // fault_page should always belong to some arena.
3295     DCHECK(arena_iter != linear_alloc_arenas_.end())
3296         << "fault_page:" << static_cast<void*>(fault_page) << " is_minor_fault:" << is_minor_fault;
3297     // Find the linear-alloc space containing fault-page
3298     LinearAllocSpaceData* space_data = nullptr;
3299     for (auto& data : linear_alloc_spaces_data_) {
3300       if (data.begin_ <= fault_page && fault_page < data.end_) {
3301         space_data = &data;
3302         break;
3303       }
3304     }
3305     DCHECK_NE(space_data, nullptr);
3306     ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_;
3307     size_t page_idx = (fault_page - space_data->begin_) / kPageSize;
3308     Atomic<PageState>* state_arr =
3309         reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
3310     PageState state = state_arr[page_idx].load(use_uffd_sigbus_ ? std::memory_order_acquire :
3311                                                                   std::memory_order_relaxed);
3312     uint32_t backoff_count = 0;
3313     while (true) {
3314       switch (state) {
3315         case PageState::kUnprocessed: {
3316           // Acquire order to ensure we don't start writing to shadow map, which is
3317           // shared, before the CAS is successful.
3318           if (state_arr[page_idx].compare_exchange_strong(
3319                   state, PageState::kProcessingAndMapping, std::memory_order_acquire)) {
3320             if (kMode == kCopyMode || is_minor_fault) {
3321               uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page);
3322               DCHECK_NE(first_obj, nullptr);
3323               LinearAllocPageUpdater updater(this);
3324               updater(fault_page + diff, first_obj + diff);
3325               if (kMode == kCopyMode) {
3326                 MapUpdatedLinearAllocPage(fault_page,
3327                                           fault_page + diff,
3328                                           state_arr[page_idx],
3329                                           updater.WasLastPageTouched());
3330                 return;
3331               }
3332             } else {
3333               // Don't touch the page in this case (there is no reason to do so
3334               // anyway) as it would mean reading from first_obj, which could be on
3335               // another missing page and hence may cause this thread to block, leading
3336               // to deadlocks.
3337               // Force read the page if it is missing so that a zeropage gets mapped on
3338               // the shadow map and then CONTINUE ioctl will map it on linear-alloc.
3339               ForceRead(fault_page + diff);
3340             }
3341             MapProcessedPages</*kFirstPageMapping=*/true>(
3342                 fault_page, state_arr, page_idx, space_data->page_status_map_.Size());
3343             return;
3344           }
3345         }
3346           continue;
3347         case PageState::kProcessing:
3348           DCHECK_EQ(kMode, kMinorFaultMode);
3349           if (state_arr[page_idx].compare_exchange_strong(
3350                   state, PageState::kProcessingAndMapping, std::memory_order_relaxed) &&
3351               !use_uffd_sigbus_) {
3352             // Somebody else took or will take care of finishing the updates and
3353             // then mapping the page.
3354             return;
3355           }
3356           continue;
3357         case PageState::kProcessed:
3358           // The page is processed but not mapped. We should map it.
3359           break;
3360         case PageState::kMutatorProcessing:
3361           UNREACHABLE();
3362         case PageState::kProcessingAndMapping:
3363         case PageState::kProcessedAndMapping:
3364           if (use_uffd_sigbus_) {
3365             // Wait for the page to be mapped before returning.
3366             BackOff(backoff_count++);
3367             state = state_arr[page_idx].load(std::memory_order_acquire);
3368             continue;
3369           }
3370           return;
3371         case PageState::kProcessedAndMapped:
3372           // Somebody else took care of the page.
3373           return;
3374       }
3375       break;
3376     }
3377 
3378     DCHECK_EQ(kMode, kMinorFaultMode);
3379     DCHECK_EQ(state, PageState::kProcessed);
3380     if (!is_minor_fault) {
3381       // Force read the page if it is missing so that a zeropage gets mapped on
3382       // the shadow map and then CONTINUE ioctl will map it on linear-alloc.
3383       ForceRead(fault_page + diff);
3384     }
3385     MapProcessedPages</*kFirstPageMapping=*/false>(
3386         fault_page, state_arr, page_idx, space_data->page_status_map_.Size());
3387   }
3388 }
3389 
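// Background pass over all tracked linear-alloc arenas: visits each arena's pages
// up to the last allocated byte, updating GC-roots and mapping the pages, while
// cooperating (via the per-page state CAS) with mutators faulting on the same pages.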
3390 void MarkCompact::ProcessLinearAlloc() {
3391   GcVisitedArenaPool* arena_pool =
3392       static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
3393   for (auto& pair : linear_alloc_arenas_) {
3394     const TrackedArena* arena = pair.first;
3395     size_t arena_size;
3396     uint8_t* arena_begin;
3397     ptrdiff_t diff;
3398     bool others_processing;
3399     {
3400       // Acquire arena-pool's lock so that the arena being worked cannot be
3401       // deallocated at the same time.
3402       std::lock_guard<std::mutex> lock(arena_pool->GetLock());
3403       // If any arenas were freed since the compaction pause, then skip
3404       // visiting them.
3405       if (arena_pool->AreArenasFreed() && !arena_pool->FindAllocatedArena(arena)) {
3406         continue;
3407       }
3408       uint8_t* last_byte = pair.second;
3409       DCHECK_ALIGNED(last_byte, kPageSize);
3410       others_processing = false;
3411       arena_begin = arena->Begin();
3412       arena_size = arena->Size();
3413       // Find the linear-alloc space containing the arena
3414       LinearAllocSpaceData* space_data = nullptr;
3415       for (auto& data : linear_alloc_spaces_data_) {
3416         if (data.begin_ <= arena_begin && arena_begin < data.end_) {
3417           space_data = &data;
3418           break;
3419         }
3420       }
3421       DCHECK_NE(space_data, nullptr);
3422       diff = space_data->shadow_.Begin() - space_data->begin_;
3423       auto visitor = [space_data, last_byte, diff, this, &others_processing](
3424                          uint8_t* page_begin,
3425                          uint8_t* first_obj) REQUIRES_SHARED(Locks::mutator_lock_) {
3426         // No need to process pages past last_byte as they already have updated
3427         // gc-roots, if any.
3428         if (page_begin >= last_byte) {
3429           return;
3430         }
3431         LinearAllocPageUpdater updater(this);
3432         size_t page_idx = (page_begin - space_data->begin_) / kPageSize;
3433         DCHECK_LT(page_idx, space_data->page_status_map_.Size());
3434         Atomic<PageState>* state_arr =
3435             reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
3436         PageState expected_state = PageState::kUnprocessed;
3437         PageState desired_state =
3438             minor_fault_initialized_ ? PageState::kProcessing : PageState::kProcessingAndMapping;
3439         // Acquire order to ensure that we don't start accessing the shadow page,
3440         // which is shared with other threads, prior to the CAS. Also, for the same
3441         // reason, we use 'release' order for changing the state to 'processed'.
3442         if (state_arr[page_idx].compare_exchange_strong(
3443                 expected_state, desired_state, std::memory_order_acquire)) {
3444           updater(page_begin + diff, first_obj + diff);
3445           expected_state = PageState::kProcessing;
3446           if (!minor_fault_initialized_) {
3447             MapUpdatedLinearAllocPage(
3448                 page_begin, page_begin + diff, state_arr[page_idx], updater.WasLastPageTouched());
3449           } else if (!state_arr[page_idx].compare_exchange_strong(
3450                          expected_state, PageState::kProcessed, std::memory_order_release)) {
3451             DCHECK_EQ(expected_state, PageState::kProcessingAndMapping);
3452             // Force read in case the page was missing and updater didn't touch it
3453             // as there was nothing to do. This will ensure that a zeropage is
3454             // faulted on the shadow map.
3455             ForceRead(page_begin + diff);
3456             MapProcessedPages</*kFirstPageMapping=*/true>(
3457                 page_begin, state_arr, page_idx, space_data->page_status_map_.Size());
3458           }
3459         } else {
3460           others_processing = true;
3461         }
3462       };
3463 
3464       arena->VisitRoots(visitor);
3465     }
3466     // If we are not in minor-fault mode and no other thread was found to be
3467     // processing any pages in this arena, then we can madvise away the arena's
3468     // shadow pages. Otherwise, we would double the memory used for linear-alloc.
3469     if (!minor_fault_initialized_ && !others_processing) {
3470       ZeroAndReleasePages(arena_begin + diff, arena_size);
3471     }
3472   }
3473 }
3474 
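// Registers [addr, addr+size) with userfaultfd for MISSING faults, additionally
// requesting MINOR faults when running in minor-fault mode.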
3475 void MarkCompact::RegisterUffd(void* addr, size_t size, int mode) {
3476   DCHECK(IsValidFd(uffd_));
3477   struct uffdio_register uffd_register;
3478   uffd_register.range.start = reinterpret_cast<uintptr_t>(addr);
3479   uffd_register.range.len = size;
3480   uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
3481   if (mode == kMinorFaultMode) {
3482     uffd_register.mode |= UFFDIO_REGISTER_MODE_MINOR;
3483   }
3484   CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
3485       << "ioctl_userfaultfd: register failed: " << strerror(errno)
3486       << ". start:" << static_cast<void*>(addr) << " len:" << PrettySize(size);
3487 }
3488 
3489 void MarkCompact::UnregisterUffd(uint8_t* start, size_t len) {
3490   DCHECK(IsValidFd(uffd_));
3491   struct uffdio_range range;
3492   range.start = reinterpret_cast<uintptr_t>(start);
3493   range.len = len;
3494   CHECK_EQ(ioctl(uffd_, UFFDIO_UNREGISTER, &range), 0)
3495       << "ioctl_userfaultfd: unregister failed: " << strerror(errno)
3496       << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len);
3497   // Due to an oversight in the kernel implementation of 'unregister', the
3498   // waiting threads are woken up only for copy uffds. Therefore, for now, we
3499   // have to explicitly wake up the threads in the minor-fault case.
3500   // TODO: The fix in the kernel is being worked on. Once the kernel version
3501   // containing the fix is known, make it conditional on that as well.
3502   if (minor_fault_initialized_) {
3503     CHECK_EQ(ioctl(uffd_, UFFDIO_WAKE, &range), 0)
3504         << "ioctl_userfaultfd: wake failed: " << strerror(errno)
3505         << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len);
3506   }
3507 }
3508 
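// Concurrent compaction: compacts the moving space, waits for in-flight mutator
// page-compactions to drain, unregisters userfaultfd from the moving space,
// reclaims the from-space memory, updates linear-alloc spaces, and finally winds
// down the SIGBUS handlers or the compaction worker threads.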
3509 void MarkCompact::CompactionPhase() {
3510   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3511   {
3512     int32_t freed_bytes = black_objs_slide_diff_;
3513     bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
3514     RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
3515   }
3516 
3517   if (CanCompactMovingSpaceWithMinorFault()) {
3518     CompactMovingSpace<kMinorFaultMode>(/*page=*/nullptr);
3519   } else {
3520     CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin());
3521   }
3522 
3523   // Make sure no mutator is reading from the from-space before unregistering
3524   // userfaultfd from moving-space and then zapping from-space. The mutator
3525   // and GC may race to set a page state to processing or further along. The two
3526   // attempts are ordered. If the collector wins, then the mutator will see that
3527   // and not access the from-space page. If the mutator wins, then the
3528   // compaction_in_progress_count_ increment by the mutator happens-before the test
3529   // here, and we will not see a zero value until the mutator has completed.
3530   for (uint32_t i = 0; compaction_in_progress_count_.load(std::memory_order_acquire) > 0; i++) {
3531     BackOff(i);
3532   }
3533 
3534   size_t moving_space_size = bump_pointer_space_->Capacity();
3535   UnregisterUffd(bump_pointer_space_->Begin(),
3536                  minor_fault_initialized_ ?
3537                      (moving_first_objs_count_ + black_page_count_) * kPageSize :
3538                      moving_space_size);
3539 
3540   // Release all of the memory taken by moving-space's from-map
3541   if (minor_fault_initialized_) {
3542     if (IsValidFd(moving_from_space_fd_)) {
3543       // A strange behavior is observed wherein, between GC cycles, the from-space's
3544       // first page is accessed. But the memfd that is mapped on the from-space is
3545       // used on the to-space in the next GC cycle, causing issues with userfaultfd as
3546       // the page isn't missing. A possible reason for this could be prefetches. The
3547       // mprotect ensures that such accesses don't succeed.
3548       int ret = mprotect(from_space_begin_, moving_space_size, PROT_NONE);
3549       CHECK_EQ(ret, 0) << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno);
3550       // madvise(MADV_REMOVE) needs PROT_WRITE. Use fallocate() instead, which
3551       // does the same thing.
3552       ret = fallocate(moving_from_space_fd_,
3553                       FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
3554                       /*offset=*/0,
3555                       moving_space_size);
3556       CHECK_EQ(ret, 0) << "fallocate for from-space failed: " << strerror(errno);
3557     } else {
3558       // We don't have a valid fd, so use madvise(MADV_REMOVE) instead. mprotect
3559       // is not required in this case as we create fresh
3560       // MAP_SHARED+MAP_ANONYMOUS mapping in each GC cycle.
3561       int ret = madvise(from_space_begin_, moving_space_size, MADV_REMOVE);
3562       CHECK_EQ(ret, 0) << "madvise(MADV_REMOVE) failed for from-space map:" << strerror(errno);
3563     }
3564   } else {
3565     from_space_map_.MadviseDontNeedAndZero();
3566   }
3567   // mprotect(PROT_NONE) all maps except to-space in debug-mode to catch any unexpected accesses.
3568   if (shadow_to_space_map_.IsValid()) {
3569     DCHECK_EQ(mprotect(shadow_to_space_map_.Begin(), shadow_to_space_map_.Size(), PROT_NONE), 0)
3570         << "mprotect(PROT_NONE) for shadow-map failed:" << strerror(errno);
3571   }
3572   if (!IsValidFd(moving_from_space_fd_)) {
3573     // The other case is already mprotected above.
3574     DCHECK_EQ(mprotect(from_space_begin_, moving_space_size, PROT_NONE), 0)
3575         << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno);
3576   }
3577 
3578   ProcessLinearAlloc();
3579 
3580   if (use_uffd_sigbus_) {
3581     // Set compaction-done bit so that no new mutator threads start compaction
3582     // process in the SIGBUS handler.
3583     SigbusCounterType count = sigbus_in_progress_count_.fetch_or(kSigbusCounterCompactionDoneMask,
3584                                                                  std::memory_order_acq_rel);
3585     // Wait for SIGBUS handlers already in play.
3586     for (uint32_t i = 0; count > 0; i++) {
3587       BackOff(i);
3588       count = sigbus_in_progress_count_.load(std::memory_order_acquire);
3589       count &= ~kSigbusCounterCompactionDoneMask;
3590     }
3591   } else {
3592     DCHECK(IsAligned<kPageSize>(conc_compaction_termination_page_));
3593     // We will only iterate once if gKernelHasFaultRetry is true.
3594     do {
3595       // madvise the page so that we can get userfaults on it.
3596       ZeroAndReleasePages(conc_compaction_termination_page_, kPageSize);
3597       // The following load triggers 'special' userfaults. When received by the
3598       // thread-pool workers, they will exit out of the compaction task. This fault
3599       // happens because we madvised the page.
3600       ForceRead(conc_compaction_termination_page_);
3601     } while (thread_pool_counter_ > 0);
3602   }
3603   // Unregister linear-alloc spaces
3604   for (auto& data : linear_alloc_spaces_data_) {
3605     DCHECK_EQ(data.end_ - data.begin_, static_cast<ssize_t>(data.shadow_.Size()));
3606     UnregisterUffd(data.begin_, data.shadow_.Size());
3607     // madvise the linear-alloc's page-status array
3608     data.page_status_map_.MadviseDontNeedAndZero();
3609     // Madvise the entire linear-alloc space's shadow. In copy-mode it gets rid
3610     // of the pages which are still mapped. In minor-fault mode this unmaps all
3611     // pages, which helps reduce the mremap (done in the STW pause) time in the
3612     // next GC cycle.
3613     data.shadow_.MadviseDontNeedAndZero();
3614     if (minor_fault_initialized_) {
3615       DCHECK_EQ(mprotect(data.shadow_.Begin(), data.shadow_.Size(), PROT_NONE), 0)
3616           << "mprotect failed: " << strerror(errno);
3617     }
3618   }
3619 
3620   if (!use_uffd_sigbus_) {
3621     heap_->GetThreadPool()->StopWorkers(thread_running_gc_);
3622   }
3623 }
3624 
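// Buffers up to kBufferSize thread roots and flushes them onto the mark-stack in
// batches, marking each root before pushing it.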
3625 template <size_t kBufferSize>
3626 class MarkCompact::ThreadRootsVisitor : public RootVisitor {
3627  public:
3628   explicit ThreadRootsVisitor(MarkCompact* mark_compact, Thread* const self)
3629         : mark_compact_(mark_compact), self_(self) {}
3630 
3631   ~ThreadRootsVisitor() {
3632     Flush();
3633   }
3634 
3635   void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
3636       override REQUIRES_SHARED(Locks::mutator_lock_)
3637       REQUIRES(Locks::heap_bitmap_lock_) {
3638     for (size_t i = 0; i < count; i++) {
3639       mirror::Object* obj = *roots[i];
3640       if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
3641         Push(obj);
3642       }
3643     }
3644   }
3645 
3646   void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
3647                   size_t count,
3648                   const RootInfo& info ATTRIBUTE_UNUSED)
3649       override REQUIRES_SHARED(Locks::mutator_lock_)
3650       REQUIRES(Locks::heap_bitmap_lock_) {
3651     for (size_t i = 0; i < count; i++) {
3652       mirror::Object* obj = roots[i]->AsMirrorPtr();
3653       if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
3654         Push(obj);
3655       }
3656     }
3657   }
3658 
3659  private:
3660   void Flush() REQUIRES_SHARED(Locks::mutator_lock_)
3661                REQUIRES(Locks::heap_bitmap_lock_) {
3662     StackReference<mirror::Object>* start;
3663     StackReference<mirror::Object>* end;
3664     {
3665       MutexLock mu(self_, mark_compact_->lock_);
3666       // Loop here because even after expanding once it may not be sufficient to
3667       // accommodate all references. It's almost impossible, but there is no harm
3668       // in implementing it this way.
3669       while (!mark_compact_->mark_stack_->BumpBack(idx_, &start, &end)) {
3670         mark_compact_->ExpandMarkStack();
3671       }
3672     }
3673     while (idx_ > 0) {
3674       *start++ = roots_[--idx_];
3675     }
3676     DCHECK_EQ(start, end);
3677   }
3678 
3679   void Push(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
3680                                  REQUIRES(Locks::heap_bitmap_lock_) {
3681     if (UNLIKELY(idx_ >= kBufferSize)) {
3682       Flush();
3683     }
3684     roots_[idx_++].Assign(obj);
3685   }
3686 
3687   StackReference<mirror::Object> roots_[kBufferSize];
3688   size_t idx_ = 0;
3689   MarkCompact* const mark_compact_;
3690   Thread* const self_;
3691 };
3692 
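// Checkpoint closure executed on (or on behalf of) every thread to mark its roots
// and clear its thread-local GC buffer before compaction.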
3693 class MarkCompact::CheckpointMarkThreadRoots : public Closure {
3694  public:
3695   explicit CheckpointMarkThreadRoots(MarkCompact* mark_compact) : mark_compact_(mark_compact) {}
3696 
3697   void Run(Thread* thread) override NO_THREAD_SAFETY_ANALYSIS {
3698     ScopedTrace trace("Marking thread roots");
3699     // Note: self is not necessarily equal to thread since thread may be
3700     // suspended.
3701     Thread* const self = Thread::Current();
3702     CHECK(thread == self
3703           || thread->IsSuspended()
3704           || thread->GetState() == ThreadState::kWaitingPerformingGc)
3705         << thread->GetState() << " thread " << thread << " self " << self;
3706     {
3707       ThreadRootsVisitor</*kBufferSize*/ 20> visitor(mark_compact_, self);
3708       thread->VisitRoots(&visitor, kVisitRootFlagAllRoots);
3709     }
3710     // Clear page-buffer to prepare for compaction phase.
3711     thread->SetThreadLocalGcBuffer(nullptr);
3712 
3713     // If thread is a running mutator, then act on behalf of the garbage
3714     // collector. See the code in ThreadList::RunCheckpoint.
3715     mark_compact_->GetBarrier().Pass(self);
3716   }
3717 
3718  private:
3719   MarkCompact* const mark_compact_;
3720 };
3721 
3722 void MarkCompact::MarkRootsCheckpoint(Thread* self, Runtime* runtime) {
3723   // We revoke TLABs later, during the paused round of marking.
3724   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3725   CheckpointMarkThreadRoots check_point(this);
3726   ThreadList* thread_list = runtime->GetThreadList();
3727   gc_barrier_.Init(self, 0);
3728   // Request that the checkpoint be run on all threads, returning a count of the
3729   // threads that must run through the barrier, including self.
3730   size_t barrier_count = thread_list->RunCheckpoint(&check_point);
3731   // Release locks then wait for all mutator threads to pass the barrier.
3732   // If there are no threads to wait for, which implies that all the checkpoint
3733   // functions have finished, then there is no need to release the locks.
3734   if (barrier_count == 0) {
3735     return;
3736   }
3737   Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
3738   Locks::mutator_lock_->SharedUnlock(self);
3739   {
3740     ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
3741     gc_barrier_.Increment(self, barrier_count);
3742   }
3743   Locks::mutator_lock_->SharedLock(self);
3744   Locks::heap_bitmap_lock_->ExclusiveLock(self);
3745 }
3746 
3747 void MarkCompact::MarkNonThreadRoots(Runtime* runtime) {
3748   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3749   runtime->VisitNonThreadRoots(this);
3750 }
3751 
3752 void MarkCompact::MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime) {
3753   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3754   runtime->VisitConcurrentRoots(this, flags);
3755 }
3756 
3757 void MarkCompact::RevokeAllThreadLocalBuffers() {
3758   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3759   bump_pointer_space_->RevokeAllThreadLocalBuffers();
3760 }
3761 
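// Visitor used for card and bitmap scans; scans an object's references without
// updating the live-words bitmap.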
3762 class MarkCompact::ScanObjectVisitor {
3763  public:
3764   explicit ScanObjectVisitor(MarkCompact* const mark_compact) ALWAYS_INLINE
3765       : mark_compact_(mark_compact) {}
3766 
3767   void operator()(ObjPtr<mirror::Object> obj) const
3768       ALWAYS_INLINE
3769       REQUIRES(Locks::heap_bitmap_lock_)
3770       REQUIRES_SHARED(Locks::mutator_lock_) {
3771     mark_compact_->ScanObject</*kUpdateLiveWords*/ false>(obj.Ptr());
3772   }
3773 
3774  private:
3775   MarkCompact* const mark_compact_;
3776 };
3777 
3778 void MarkCompact::UpdateAndMarkModUnion() {
3779   accounting::CardTable* const card_table = heap_->GetCardTable();
3780   for (const auto& space : immune_spaces_.GetSpaces()) {
3781     const char* name = space->IsZygoteSpace()
3782         ? "UpdateAndMarkZygoteModUnionTable"
3783         : "UpdateAndMarkImageModUnionTable";
3784     DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space;
3785     TimingLogger::ScopedTiming t(name, GetTimings());
3786     accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
3787     if (table != nullptr) {
3788       // UpdateAndMarkReferences() doesn't visit Reference-type objects. But
3789       // that's fine because these objects are immutable enough (referent can
3790       // only be cleared) and hence the only referents they can have are intra-space.
3791       table->UpdateAndMarkReferences(this);
3792     } else {
3793       // No mod-union table, scan all dirty/aged cards in the corresponding
3794       // card-table. This can only occur for app images.
3795       card_table->Scan</*kClearCard*/ false>(space->GetMarkBitmap(),
3796                                              space->Begin(),
3797                                              space->End(),
3798                                              ScanObjectVisitor(this),
3799                                              gc::accounting::CardTable::kCardAged);
3800     }
3801   }
3802 }
3803 
3804 void MarkCompact::MarkReachableObjects() {
3805   UpdateAndMarkModUnion();
3806   // Recursively mark all the non-image bits set in the mark bitmap.
3807   ProcessMarkStack();
3808 }
3809 
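// Invoked by ModifyCardsAtomic for each card whose value changes; if the card was
// dirty, scans the marked objects covered by that card.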
3810 class MarkCompact::CardModifiedVisitor {
3811  public:
3812   explicit CardModifiedVisitor(MarkCompact* const mark_compact,
3813                                accounting::ContinuousSpaceBitmap* const bitmap,
3814                                accounting::CardTable* const card_table)
3815       : visitor_(mark_compact), bitmap_(bitmap), card_table_(card_table) {}
3816 
3817   void operator()(uint8_t* card,
3818                   uint8_t expected_value,
3819                   uint8_t new_value ATTRIBUTE_UNUSED) const {
3820     if (expected_value == accounting::CardTable::kCardDirty) {
3821       uintptr_t start = reinterpret_cast<uintptr_t>(card_table_->AddrFromCard(card));
3822       bitmap_->VisitMarkedRange(start, start + accounting::CardTable::kCardSize, visitor_);
3823     }
3824   }
3825 
3826  private:
3827   ScanObjectVisitor visitor_;
3828   accounting::ContinuousSpaceBitmap* bitmap_;
3829   accounting::CardTable* const card_table_;
3830 };
3831 
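// Scans gray (dirty/aged) cards of all continuous spaces. In the paused case, dirty
// cards are scanned directly (and cleared for non-immune spaces); in the concurrent
// case, dirty cards are first aged and the objects on them are scanned.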
3832 void MarkCompact::ScanDirtyObjects(bool paused, uint8_t minimum_age) {
3833   accounting::CardTable* card_table = heap_->GetCardTable();
3834   for (const auto& space : heap_->GetContinuousSpaces()) {
3835     const char* name = nullptr;
3836     switch (space->GetGcRetentionPolicy()) {
3837     case space::kGcRetentionPolicyNeverCollect:
3838       name = paused ? "(Paused)ScanGrayImmuneSpaceObjects" : "ScanGrayImmuneSpaceObjects";
3839       break;
3840     case space::kGcRetentionPolicyFullCollect:
3841       name = paused ? "(Paused)ScanGrayZygoteSpaceObjects" : "ScanGrayZygoteSpaceObjects";
3842       break;
3843     case space::kGcRetentionPolicyAlwaysCollect:
3844       name = paused ? "(Paused)ScanGrayAllocSpaceObjects" : "ScanGrayAllocSpaceObjects";
3845       break;
3846     default:
3847       LOG(FATAL) << "Unreachable";
3848       UNREACHABLE();
3849     }
3850     TimingLogger::ScopedTiming t(name, GetTimings());
3851     ScanObjectVisitor visitor(this);
3852     const bool is_immune_space = space->IsZygoteSpace() || space->IsImageSpace();
3853     if (paused) {
3854       DCHECK_EQ(minimum_age, gc::accounting::CardTable::kCardDirty);
3855       // We can clear the card-table for any non-immune space.
3856       if (is_immune_space) {
3857         card_table->Scan</*kClearCard*/false>(space->GetMarkBitmap(),
3858                                               space->Begin(),
3859                                               space->End(),
3860                                               visitor,
3861                                               minimum_age);
3862       } else {
3863         card_table->Scan</*kClearCard*/true>(space->GetMarkBitmap(),
3864                                              space->Begin(),
3865                                              space->End(),
3866                                              visitor,
3867                                              minimum_age);
3868       }
3869     } else {
3870       DCHECK_EQ(minimum_age, gc::accounting::CardTable::kCardAged);
3871       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
3872       if (table) {
3873         table->ProcessCards();
3874         card_table->Scan</*kClearCard*/false>(space->GetMarkBitmap(),
3875                                               space->Begin(),
3876                                               space->End(),
3877                                               visitor,
3878                                               minimum_age);
3879       } else {
3880         CardModifiedVisitor card_modified_visitor(this, space->GetMarkBitmap(), card_table);
3881         // For the alloc spaces we should age the dirty cards and clear the rest.
3882         // For image and zygote-space without mod-union-table, age the dirty
3883         // cards but keep the already aged cards unchanged.
3884         // In either case, visit the objects on the cards that were changed from
3885         // dirty to aged.
3886         if (is_immune_space) {
3887           card_table->ModifyCardsAtomic(space->Begin(),
3888                                         space->End(),
3889                                         [](uint8_t card) {
3890                                           return (card == gc::accounting::CardTable::kCardClean)
3891                                                   ? card
3892                                                   : gc::accounting::CardTable::kCardAged;
3893                                         },
3894                                         card_modified_visitor);
3895         } else {
3896           card_table->ModifyCardsAtomic(space->Begin(),
3897                                         space->End(),
3898                                         AgeCardVisitor(),
3899                                         card_modified_visitor);
3900         }
3901       }
3902     }
3903   }
3904 }
3905 
3906 void MarkCompact::RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age) {
3907   ScanDirtyObjects(paused, minimum_age);
3908   ProcessMarkStack();
3909 }
3910 
3911 void MarkCompact::MarkRoots(VisitRootFlags flags) {
3912   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3913   Runtime* runtime = Runtime::Current();
3914   // Make sure that the checkpoint which collects the stack roots is the first
3915   // one capturing GC-roots. As this one is supposed to find the allocation
3916   // address, everything allocated after that (during this marking phase) will
3917   // be considered 'marked'.
3918   MarkRootsCheckpoint(thread_running_gc_, runtime);
3919   MarkNonThreadRoots(runtime);
3920   MarkConcurrentRoots(flags, runtime);
3921 }
3922 
3923 void MarkCompact::PreCleanCards() {
3924   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3925   CHECK(!Locks::mutator_lock_->IsExclusiveHeld(thread_running_gc_));
3926   MarkRoots(static_cast<VisitRootFlags>(kVisitRootFlagClearRootLog | kVisitRootFlagNewRoots));
3927   RecursiveMarkDirtyObjects(/*paused*/ false, accounting::CardTable::kCardDirty - 1);
3928 }
3929 
3930 // In a concurrent marking algorithm, if we are not using a write/read barrier, as
3931 // in this case, then we need a stop-the-world (STW) round in the end to mark
3932 // objects which were written into concurrently while concurrent marking was
3933 // performed.
3934 // In order to minimize the pause time, we could take one of the two approaches:
3935 // 1. Keep repeating concurrent marking of dirty cards until the time spent goes
3936 // below a threshold.
3937 // 2. Do two rounds concurrently and then attempt a paused one. If we figure
3938 // that it's taking too long, then resume mutators and retry.
3939 //
3940 // Given the non-trivial fixed overhead of running a round (card table and root
3941 // scan), it might be better to go with approach 2.
3942 void MarkCompact::MarkingPhase() {
3943   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3944   DCHECK_EQ(thread_running_gc_, Thread::Current());
3945   WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
3946   BindAndResetBitmaps();
3947   MarkZygoteLargeObjects();
3948   MarkRoots(
3949         static_cast<VisitRootFlags>(kVisitRootFlagAllRoots | kVisitRootFlagStartLoggingNewRoots));
3950   MarkReachableObjects();
3951   // Pre-clean dirtied cards to reduce pauses.
3952   PreCleanCards();
3953 
3954   // Set up reference processing and forward soft references once before enabling
3955   // the slow path (in MarkingPause).
3956   ReferenceProcessor* rp = GetHeap()->GetReferenceProcessor();
3957   bool clear_soft_references = GetCurrentIteration()->GetClearSoftReferences();
3958   rp->Setup(thread_running_gc_, this, /*concurrent=*/ true, clear_soft_references);
3959   if (!clear_soft_references) {
3960     // Forward as many SoftReferences as possible before inhibiting reference access.
3961     rp->ForwardSoftReferences(GetTimings());
3962   }
3963 }
3964 
3965 class MarkCompact::RefFieldsVisitor {
3966  public:
3967   ALWAYS_INLINE explicit RefFieldsVisitor(MarkCompact* const mark_compact)
3968     : mark_compact_(mark_compact) {}
3969 
3970   ALWAYS_INLINE void operator()(mirror::Object* obj,
3971                                 MemberOffset offset,
3972                                 bool is_static ATTRIBUTE_UNUSED) const
3973       REQUIRES(Locks::heap_bitmap_lock_)
3974       REQUIRES_SHARED(Locks::mutator_lock_) {
3975     if (kCheckLocks) {
3976       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
3977       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
3978     }
3979     mark_compact_->MarkObject(obj->GetFieldObject<mirror::Object>(offset), obj, offset);
3980   }
3981 
3982   void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const
3983       REQUIRES(Locks::heap_bitmap_lock_)
3984       REQUIRES_SHARED(Locks::mutator_lock_) {
3985     mark_compact_->DelayReferenceReferent(klass, ref);
3986   }
3987 
3988   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
3989       REQUIRES(Locks::heap_bitmap_lock_)
3990       REQUIRES_SHARED(Locks::mutator_lock_) {
3991     if (!root->IsNull()) {
3992       VisitRoot(root);
3993     }
3994   }
3995 
3996   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
3997       REQUIRES(Locks::heap_bitmap_lock_)
3998       REQUIRES_SHARED(Locks::mutator_lock_) {
3999     if (kCheckLocks) {
4000       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
4001       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
4002     }
4003     mark_compact_->MarkObject(root->AsMirrorPtr());
4004   }
4005 
4006  private:
4007   MarkCompact* const mark_compact_;
4008 };
4009 
4010 template <size_t kAlignment>
4011 size_t MarkCompact::LiveWordsBitmap<kAlignment>::LiveBytesInBitmapWord(size_t chunk_idx) const {
4012   const size_t index = chunk_idx * kBitmapWordsPerVectorWord;
4013   size_t words = 0;
4014   for (uint32_t i = 0; i < kBitmapWordsPerVectorWord; i++) {
4015     words += POPCOUNT(Bitmap::Begin()[index + i]);
4016   }
4017   return words * kAlignment;
4018 }
4019 
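// Records liveness for obj: sets its bits in the live-words bitmap and adds its
// (aligned) size to the per-chunk live-bytes counters that are later used to
// compute post-compact addresses.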
4020 void MarkCompact::UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) {
4021   DCHECK(obj != nullptr);
4022   DCHECK_EQ(obj_size, obj->SizeOf<kDefaultVerifyFlags>());
4023   uintptr_t obj_begin = reinterpret_cast<uintptr_t>(obj);
4024   UpdateClassAfterObjectMap(obj);
4025   size_t size = RoundUp(obj_size, kAlignment);
4026   uintptr_t bit_index = live_words_bitmap_->SetLiveWords(obj_begin, size);
4027   size_t chunk_idx = (obj_begin - live_words_bitmap_->Begin()) / kOffsetChunkSize;
4028   // Compute the bit-index within the chunk-info vector word.
4029   bit_index %= kBitsPerVectorWord;
4030   size_t first_chunk_portion = std::min(size, (kBitsPerVectorWord - bit_index) * kAlignment);
4031 
4032   chunk_info_vec_[chunk_idx++] += first_chunk_portion;
4033   DCHECK_LE(first_chunk_portion, size);
4034   for (size -= first_chunk_portion; size > kOffsetChunkSize; size -= kOffsetChunkSize) {
4035     DCHECK_EQ(chunk_info_vec_[chunk_idx], 0u);
4036     chunk_info_vec_[chunk_idx++] = kOffsetChunkSize;
4037   }
4038   chunk_info_vec_[chunk_idx] += size;
4039   freed_objects_--;
4040 }
4041 
4042 template <bool kUpdateLiveWords>
4043 void MarkCompact::ScanObject(mirror::Object* obj) {
4044   // The size of `obj` is used both here (to update `bytes_scanned_`) and in
4045   // `UpdateLivenessInfo`. As fetching this value can be expensive, do it once
4046   // here and pass that information to `UpdateLivenessInfo`.
4047   size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>();
4048   bytes_scanned_ += obj_size;
4049 
4050   RefFieldsVisitor visitor(this);
4051   DCHECK(IsMarked(obj)) << "Scanning unmarked object " << obj << "\n" << heap_->DumpSpaces();
4052   if (kUpdateLiveWords && moving_space_bitmap_->HasAddress(obj)) {
4053     UpdateLivenessInfo(obj, obj_size);
4054   }
4055   obj->VisitReferences(visitor, visitor);
4056 }
4057 
4058 // Scan anything that's on the mark stack.
4059 void MarkCompact::ProcessMarkStack() {
4060   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4061   // TODO: try prefetch like in CMS
4062   while (!mark_stack_->IsEmpty()) {
4063     mirror::Object* obj = mark_stack_->PopBack();
4064     DCHECK(obj != nullptr);
4065     ScanObject</*kUpdateLiveWords*/ true>(obj);
4066   }
4067 }
4068 
4069 void MarkCompact::ExpandMarkStack() {
4070   const size_t new_size = mark_stack_->Capacity() * 2;
4071   std::vector<StackReference<mirror::Object>> temp(mark_stack_->Begin(),
4072                                                    mark_stack_->End());
4073   mark_stack_->Resize(new_size);
4074   for (auto& ref : temp) {
4075     mark_stack_->PushBack(ref.AsMirrorPtr());
4076   }
4077   DCHECK(!mark_stack_->IsFull());
4078 }
4079 
4080 inline void MarkCompact::PushOnMarkStack(mirror::Object* obj) {
4081   if (UNLIKELY(mark_stack_->IsFull())) {
4082     ExpandMarkStack();
4083   }
4084   mark_stack_->PushBack(obj);
4085 }
4086 
4087 inline void MarkCompact::MarkObjectNonNull(mirror::Object* obj,
4088                                            mirror::Object* holder,
4089                                            MemberOffset offset) {
4090   DCHECK(obj != nullptr);
4091   if (MarkObjectNonNullNoPush</*kParallel*/false>(obj, holder, offset)) {
4092     PushOnMarkStack(obj);
4093   }
4094 }
4095 
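// Returns true if obj was newly marked here (and so should be pushed on the
// mark-stack); returns false if it was already marked or lives in an immune or
// large-object space.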
4096 template <bool kParallel>
4097 inline bool MarkCompact::MarkObjectNonNullNoPush(mirror::Object* obj,
4098                                                  mirror::Object* holder,
4099                                                  MemberOffset offset) {
4100   // We expect most of the references to be in bump-pointer space, so try that
4101   // first to keep the cost of this function minimal.
4102   if (LIKELY(moving_space_bitmap_->HasAddress(obj))) {
4103     return kParallel ? !moving_space_bitmap_->AtomicTestAndSet(obj)
4104                      : !moving_space_bitmap_->Set(obj);
4105   } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4106     return kParallel ? !non_moving_space_bitmap_->AtomicTestAndSet(obj)
4107                      : !non_moving_space_bitmap_->Set(obj);
4108   } else if (immune_spaces_.ContainsObject(obj)) {
4109     DCHECK(IsMarked(obj) != nullptr);
4110     return false;
4111   } else {
4112     // Must be a large-object space, otherwise it's a case of heap corruption.
4113     if (!IsAligned<kPageSize>(obj)) {
4114       // Objects in the large-object space are page aligned. So if we have an
4115       // object which doesn't belong to any space and is not page-aligned either,
4116       // then it's memory corruption.
4117       // TODO: implement protect/unprotect in bump-pointer space.
4118       heap_->GetVerification()->LogHeapCorruption(holder, offset, obj, /*fatal*/ true);
4119     }
4120     DCHECK_NE(heap_->GetLargeObjectsSpace(), nullptr)
4121         << "ref=" << obj
4122         << " doesn't belong to any of the spaces and large object space doesn't exist";
4123     accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4124     DCHECK(los_bitmap->HasAddress(obj));
4125     if (kParallel) {
4126       los_bitmap->AtomicTestAndSet(obj);
4127     } else {
4128       los_bitmap->Set(obj);
4129     }
4130     // We only have strings and primitive arrays in the large-object space, as
4131     // the DCHECK below asserts. So there is no reason to push onto the mark-stack.
4132     DCHECK(obj->IsString() || (obj->IsArrayInstance() && !obj->IsObjectArray()));
4133     return false;
4134   }
4135 }
4136 
4137 inline void MarkCompact::MarkObject(mirror::Object* obj,
4138                                     mirror::Object* holder,
4139                                     MemberOffset offset) {
4140   if (obj != nullptr) {
4141     MarkObjectNonNull(obj, holder, offset);
4142   }
4143 }
4144 
4145 mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) {
4146   MarkObject(obj, nullptr, MemberOffset(0));
4147   return obj;
4148 }
4149 
4150 void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
4151                                     bool do_atomic_update ATTRIBUTE_UNUSED) {
4152   MarkObject(obj->AsMirrorPtr(), nullptr, MemberOffset(0));
4153 }
4154 
4155 void MarkCompact::VisitRoots(mirror::Object*** roots,
4156                              size_t count,
4157                              const RootInfo& info) {
4158   if (compacting_) {
4159     for (size_t i = 0; i < count; ++i) {
4160       UpdateRoot(roots[i], info);
4161     }
4162   } else {
4163     for (size_t i = 0; i < count; ++i) {
4164       MarkObjectNonNull(*roots[i]);
4165     }
4166   }
4167 }
4168 
4169 void MarkCompact::VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
4170                              size_t count,
4171                              const RootInfo& info) {
4172   // TODO: do we need to check if the root is null or not?
4173   if (compacting_) {
4174     for (size_t i = 0; i < count; ++i) {
4175       UpdateRoot(roots[i], info);
4176     }
4177   } else {
4178     for (size_t i = 0; i < count; ++i) {
4179       MarkObjectNonNull(roots[i]->AsMirrorPtr());
4180     }
4181   }
4182 }
4183 
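// Returns the marked address of obj, or null if it is not marked. During
// compaction, marked moving-space objects resolve to their post-compact addresses.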
4184 mirror::Object* MarkCompact::IsMarked(mirror::Object* obj) {
4185   if (moving_space_bitmap_->HasAddress(obj)) {
4186     const bool is_black = reinterpret_cast<uint8_t*>(obj) >= black_allocations_begin_;
4187     if (compacting_) {
4188       if (is_black) {
4189         return PostCompactBlackObjAddr(obj);
4190       } else if (live_words_bitmap_->Test(obj)) {
4191         return PostCompactOldObjAddr(obj);
4192       } else {
4193         return nullptr;
4194       }
4195     }
4196     return (is_black || moving_space_bitmap_->Test(obj)) ? obj : nullptr;
4197   } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4198     return non_moving_space_bitmap_->Test(obj) ? obj : nullptr;
4199   } else if (immune_spaces_.ContainsObject(obj)) {
4200     return obj;
4201   } else {
4202     DCHECK(heap_->GetLargeObjectsSpace())
4203         << "ref=" << obj
4204         << " doesn't belong to any of the spaces and large object space doesn't exist";
4205     accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4206     if (los_bitmap->HasAddress(obj)) {
4207       DCHECK(IsAligned<kPageSize>(obj));
4208       return los_bitmap->Test(obj) ? obj : nullptr;
4209     } else {
4210       // The given obj is not in any of the known spaces, so return null. This could
4211       // happen, for instance, in interpreter caches wherein a concurrent update
4212       // to the cache could result in obj being a non-reference. This is
4213       // tolerable because SweepInterpreterCaches only updates if the given
4214       // object has moved, which can't be the case for a non-reference.
4215       return nullptr;
4216     }
4217   }
4218 }
4219 
4220 bool MarkCompact::IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
4221                                               bool do_atomic_update ATTRIBUTE_UNUSED) {
4222   mirror::Object* ref = obj->AsMirrorPtr();
4223   if (ref == nullptr) {
4224     return true;
4225   }
4226   return IsMarked(ref);
4227 }
4228 
4229 // Process the 'referent' field in a java.lang.ref.Reference. If the referent
4230 // has not yet been marked, put it on the appropriate list in the heap for later
4231 // processing.
4232 void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
4233                                          ObjPtr<mirror::Reference> ref) {
4234   heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, this);
4235 }
4236 
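// Post-GC cleanup: releases compaction buffers and per-GC metadata, clears bitmaps
// and the mark-stack, and swaps the moving-space memfds for the next cycle.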
4237 void MarkCompact::FinishPhase() {
4238   GetCurrentIteration()->SetScannedBytes(bytes_scanned_);
4239   bool is_zygote = Runtime::Current()->IsZygote();
4240   compacting_ = false;
4241   minor_fault_initialized_ = !is_zygote && uffd_minor_fault_supported_;
4242   // Madvise compaction buffers. When using the threaded implementation, skip the first
4243   // page, which is used by the gc-thread for the next iteration. Otherwise, we get into
4244   // a deadlock due to a userfault on it in the next iteration. This page is not consuming any
4245   // physical memory because we already madvised it above and then we triggered a read
4246   // userfault, which maps a special zero-page.
4247   if (use_uffd_sigbus_ || !minor_fault_initialized_ || !shadow_to_space_map_.IsValid() ||
4248       shadow_to_space_map_.Size() < (moving_first_objs_count_ + black_page_count_) * kPageSize) {
4249     size_t adjustment = use_uffd_sigbus_ ? 0 : kPageSize;
4250     ZeroAndReleasePages(compaction_buffers_map_.Begin() + adjustment,
4251                         compaction_buffers_map_.Size() - adjustment);
4252   } else if (shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) {
4253     // Now that we are going to use minor-faults from next GC cycle, we can
4254     // unmap the buffers used by worker threads.
4255     compaction_buffers_map_.SetSize(kPageSize);
4256   }
4257   info_map_.MadviseDontNeedAndZero();
4258   live_words_bitmap_->ClearBitmap();
4259   // TODO: We can clear this bitmap right before compaction pause. But in that
4260   // case we need to ensure that we don't assert on this bitmap afterwards.
4261   // Also, we would still need to clear it here again as we may have to use the
4262   // bitmap for black-allocations (see UpdateMovingSpaceBlackAllocations()).
4263   moving_space_bitmap_->Clear();
4264 
4265   if (UNLIKELY(is_zygote && IsValidFd(uffd_))) {
4266     heap_->DeleteThreadPool();
4267     // This unregisters all ranges as a side-effect.
4268     close(uffd_);
4269     uffd_ = kFdUnused;
4270     uffd_initialized_ = false;
4271   }
4272   CHECK(mark_stack_->IsEmpty());  // Ensure that the mark stack is empty.
4273   mark_stack_->Reset();
4274   DCHECK_EQ(thread_running_gc_, Thread::Current());
4275   if (kIsDebugBuild) {
4276     MutexLock mu(thread_running_gc_, lock_);
4277     if (updated_roots_.get() != nullptr) {
4278       updated_roots_->clear();
4279     }
4280   }
4281   class_after_obj_ordered_map_.clear();
4282   delete[] moving_pages_status_;
4283   linear_alloc_arenas_.clear();
4284   {
4285     ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
4286     WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_);
4287     heap_->ClearMarkedObjects();
4288   }
4289   std::swap(moving_to_space_fd_, moving_from_space_fd_);
4290   if (IsValidFd(moving_to_space_fd_)) {
4291     // Confirm that the memfd to be used on to-space in next GC cycle is empty.
4292     struct stat buf;
4293     DCHECK_EQ(fstat(moving_to_space_fd_, &buf), 0) << "fstat failed: " << strerror(errno);
4294     DCHECK_EQ(buf.st_blocks, 0u);
4295   }
4296 }
4297 
4298 }  // namespace collector
4299 }  // namespace gc
4300 }  // namespace art
4301