/*
 * Copyright 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <fcntl.h>
// Glibc v2.19 doesn't include these in fcntl.h, so host builds will fail without them.
#if !defined(FALLOC_FL_PUNCH_HOLE) || !defined(FALLOC_FL_KEEP_SIZE)
#include <linux/falloc.h>
#endif
#include <linux/userfaultfd.h>
#include <poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <fstream>
#include <numeric>
#include <string>
#include <string_view>
#include <vector>

#include "android-base/file.h"
#include "android-base/parsebool.h"
#include "android-base/parseint.h"
#include "android-base/properties.h"
#include "android-base/strings.h"
#include "base/file_utils.h"
#include "base/memfd.h"
#include "base/quasi_atomic.h"
#include "base/systrace.h"
#include "base/utils.h"
#include "gc/accounting/mod_union_table-inl.h"
#include "gc/collector_type.h"
#include "gc/reference_processor.h"
#include "gc/space/bump_pointer_space.h"
#include "gc/space/space-inl.h"
#include "gc/task_processor.h"
#include "gc/verification-inl.h"
#include "jit/jit_code_cache.h"
#include "mark_compact-inl.h"
#include "mirror/object-refvisitor-inl.h"
#include "read_barrier_config.h"
#include "scoped_thread_state_change-inl.h"
#include "sigchain.h"
#include "thread_list.h"

#ifdef ART_TARGET_ANDROID
#include "android-modules-utils/sdk_level.h"
#include "com_android_art.h"
#include "com_android_art_flags.h"
#endif

// See aosp/2996596 for where these values came from.
#ifndef UFFDIO_COPY_MODE_MMAP_TRYLOCK
#define UFFDIO_COPY_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
#endif
#ifndef UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK
#define UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
#endif
#ifdef ART_TARGET_ANDROID
namespace {

using ::android::base::GetBoolProperty;
using ::android::base::ParseBool;
using ::android::base::ParseBoolResult;
using ::android::modules::sdklevel::IsAtLeastV;

}  // namespace
#endif

namespace art HIDDEN {

static bool HaveMremapDontunmap() {
  const size_t page_size = GetPageSizeSlow();
  void* old = mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
  CHECK_NE(old, MAP_FAILED);
  void* addr = mremap(old, page_size, page_size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, nullptr);
  CHECK_EQ(munmap(old, page_size), 0);
  if (addr != MAP_FAILED) {
    CHECK_EQ(munmap(addr, page_size), 0);
    return true;
  } else {
    return false;
  }
}

static bool gUffdSupportsMmapTrylock = false;
// We require the MREMAP_DONTUNMAP functionality of the mremap syscall, which
// was introduced in kernel version 5.13, but has also been backported to GKI kernels.
static bool gHaveMremapDontunmap = IsKernelVersionAtLeast(5, 13) || HaveMremapDontunmap();
// Bitmap of features supported by userfaultfd. This is obtained via the uffd API ioctl.
static uint64_t gUffdFeatures = 0;
// Both missing and minor faults on shmem are needed, but only for minor-fault mode.
static constexpr uint64_t kUffdFeaturesForMinorFault =
    UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM;
static constexpr uint64_t kUffdFeaturesForSigbus = UFFD_FEATURE_SIGBUS;
// A region that is more than kBlackDenseRegionThreshold percent live doesn't
// need to be compacted, as it is already densely packed.
static constexpr uint kBlackDenseRegionThreshold = 95U;
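// (Illustration, assuming 4 KiB pages: a page passes this threshold only when
// it holds at least ceil(0.95 * 4096) = 3892 live bytes, i.e. has at most 204
// bytes of non-live space.)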
// We consider the SIGBUS feature necessary to enable this GC, as it is
// superior to the threading-based implementation with respect to jank. We may
// want the minor-fault feature in the future to make jit-code-cache updates
// (which use shmem) concurrent.
bool KernelSupportsUffd() {
#ifdef __linux__
  if (gHaveMremapDontunmap) {
    int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
    // On non-Android devices we may not have the kernel patches that restrict
    // userfaultfd to user mode. But that is not a security concern as we are
    // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
    if (!kIsTargetAndroid && fd == -1 && errno == EINVAL) {
      fd = syscall(__NR_userfaultfd, O_CLOEXEC);
    }
    if (fd >= 0) {
      // We are only fetching the available features, which are returned by the
      // ioctl.
      struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
      CHECK_EQ(ioctl(fd, UFFDIO_API, &api), 0) << "ioctl_userfaultfd : API:" << strerror(errno);
      gUffdFeatures = api.features;
      // MMAP_TRYLOCK is available only in the 5.10 and 5.15 GKI kernels. Higher
      // versions have per-VMA locks, and lower ones don't support userfaultfd.
      if (kIsTargetAndroid && !IsKernelVersionAtLeast(5, 16)) {
        // Check if the MMAP_TRYLOCK feature is supported.
        const size_t page_size = GetPageSizeSlow();
        void* mem =
            mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
        CHECK_NE(mem, MAP_FAILED) << " errno: " << errno;

        struct uffdio_zeropage uffd_zeropage;
        uffd_zeropage.mode = UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK;
        uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(mem);
        uffd_zeropage.range.len = page_size;
        uffd_zeropage.zeropage = 0;
        // The ioctl will definitely fail as mem is not registered with uffd.
        CHECK_EQ(ioctl(fd, UFFDIO_ZEROPAGE, &uffd_zeropage), -1);
        // uffd ioctls can return EINVAL for several reasons. By using a 'mem'
        // and 'len' that are properly aligned, we ensure that the ioctl can
        // fail with EINVAL, before updating uffd_zeropage.zeropage with an
        // error code, only if the `trylock` mode isn't recognized.
        if (uffd_zeropage.zeropage == 0 && errno == EINVAL) {
          LOG(INFO) << "MMAP_TRYLOCK is not supported in uffd addr:" << mem
                    << " page-size:" << page_size;
        } else {
          gUffdSupportsMmapTrylock = true;
          LOG(INFO) << "MMAP_TRYLOCK is supported in uffd errno:" << errno << " addr:" << mem
                    << " size:" << page_size;
        }
        munmap(mem, page_size);
      }
      close(fd);
      // At a minimum we need the SIGBUS feature to use userfaultfd.
      return (api.features & kUffdFeaturesForSigbus) == kUffdFeaturesForSigbus;
    }
  }
#endif
  return false;
}

// The other cases are defined as constexpr in runtime/read_barrier_config.h
#if !defined(ART_FORCE_USE_READ_BARRIER) && defined(ART_USE_READ_BARRIER)
// Returns the collector type requested on the cmdline.
static gc::CollectorType FetchCmdlineGcType() {
  std::string argv;
  gc::CollectorType gc_type = gc::CollectorType::kCollectorTypeNone;
  if (android::base::ReadFileToString("/proc/self/cmdline", &argv)) {
    auto pos = argv.rfind("-Xgc:");
    if (pos == std::string::npos) {
      return gc_type;
    }
    if (argv.substr(pos + 5, 3) == "CMC") {
      gc_type = gc::CollectorType::kCollectorTypeCMC;
    } else if (argv.substr(pos + 5, 2) == "CC") {
      gc_type = gc::CollectorType::kCollectorTypeCC;
    }
  }
  return gc_type;
}

#ifdef ART_TARGET_ANDROID
static int GetOverrideCacheInfoFd() {
  std::string args_str;
  if (!android::base::ReadFileToString("/proc/self/cmdline", &args_str)) {
    LOG(WARNING) << "Failed to load /proc/self/cmdline";
    return -1;
  }
  std::vector<std::string_view> args;
  Split(std::string_view(args_str), /*separator=*/'\0', &args);
  for (std::string_view arg : args) {
    if (android::base::ConsumePrefix(&arg, "--cache-info-fd=")) {  // This is a dex2oat flag.
      int fd;
      if (!android::base::ParseInt(std::string(arg), &fd)) {
        LOG(ERROR) << "Failed to parse --cache-info-fd (value: '" << arg << "')";
        return -1;
      }
      return fd;
    }
  }
  return -1;
}

static std::unordered_map<std::string, std::string> GetCachedProperties() {
  // For simplicity, we don't handle multiple calls because otherwise we would have to reset the fd.
  static bool called = false;
  CHECK(!called) << "GetCachedProperties can be called only once";
  called = true;

  std::string cache_info_contents;
  int fd = GetOverrideCacheInfoFd();
  if (fd >= 0) {
    if (!android::base::ReadFdToString(fd, &cache_info_contents)) {
      PLOG(ERROR) << "Failed to read cache-info from fd " << fd;
      return {};
    }
  } else {
    std::string path = GetApexDataDalvikCacheDirectory(InstructionSet::kNone) + "/cache-info.xml";
    if (!android::base::ReadFileToString(path, &cache_info_contents)) {
      // If the file is not found, then we are in chroot or in a standalone runtime process (e.g.,
      // IncidentHelper), or odsign/odrefresh failed to generate and sign the cache info. There's
      // nothing we can do.
      if (errno != ENOENT) {
        PLOG(ERROR) << "Failed to read cache-info from the default path";
      }
      return {};
    }
  }

  std::optional<com::android::art::CacheInfo> cache_info =
      com::android::art::parse(cache_info_contents.c_str());
  if (!cache_info.has_value()) {
    // This should never happen.
    LOG(ERROR) << "Failed to parse cache-info";
    return {};
  }
  const com::android::art::KeyValuePairList* list = cache_info->getFirstSystemProperties();
  if (list == nullptr) {
    // This should never happen.
    LOG(ERROR) << "Missing system properties from cache-info";
    return {};
  }
  const std::vector<com::android::art::KeyValuePair>& properties = list->getItem();
  std::unordered_map<std::string, std::string> result;
  for (const com::android::art::KeyValuePair& pair : properties) {
    result[pair.getK()] = pair.getV();
  }
  return result;
}

static bool GetCachedBoolProperty(
    const std::unordered_map<std::string, std::string>& cached_properties,
    const std::string& key,
    bool default_value) {
  auto it = cached_properties.find(key);
  if (it == cached_properties.end()) {
    return default_value;
  }
  ParseBoolResult result = ParseBool(it->second);
  switch (result) {
    case ParseBoolResult::kTrue:
      return true;
    case ParseBoolResult::kFalse:
      return false;
    case ParseBoolResult::kError:
      return default_value;
  }
}

static bool SysPropSaysUffdGc() {
  // The phenotype flag can change at any time after boot, but it shouldn't take effect until a
  // reboot. Therefore, we read the phenotype flag from the cache info, which is generated on boot.
  std::unordered_map<std::string, std::string> cached_properties = GetCachedProperties();
  bool phenotype_enable = GetCachedBoolProperty(
      cached_properties, "persist.device_config.runtime_native_boot.enable_uffd_gc_2", false);
  bool phenotype_force_disable = GetCachedBoolProperty(
      cached_properties, "persist.device_config.runtime_native_boot.force_disable_uffd_gc", false);
  bool build_enable = GetBoolProperty("ro.dalvik.vm.enable_uffd_gc", false);
  bool is_at_most_u = !IsAtLeastV();
  return (phenotype_enable || build_enable || is_at_most_u) && !phenotype_force_disable;
}
#else
// Never called.
static bool SysPropSaysUffdGc() { return false; }
#endif

static bool ShouldUseUserfaultfd() {
  static_assert(kUseBakerReadBarrier || kUseTableLookupReadBarrier);
#ifdef __linux__
  // Use CMC/CC if that is being explicitly asked for on the cmdline. Otherwise,
  // always use CC on host. On target, use CMC only if the system properties say
  // so and the kernel supports it.
  gc::CollectorType gc_type = FetchCmdlineGcType();
  return gc_type == gc::CollectorType::kCollectorTypeCMC ||
         (gc_type == gc::CollectorType::kCollectorTypeNone &&
          kIsTargetAndroid &&
          SysPropSaysUffdGc() &&
          KernelSupportsUffd());
#else
  return false;
#endif
}

const bool gUseUserfaultfd = ShouldUseUserfaultfd();
const bool gUseReadBarrier = !gUseUserfaultfd;
#endif
#ifdef ART_TARGET_ANDROID
bool ShouldUseGenerationalGC() {
  if (gUseUserfaultfd && !com::android::art::flags::use_generational_cmc()) {
    return false;
  }
  // The generational GC feature doesn't need a reboot. Any process (like dex2oat)
  // can pick a different value than zygote and will still be able to execute.
  return GetBoolProperty("persist.device_config.runtime_native_boot.use_generational_gc", true);
}
#else
bool ShouldUseGenerationalGC() { return true; }
#endif

namespace gc {
namespace collector {

// Turn off kCheckLocks when profiling the GC as it slows down the GC
// significantly.
static constexpr bool kCheckLocks = kDebugLocking;
static constexpr bool kVerifyRootsMarked = kIsDebugBuild;
// Verify that there are no missing card marks.
static constexpr bool kVerifyNoMissingCardMarks = kIsDebugBuild;
// Verify that all references in post-GC objects are valid.
static constexpr bool kVerifyPostGCObjects = kIsDebugBuild;
// Number of compaction buffers reserved for mutator threads in the SIGBUS
// feature case. It's extremely unlikely that we will ever have more than this
// number of mutator threads trying to access the moving-space during one
// compaction phase.
static constexpr size_t kMutatorCompactionBufferCount = 2048;
// Minimum from-space chunk to be madvised (during concurrent compaction) in one go.
// Choose a reasonable size to avoid making too many batched ioctl and madvise calls.
static constexpr ssize_t kMinFromSpaceMadviseSize = 8 * MB;
// Concurrent compaction termination logic is different (and slightly more efficient) if the
// kernel has the fault-retry feature (allowing repeated faults on the same page), which was
// introduced in 5.7 (https://android-review.git.corp.google.com/c/kernel/common/+/1540088).
// This allows a single page fault to be handled, in turn, by each worker thread, only waking
// up the GC thread at the end.
static const bool gKernelHasFaultRetry = IsKernelVersionAtLeast(5, 7);

std::pair<bool, bool> MarkCompact::GetUffdAndMinorFault() {
  bool uffd_available;
  // In most cases gUffdFeatures will already be initialized at boot time, when
  // libart is loaded. On very old kernels we may get '0' from the kernel, in
  // which case we would be doing the syscalls each time this function is
  // called. But that's a very unlikely case. There are no correctness issues as
  // the response from the kernel never changes after boot.
  if (UNLIKELY(gUffdFeatures == 0)) {
    uffd_available = KernelSupportsUffd();
  } else {
    // We can have any uffd features only if uffd exists.
    uffd_available = true;
  }
  bool minor_fault_available =
      (gUffdFeatures & kUffdFeaturesForMinorFault) == kUffdFeaturesForMinorFault;
  return std::pair<bool, bool>(uffd_available, minor_fault_available);
}

bool MarkCompact::CreateUserfaultfd(bool post_fork) {
  if (post_fork || uffd_ == kFdUnused) {
    // Check if we have MREMAP_DONTUNMAP here for cases where
    // 'ART_USE_READ_BARRIER=false' is used. Additionally, this check ensures
    // that userfaultfd isn't used on old kernels, which cause random ioctl
    // failures.
    if (gHaveMremapDontunmap) {
      // Don't use O_NONBLOCK as we rely on read waiting on uffd_ if there isn't
      // any read event available. We don't use poll.
      uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
      // On non-Android devices we may not have the kernel patches that restrict
      // userfaultfd to user mode. But that is not a security concern as we are
      // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
      if (!kIsTargetAndroid && UNLIKELY(uffd_ == -1 && errno == EINVAL)) {
        uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC);
      }
      if (UNLIKELY(uffd_ == -1)) {
        uffd_ = kFallbackMode;
        LOG(WARNING) << "Userfaultfd isn't supported (reason: " << strerror(errno)
                     << ") and therefore falling back to stop-the-world compaction.";
      } else {
        DCHECK(IsValidFd(uffd_));
        // Initialize uffd with the features which are required and available.
        // Using private anonymous mapping in threading mode is the default,
        // for which we don't need to ask for any features. Note: this mode
        // is not used in production.
        struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
        // We should add the SIGBUS feature only if we plan on using it, as
        // requesting it here will mean threading mode will not work.
        CHECK_EQ(gUffdFeatures & kUffdFeaturesForSigbus, kUffdFeaturesForSigbus);
        api.features |= kUffdFeaturesForSigbus;
        CHECK_EQ(ioctl(uffd_, UFFDIO_API, &api), 0)
            << "ioctl_userfaultfd: API: " << strerror(errno);
      }
    } else {
      uffd_ = kFallbackMode;
    }
  }
  uffd_initialized_ = !post_fork || uffd_ == kFallbackMode;
  return IsValidFd(uffd_);
}

template <size_t kAlignment>
MarkCompact::LiveWordsBitmap<kAlignment>* MarkCompact::LiveWordsBitmap<kAlignment>::Create(
    uintptr_t begin, uintptr_t end) {
  return static_cast<LiveWordsBitmap<kAlignment>*>(
      MemRangeBitmap::Create("Concurrent Mark Compact live words bitmap", begin, end));
}

size_t MarkCompact::ComputeInfoMapSize() {
  size_t moving_space_size = bump_pointer_space_->Capacity();
  size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize;
  size_t nr_moving_pages = DivideByPageSize(moving_space_size);
  size_t nr_non_moving_pages = DivideByPageSize(heap_->GetNonMovingSpace()->Capacity());
  return chunk_info_vec_size * sizeof(uint32_t) + nr_non_moving_pages * sizeof(ObjReference) +
         nr_moving_pages * (sizeof(ObjReference) + sizeof(uint32_t) + sizeof(Atomic<uint32_t>));
}

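// A sketch of how the single info-map allocation is carved up by
// InitializeInfoMap() below (sizes mirror ComputeInfoMapSize() above):
//
//   chunk_info_vec_                  : (moving_space_sz / kOffsetChunkSize) x uint32_t
//   first_objs_moving_space_         : nr_moving_pages x ObjReference
//   pre_compact_offset_moving_space_ : nr_moving_pages x uint32_t
//   moving_pages_status_             : nr_moving_pages x Atomic<uint32_t>
//   first_objs_non_moving_space_     : nr_non_moving_pages x ObjReference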
size_t MarkCompact::InitializeInfoMap(uint8_t* p, size_t moving_space_sz) {
  size_t nr_moving_pages = DivideByPageSize(moving_space_sz);

  chunk_info_vec_ = reinterpret_cast<uint32_t*>(p);
  vector_length_ = moving_space_sz / kOffsetChunkSize;
  size_t total = vector_length_ * sizeof(uint32_t);

  first_objs_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
  total += nr_moving_pages * sizeof(ObjReference);

  pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p + total);
  total += nr_moving_pages * sizeof(uint32_t);

  moving_pages_status_ = reinterpret_cast<Atomic<uint32_t>*>(p + total);
  total += nr_moving_pages * sizeof(Atomic<uint32_t>);

  first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
  total += DivideByPageSize(heap_->GetNonMovingSpace()->Capacity()) * sizeof(ObjReference);
  DCHECK_EQ(total, ComputeInfoMapSize());
  return total;
}

YoungMarkCompact::YoungMarkCompact(Heap* heap, MarkCompact* main)
    : GarbageCollector(heap, "young concurrent mark compact"), main_collector_(main) {
  // Initialize GC metrics.
  metrics::ArtMetrics* metrics = GetMetrics();
  gc_time_histogram_ = metrics->YoungGcCollectionTime();
  metrics_gc_count_ = metrics->YoungGcCount();
  metrics_gc_count_delta_ = metrics->YoungGcCountDelta();
  gc_throughput_histogram_ = metrics->YoungGcThroughput();
  gc_tracing_throughput_hist_ = metrics->YoungGcTracingThroughput();
  gc_throughput_avg_ = metrics->YoungGcThroughputAvg();
  gc_tracing_throughput_avg_ = metrics->YoungGcTracingThroughputAvg();
  gc_scanned_bytes_ = metrics->YoungGcScannedBytes();
  gc_scanned_bytes_delta_ = metrics->YoungGcScannedBytesDelta();
  gc_freed_bytes_ = metrics->YoungGcFreedBytes();
  gc_freed_bytes_delta_ = metrics->YoungGcFreedBytesDelta();
  gc_duration_ = metrics->YoungGcDuration();
  gc_duration_delta_ = metrics->YoungGcDurationDelta();
  gc_app_slow_path_during_gc_duration_delta_ = metrics->AppSlowPathDuringYoungGcDurationDelta();
  are_metrics_initialized_ = true;
}

void YoungMarkCompact::RunPhases() {
  DCHECK(!main_collector_->young_gen_);
  main_collector_->young_gen_ = true;
  main_collector_->RunPhases();
  main_collector_->young_gen_ = false;
}

MarkCompact::MarkCompact(Heap* heap)
    : GarbageCollector(heap, "concurrent mark compact"),
      gc_barrier_(0),
      lock_("mark compact lock", kGenericBottomLock),
      sigbus_in_progress_count_{kSigbusCounterCompactionDoneMask, kSigbusCounterCompactionDoneMask},
      mid_to_old_promo_bit_vec_(nullptr),
      bump_pointer_space_(heap->GetBumpPointerSpace()),
      post_compact_end_(nullptr),
      young_gen_(false),
      use_generational_(heap->GetUseGenerational()),
      compacting_(false),
      moving_space_bitmap_(bump_pointer_space_->GetMarkBitmap()),
      moving_space_begin_(bump_pointer_space_->Begin()),
      moving_space_end_(bump_pointer_space_->Limit()),
      black_dense_end_(moving_space_begin_),
      mid_gen_end_(moving_space_begin_),
      uffd_(kFdUnused),
      marking_done_(false),
      uffd_initialized_(false),
      clamp_info_map_status_(ClampInfoStatus::kClampInfoNotDone) {
  if (kIsDebugBuild) {
    updated_roots_.reset(new std::unordered_set<void*>());
  }
  if (gUffdFeatures == 0) {
    GetUffdAndMinorFault();
  }
  uint8_t* moving_space_begin = bump_pointer_space_->Begin();
  // TODO: Depending on how the bump-pointer space move is implemented, if we
  // switch between two virtual memories each time, then we will have to
  // initialize live_words_bitmap_ accordingly.
  live_words_bitmap_.reset(LiveWordsBitmap<kAlignment>::Create(
      reinterpret_cast<uintptr_t>(moving_space_begin),
      reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit())));

  std::string err_msg;
  size_t moving_space_size = bump_pointer_space_->Capacity();
  {
    // Create one MemMap for all the data structures
    info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector",
                                     ComputeInfoMapSize(),
                                     PROT_READ | PROT_WRITE,
                                     /*low_4gb=*/false,
                                     &err_msg);
    if (UNLIKELY(!info_map_.IsValid())) {
      LOG(FATAL) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg;
    } else {
      size_t total = InitializeInfoMap(info_map_.Begin(), moving_space_size);
      DCHECK_EQ(total, info_map_.Size());
    }
  }

  size_t moving_space_alignment = Heap::BestPageTableAlignment(moving_space_size);
  // The moving space is created at a fixed address, which is expected to be
  // PMD-size aligned.
  if (!IsAlignedParam(moving_space_begin, moving_space_alignment)) {
    LOG(WARNING) << "Bump pointer space is not aligned to " << PrettySize(moving_space_alignment)
                 << ". This can lead to longer stop-the-world pauses for compaction";
  }
  // NOTE: PROT_NONE is used here as these mappings are for address space reservation
  // only and will be used only after appropriately remapping them.
  from_space_map_ = MemMap::MapAnonymousAligned("Concurrent mark-compact from-space",
                                                moving_space_size,
                                                PROT_NONE,
                                                /*low_4gb=*/kObjPtrPoisoning,
                                                moving_space_alignment,
                                                &err_msg);
  if (UNLIKELY(!from_space_map_.IsValid())) {
    LOG(FATAL) << "Failed to allocate concurrent mark-compact from-space: " << err_msg;
  } else {
    from_space_begin_ = from_space_map_.Begin();
  }

  compaction_buffers_map_ = MemMap::MapAnonymous("Concurrent mark-compact compaction buffers",
                                                 (1 + kMutatorCompactionBufferCount) * gPageSize,
                                                 PROT_READ | PROT_WRITE,
                                                 /*low_4gb=*/kObjPtrPoisoning,
                                                 &err_msg);
  if (UNLIKELY(!compaction_buffers_map_.IsValid())) {
    LOG(FATAL) << "Failed to allocate concurrent mark-compact compaction buffers: " << err_msg;
  }
  // We also use the first page-sized buffer for the purpose of terminating concurrent compaction.
  conc_compaction_termination_page_ = compaction_buffers_map_.Begin();
  // Touch the page deliberately to avoid userfaults on it. We madvise it in
  // CompactionPhase() before using it to terminate concurrent compaction.
  ForceRead(conc_compaction_termination_page_);

  // In most of the cases, we don't expect more than one LinearAlloc space.
  linear_alloc_spaces_data_.reserve(1);

  // Initialize GC metrics.
  metrics::ArtMetrics* metrics = GetMetrics();
  gc_time_histogram_ = metrics->FullGcCollectionTime();
  metrics_gc_count_ = metrics->FullGcCount();
  metrics_gc_count_delta_ = metrics->FullGcCountDelta();
  gc_throughput_histogram_ = metrics->FullGcThroughput();
  gc_tracing_throughput_hist_ = metrics->FullGcTracingThroughput();
  gc_throughput_avg_ = metrics->FullGcThroughputAvg();
  gc_tracing_throughput_avg_ = metrics->FullGcTracingThroughputAvg();
  gc_scanned_bytes_ = metrics->FullGcScannedBytes();
  gc_scanned_bytes_delta_ = metrics->FullGcScannedBytesDelta();
  gc_freed_bytes_ = metrics->FullGcFreedBytes();
  gc_freed_bytes_delta_ = metrics->FullGcFreedBytesDelta();
  gc_duration_ = metrics->FullGcDuration();
  gc_duration_delta_ = metrics->FullGcDurationDelta();
  gc_app_slow_path_during_gc_duration_delta_ = metrics->AppSlowPathDuringFullGcDurationDelta();
  are_metrics_initialized_ = true;
}

void MarkCompact::ResetGenerationalState() {
  black_dense_end_ = mid_gen_end_ = moving_space_begin_;
  post_compact_end_ = nullptr;
  class_after_obj_map_.clear();
}

void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) {
  DCHECK_ALIGNED_PARAM(begin, gPageSize);
  DCHECK_ALIGNED_PARAM(len, gPageSize);
  DCHECK_GE(len, Heap::GetPMDSize());
  size_t alignment = Heap::BestPageTableAlignment(len);
  std::string err_msg;
  MemMap shadow(MemMap::MapAnonymousAligned("linear-alloc shadow map",
                                            len,
                                            PROT_NONE,
                                            /*low_4gb=*/false,
                                            alignment,
                                            &err_msg));
  if (!shadow.IsValid()) {
    LOG(FATAL) << "Failed to allocate linear-alloc shadow map: " << err_msg;
    UNREACHABLE();
  }

  MemMap page_status_map(MemMap::MapAnonymous("linear-alloc page-status map",
                                              DivideByPageSize(len),
                                              PROT_READ | PROT_WRITE,
                                              /*low_4gb=*/false,
                                              &err_msg));
  if (!page_status_map.IsValid()) {
    LOG(FATAL) << "Failed to allocate linear-alloc page-status shadow map: " << err_msg;
    UNREACHABLE();
  }
  linear_alloc_spaces_data_.emplace_back(
      std::forward<MemMap>(shadow), std::forward<MemMap>(page_status_map), begin, begin + len);
}

void MarkCompact::ClampGrowthLimit(size_t new_capacity) {
  // From-space is the same size as moving-space in virtual memory.
  // However, if it's in the >4GB address space then we don't need to clamp it
  // synchronously.
#if defined(__LP64__)
  constexpr bool kClampFromSpace = kObjPtrPoisoning;
#else
  constexpr bool kClampFromSpace = true;
#endif
  size_t old_capacity = bump_pointer_space_->Capacity();
  new_capacity = bump_pointer_space_->ClampGrowthLimit(new_capacity);
  if (new_capacity < old_capacity) {
    CHECK(from_space_map_.IsValid());
    if (kClampFromSpace) {
      from_space_map_.SetSize(new_capacity);
    }
    clamp_info_map_status_ = ClampInfoStatus::kClampInfoPending;
  }
  CHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
}

void MarkCompact::MaybeClampGcStructures() {
  size_t moving_space_size = bump_pointer_space_->Capacity();
  DCHECK(thread_running_gc_ != nullptr);
  if (UNLIKELY(clamp_info_map_status_ == ClampInfoStatus::kClampInfoPending)) {
    CHECK(from_space_map_.IsValid());
    if (from_space_map_.Size() > moving_space_size) {
      from_space_map_.SetSize(moving_space_size);
    }
    // Bitmaps and other data structures
    live_words_bitmap_->SetBitmapSize(moving_space_size);
    size_t set_size = InitializeInfoMap(info_map_.Begin(), moving_space_size);
    CHECK_LT(set_size, info_map_.Size());
    info_map_.SetSize(set_size);

    clamp_info_map_status_ = ClampInfoStatus::kClampInfoFinished;
  }
}

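// Invoked twice during marking: once before concurrent marking starts
// (pre_marking == true) and once later in the cycle. The static_asserts below
// rely on the card values forming a consecutive descending sequence
// (kCardDirty, kCardAged, kCardAged2), so that, in the young-gen path, aging a
// card is just a decrement and a card already at kCardAged2 (or below) drops
// to kCardClean.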
void MarkCompact::PrepareForMarking(bool pre_marking) {
  static_assert(gc::accounting::CardTable::kCardDirty - 1 == gc::accounting::CardTable::kCardAged);
  static_assert(gc::accounting::CardTable::kCardAged - 1 == gc::accounting::CardTable::kCardAged2);
  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
  accounting::CardTable* const card_table = heap_->GetCardTable();
  // immune_spaces_ is emptied in InitializePhase() before marking starts. This
  // function is invoked twice during marking. We only need to populate immune_spaces_
  // once per GC cycle. And when it's done (below), all the immune spaces are
  // added to it. We can never have partially filled immune_spaces_.
  bool update_immune_spaces = immune_spaces_.IsEmpty();
  // Mark all of the spaces we never collect as immune.
  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
        space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
      CHECK(space->IsZygoteSpace() || space->IsImageSpace());
      if (update_immune_spaces) {
        immune_spaces_.AddSpace(space);
      }
      accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
      if (table != nullptr) {
        table->ProcessCards();
      } else {
        // Keep cards aged if we don't have a mod-union table since we need
        // to scan them in future GCs. This case is for app images.
        card_table->ModifyCardsAtomic(
            space->Begin(),
            space->End(),
            [](uint8_t card) {
              return (card == gc::accounting::CardTable::kCardClean)
                         ? card
                         : gc::accounting::CardTable::kCardAged;
            },
            /* card modified visitor */ VoidFunctor());
      }
    } else if (pre_marking) {
      CHECK(!space->IsZygoteSpace());
      CHECK(!space->IsImageSpace());
      if (young_gen_) {
        uint8_t* space_age_end = space->Limit();
        // Age cards in old-gen as they contain old-to-young references.
        if (space == bump_pointer_space_) {
          DCHECK_ALIGNED_PARAM(old_gen_end_, gPageSize);
          moving_space_bitmap_->ClearRange(reinterpret_cast<mirror::Object*>(old_gen_end_),
                                           reinterpret_cast<mirror::Object*>(moving_space_end_));
          // Clear cards in [old_gen_end_, moving_space_end_) as they are not needed.
          card_table->ClearCardRange(old_gen_end_, space->Limit());
          space_age_end = old_gen_end_;
        }
        card_table->ModifyCardsAtomic(space->Begin(),
                                      space_age_end,
                                      AgeCardVisitor(),
                                      /*card modified visitor=*/VoidFunctor());
      } else {
        // The card-table corresponding to bump-pointer and non-moving space can
        // be cleared, because we are going to traverse all the reachable objects
        // in these spaces. This card-table will eventually be used to track
        // mutations while concurrent marking is going on.
        card_table->ClearCardRange(space->Begin(), space->Limit());
        if (space == bump_pointer_space_) {
          moving_space_bitmap_->Clear();
        }
      }
      if (space != bump_pointer_space_) {
        CHECK_EQ(space, heap_->GetNonMovingSpace());
        if (young_gen_) {
          space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
        }
        non_moving_space_ = space;
        non_moving_space_bitmap_ = space->GetMarkBitmap();
      }
    } else {
      if (young_gen_) {
        // It would be correct to retain existing aged cards and add dirty cards
        // to that set. However, that would unnecessarily require us to re-scan
        // cards which haven't been dirtied since the first pass of marking.
        auto card_visitor = [](uint8_t card) {
          return (card > gc::accounting::CardTable::kCardAged2)
                     ? card - 1
                     : gc::accounting::CardTable::kCardClean;
        };
        card_table->ModifyCardsAtomic(
            space->Begin(), space->End(), card_visitor, /*card modified visitor=*/VoidFunctor());
      } else {
        card_table->ModifyCardsAtomic(space->Begin(),
                                      space->End(),
                                      AgeCardVisitor(),
                                      /*card modified visitor=*/VoidFunctor());
      }
    }
  }
  if (pre_marking && young_gen_) {
    for (const auto& space : GetHeap()->GetDiscontinuousSpaces()) {
      CHECK(space->IsLargeObjectSpace());
      space->AsLargeObjectSpace()->CopyLiveToMarked();
    }
  }
}

void MarkCompact::MarkZygoteLargeObjects() {
  Thread* self = thread_running_gc_;
  DCHECK_EQ(self, Thread::Current());
  space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace();
  if (los != nullptr) {
    // Pick the current live bitmap (mark bitmap if swapped).
    accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
    accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
    // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
    std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic();
    live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first),
                                  reinterpret_cast<uintptr_t>(range.second),
                                  [mark_bitmap, los, self](mirror::Object* obj)
                                      REQUIRES(Locks::heap_bitmap_lock_)
                                      REQUIRES_SHARED(Locks::mutator_lock_) {
                                        if (los->IsZygoteLargeObject(self, obj)) {
                                          mark_bitmap->Set(obj);
                                        }
                                      });
  }
}

void MarkCompact::InitializePhase() {
  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
  mark_stack_ = heap_->GetMarkStack();
  CHECK(mark_stack_->IsEmpty());
  immune_spaces_.Reset();
  moving_first_objs_count_ = 0;
  non_moving_first_objs_count_ = 0;
  black_page_count_ = 0;
  bytes_scanned_ = 0;
  freed_objects_ = 0;
  // The first buffer is used by the gc-thread.
  compaction_buffer_counter_.store(1, std::memory_order_relaxed);
  black_allocations_begin_ = bump_pointer_space_->Limit();
  DCHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
  from_space_slide_diff_ = from_space_begin_ - moving_space_begin_;
  moving_space_end_ = bump_pointer_space_->Limit();
  if (use_generational_ && !young_gen_) {
    class_after_obj_map_.clear();
  }
  // TODO: Would it suffice to read it once in the constructor, which is called
  // in the zygote process?
  pointer_size_ = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
  for (size_t i = 0; i < vector_length_; i++) {
    DCHECK_EQ(chunk_info_vec_[i], 0u);
  }
  app_slow_path_start_time_ = 0;
}

class MarkCompact::ThreadFlipVisitor : public Closure {
 public:
  explicit ThreadFlipVisitor(MarkCompact* collector) : collector_(collector) {}

  void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) {
    // Note: self is not necessarily equal to thread since thread may be suspended.
    Thread* self = Thread::Current();
    CHECK(thread == self || thread->GetState() != ThreadState::kRunnable)
        << thread->GetState() << " thread " << thread << " self " << self;
    thread->VisitRoots(collector_, kVisitRootFlagAllRoots);
    // Interpreter cache is thread-local so it needs to be swept either in a
    // flip, or a stop-the-world pause.
    CHECK(collector_->compacting_);
    thread->GetInterpreterCache()->Clear(thread);
    thread->AdjustTlab(collector_->black_objs_slide_diff_);
  }

 private:
  MarkCompact* const collector_;
};

class MarkCompact::FlipCallback : public Closure {
 public:
  explicit FlipCallback(MarkCompact* collector) : collector_(collector) {}

  void Run([[maybe_unused]] Thread* thread) override REQUIRES(Locks::mutator_lock_) {
    collector_->CompactionPause();
  }

 private:
  MarkCompact* const collector_;
};

void MarkCompact::RunPhases() {
  Thread* self = Thread::Current();
  thread_running_gc_ = self;
  Runtime* runtime = Runtime::Current();
  GetHeap()->PreGcVerification(this);
  InitializePhase();
  {
    ReaderMutexLock mu(self, *Locks::mutator_lock_);
    MarkingPhase();
  }
  {
    // Marking pause
    ScopedPause pause(this);
    MarkingPause();
    if (kIsDebugBuild) {
      bump_pointer_space_->AssertAllThreadLocalBuffersAreRevoked();
    }
  }
  bool perform_compaction;
  {
    ReaderMutexLock mu(self, *Locks::mutator_lock_);
    ReclaimPhase();
    perform_compaction = PrepareForCompaction();
  }
  if (perform_compaction) {
    // Compaction pause
    ThreadFlipVisitor visitor(this);
    FlipCallback callback(this);
    runtime->GetThreadList()->FlipThreadRoots(
        &visitor, &callback, this, GetHeap()->GetGcPauseListener());

    if (IsValidFd(uffd_)) {
      ReaderMutexLock mu(self, *Locks::mutator_lock_);
      CompactionPhase();
    }
  } else {
    if (use_generational_) {
      DCHECK_IMPLIES(post_compact_end_ != nullptr, post_compact_end_ == black_allocations_begin_);
    }
    post_compact_end_ = black_allocations_begin_;
  }
  FinishPhase(perform_compaction);
  GetHeap()->PostGcVerification(this);
  thread_running_gc_ = nullptr;
}

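// For each to-space page, record in pre_compact_offset_moving_space_[] the
// (pre-compaction) live-word offset at which that page's contents begin, and
// in first_objs_moving_space_[] the object containing that word. At this point
// chunk_info_vec_ still holds per-chunk live bytes; it is converted into a
// prefix sum later in PrepareForCompaction().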
void MarkCompact::InitMovingSpaceFirstObjects(size_t vec_len, size_t to_space_page_idx) {
  uint32_t offset_in_chunk_word;
  uint32_t offset;
  mirror::Object* obj;
  const uintptr_t heap_begin = moving_space_bitmap_->HeapBegin();

  // Find the first live word.
  size_t chunk_idx = to_space_page_idx * (gPageSize / kOffsetChunkSize);
  DCHECK_LT(chunk_idx, vec_len);
  // Find the first live word in the space
  for (; chunk_info_vec_[chunk_idx] == 0; chunk_idx++) {
    if (chunk_idx >= vec_len) {
      // We don't have any live data on the moving-space.
      moving_first_objs_count_ = to_space_page_idx;
      return;
    }
  }
  DCHECK_LT(chunk_idx, vec_len);
  // Use live-words bitmap to find the first live word
  offset_in_chunk_word = live_words_bitmap_->FindNthLiveWordOffset(chunk_idx, /*n*/ 0);
  offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
  DCHECK(live_words_bitmap_->Test(offset)) << "offset=" << offset
                                           << " chunk_idx=" << chunk_idx
                                           << " N=0"
                                           << " offset_in_word=" << offset_in_chunk_word
                                           << " word=" << std::hex
                                           << live_words_bitmap_->GetWord(chunk_idx);
  obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
  // TODO: add a check to validate the object.

  pre_compact_offset_moving_space_[to_space_page_idx] = offset;
  first_objs_moving_space_[to_space_page_idx].Assign(obj);
  to_space_page_idx++;

  uint32_t page_live_bytes = 0;
  while (true) {
    for (; page_live_bytes <= gPageSize; chunk_idx++) {
      if (chunk_idx >= vec_len) {
        moving_first_objs_count_ = to_space_page_idx;
        return;
      }
      page_live_bytes += chunk_info_vec_[chunk_idx];
    }
    chunk_idx--;
    page_live_bytes -= gPageSize;
    DCHECK_LE(page_live_bytes, kOffsetChunkSize);
    DCHECK_LE(page_live_bytes, chunk_info_vec_[chunk_idx])
        << " chunk_idx=" << chunk_idx
        << " to_space_page_idx=" << to_space_page_idx
        << " vec_len=" << vec_len;
    DCHECK(IsAligned<kAlignment>(chunk_info_vec_[chunk_idx] - page_live_bytes));
    offset_in_chunk_word =
        live_words_bitmap_->FindNthLiveWordOffset(
            chunk_idx, (chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment);
    offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
    DCHECK(live_words_bitmap_->Test(offset))
        << "offset=" << offset
        << " chunk_idx=" << chunk_idx
        << " N=" << ((chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment)
        << " offset_in_word=" << offset_in_chunk_word
        << " word=" << std::hex << live_words_bitmap_->GetWord(chunk_idx);
    // TODO: Can we optimize this for large objects? If we are continuing a
    // large object that spans multiple pages, then we may be able to do without
    // calling FindPrecedingObject().
    //
    // Find the object which encapsulates offset in it, which could be
    // starting at offset itself.
    obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
    // TODO: add a check to validate the object.
    pre_compact_offset_moving_space_[to_space_page_idx] = offset;
    first_objs_moving_space_[to_space_page_idx].Assign(obj);
    to_space_page_idx++;
    chunk_idx++;
  }
}

size_t MarkCompact::InitNonMovingFirstObjects(uintptr_t begin,
                                              uintptr_t end,
                                              accounting::ContinuousSpaceBitmap* bitmap,
                                              ObjReference* first_objs_arr) {
  mirror::Object* prev_obj;
  size_t page_idx;
  {
    // Find first live object
    mirror::Object* obj = nullptr;
    bitmap->VisitMarkedRange</*kVisitOnce*/ true>(begin,
                                                  end,
                                                  [&obj] (mirror::Object* o) {
                                                    obj = o;
                                                  });
    if (obj == nullptr) {
      // There are no live objects in the space
      return 0;
    }
    page_idx = DivideByPageSize(reinterpret_cast<uintptr_t>(obj) - begin);
    first_objs_arr[page_idx++].Assign(obj);
    prev_obj = obj;
  }
  // TODO: check obj is valid
  uintptr_t prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
                           + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
  // For every page find the object starting from which we need to call
  // VisitReferences. It could either be an object that started on some
  // preceding page, or some object starting within this page.
  begin = RoundDown(reinterpret_cast<uintptr_t>(prev_obj) + gPageSize, gPageSize);
  while (begin < end) {
    // Use, if any, a large object that started in some preceding page and
    // overlaps with this page as well.
    if (prev_obj != nullptr && prev_obj_end > begin) {
      DCHECK_LT(prev_obj, reinterpret_cast<mirror::Object*>(begin));
      first_objs_arr[page_idx].Assign(prev_obj);
    } else {
      prev_obj_end = 0;
      // It's sufficient to only search for the previous object in the preceding page.
      // If no live object started in that page and some object had started in
      // the page preceding to that page, which was big enough to overlap with
      // the current page, then we wouldn't be in the else part.
      prev_obj = bitmap->FindPrecedingObject(begin, begin - gPageSize);
      if (prev_obj != nullptr) {
        prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
                       + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
      }
      if (prev_obj_end > begin) {
        first_objs_arr[page_idx].Assign(prev_obj);
      } else {
        // Find the first live object in this page
        bitmap->VisitMarkedRange</*kVisitOnce*/ true>(
            begin, begin + gPageSize, [first_objs_arr, page_idx](mirror::Object* obj) {
              first_objs_arr[page_idx].Assign(obj);
            });
      }
      // An empty entry indicates that the page has no live objects and hence
      // can be skipped.
    }
    begin += gPageSize;
    page_idx++;
  }
  return page_idx;
}

// Generational CMC description
// ============================
//
// All allocations since the last GC are considered to be in the young generation.
// Unlike other ART GCs, we promote surviving objects to the old generation only
// after they survive two consecutive GCs. Objects that have survived one GC are
// considered to be in the mid generation. In the next young GC, marking is
// performed on both the young-gen and mid-gen objects. Then, during compaction,
// the surviving mid-gen objects are compacted and promoted to old-gen, while
// the surviving young-gen objects are compacted and promoted to mid-gen.
//
// Some other important points worth explaining:
//
// 1. During the marking phase, 'mid_gen_end_' segregates the young and mid
// generations. Before starting compaction, in PrepareForCompaction(), we set it
// to the corresponding post-compact address, aligned up to page size. Therefore,
// some object's beginning portion may be in mid-gen while the rest is in young-gen.
// Aligning up is essential as mid_gen_end_ becomes old_gen_end_ at the end of the
// GC cycle, and the latter has to be page-aligned as old-gen pages are
// processed differently (no compaction).
//
// 2. We need to maintain the mark-bitmap for the old-gen for subsequent GCs.
// When objects are promoted to old-gen from mid-gen, their mark bits are
// first collected in a BitVector and only later copied into the mark-bitmap in
// FinishPhase(). We can't directly set the bits in the mark-bitmap as the bitmap
// contains pre-compaction mark bits which are required during compaction.
//
// 3. Since we need to revisit mid-gen objects in the next GC cycle, we need to
// dirty the cards in old-gen containing references to them. We identify these
// references when visiting old-gen objects during compaction. However, native
// roots are skipped at that time (they are updated separately in the linear-alloc
// space, where we don't know which object (dex-cache/class-loader/class) a
// native root belongs to). Therefore, native roots are covered during the
// marking phase.
bool MarkCompact::PrepareForCompaction() {
  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
  size_t chunk_info_per_page = gPageSize / kOffsetChunkSize;
  size_t vector_len = (black_allocations_begin_ - moving_space_begin_) / kOffsetChunkSize;
  DCHECK_LE(vector_len, vector_length_);
  DCHECK_ALIGNED_PARAM(vector_length_, chunk_info_per_page);
  if (UNLIKELY(vector_len == 0)) {
    // Nothing to compact. The entire heap is empty.
    black_dense_end_ = mid_gen_end_ = moving_space_begin_;
    return false;
  }
  for (size_t i = 0; i < vector_len; i++) {
    DCHECK_LE(chunk_info_vec_[i], kOffsetChunkSize)
        << "i:" << i << " vector_length:" << vector_len << " vector_length_:" << vector_length_;
    DCHECK_EQ(chunk_info_vec_[i], live_words_bitmap_->LiveBytesInBitmapWord(i));
  }

  // TODO: We can do a lot of neat tricks with this offset vector to tune the
  // compaction as we wish. Originally, the compaction algorithm slides all
  // live objects towards the beginning of the heap. This is nice because it
  // keeps the spatial locality of objects intact.
  // However, sometimes it's desired to compact objects in certain portions
  // of the heap. For instance, it is expected that, over time,
  // objects towards the beginning of the heap are long lived and are always
  // densely packed. In this case, it makes sense to only update references in
  // there and not try to compact it.
  // Furthermore, we might have some large objects and may not want to move such
  // objects.
  // We can adjust, without too much effort, the values in the chunk_info_vec_ such
  // that the objects in the dense beginning area aren't moved. OTOH, large
  // objects, which could be anywhere in the heap, could also be kept from
  // moving by using a similar trick. The only issue is that by doing this we will
  // leave an unused hole in the middle of the heap which can't be used for
  // allocations until we do a *full* compaction.
  //
  // At this point every element in the chunk_info_vec_ contains the live-bytes
  // of the corresponding chunk. For old-to-new address computation we need
  // every element to reflect total live-bytes up to the corresponding chunk.

  size_t black_dense_idx = 0;
  GcCause gc_cause = GetCurrentIteration()->GetGcCause();
  if (young_gen_) {
    DCHECK_ALIGNED_PARAM(old_gen_end_, gPageSize);
    DCHECK_GE(mid_gen_end_, old_gen_end_);
    DCHECK_GE(black_allocations_begin_, mid_gen_end_);
    // old-gen's boundary was decided at the end of the previous GC cycle.
    black_dense_idx = (old_gen_end_ - moving_space_begin_) / kOffsetChunkSize;
    if (black_dense_idx == vector_len) {
      // There is nothing live in young-gen.
      DCHECK_EQ(old_gen_end_, black_allocations_begin_);
      mid_gen_end_ = black_allocations_begin_;
      return false;
    }
    InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(moving_space_begin_),
                              reinterpret_cast<uintptr_t>(old_gen_end_),
                              moving_space_bitmap_,
                              first_objs_moving_space_);
  } else if (gc_cause != kGcCauseExplicit && gc_cause != kGcCauseCollectorTransition &&
             !GetCurrentIteration()->GetClearSoftReferences()) {
    uint64_t live_bytes = 0, total_bytes = 0;
    size_t aligned_vec_len = RoundUp(vector_len, chunk_info_per_page);
    size_t num_pages = aligned_vec_len / chunk_info_per_page;
    size_t threshold_passing_marker = 0;  // In number of pages
    std::vector<uint32_t> pages_live_bytes;
    pages_live_bytes.reserve(num_pages);
    // Identify the largest chunk towards the beginning of the moving space
    // which passes the black-dense threshold.
    for (size_t i = 0; i < aligned_vec_len; i += chunk_info_per_page) {
      uint32_t page_live_bytes = 0;
      for (size_t j = 0; j < chunk_info_per_page; j++) {
        page_live_bytes += chunk_info_vec_[i + j];
        total_bytes += kOffsetChunkSize;
      }
      live_bytes += page_live_bytes;
      pages_live_bytes.push_back(page_live_bytes);
      if (live_bytes * 100U >= total_bytes * kBlackDenseRegionThreshold) {
        threshold_passing_marker = pages_live_bytes.size();
      }
    }
    DCHECK_EQ(pages_live_bytes.size(), num_pages);
    // Eliminate the pages at the end of the chunk which are below the threshold.
    if (threshold_passing_marker > 0) {
      auto iter = std::find_if(
          pages_live_bytes.rbegin() + (num_pages - threshold_passing_marker),
          pages_live_bytes.rend(),
          [](uint32_t bytes) { return bytes * 100U >= gPageSize * kBlackDenseRegionThreshold; });
      black_dense_idx = (pages_live_bytes.rend() - iter) * chunk_info_per_page;
    }
    black_dense_end_ = moving_space_begin_ + black_dense_idx * kOffsetChunkSize;
    DCHECK_ALIGNED_PARAM(black_dense_end_, gPageSize);

    // Adjust for a class allocated after black_dense_end_ while its object(s)
    // are earlier. This is required as we update the references in the
    // black-dense region in-place. And if the class pointer of some first
    // object for a page, which started in some preceding page, is already
    // updated, then we will read wrong class data like the ref-offset bitmap.
    for (auto iter = class_after_obj_map_.rbegin();
         iter != class_after_obj_map_.rend() &&
         reinterpret_cast<uint8_t*>(iter->first.AsMirrorPtr()) >= black_dense_end_;
         iter++) {
      black_dense_end_ =
          std::min(black_dense_end_, reinterpret_cast<uint8_t*>(iter->second.AsMirrorPtr()));
      black_dense_end_ = AlignDown(black_dense_end_, gPageSize);
    }
    black_dense_idx = (black_dense_end_ - moving_space_begin_) / kOffsetChunkSize;
    DCHECK_LE(black_dense_idx, vector_len);
    if (black_dense_idx == vector_len) {
      // There is nothing to compact. All the in-use pages are completely full.
      mid_gen_end_ = black_allocations_begin_;
      return false;
    }
    InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(moving_space_begin_),
                              reinterpret_cast<uintptr_t>(black_dense_end_),
                              moving_space_bitmap_,
                              first_objs_moving_space_);
  } else {
    black_dense_end_ = moving_space_begin_;
  }

1185 InitMovingSpaceFirstObjects(vector_len, black_dense_idx / chunk_info_per_page);
1186 non_moving_first_objs_count_ =
1187 InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(non_moving_space_->Begin()),
1188 reinterpret_cast<uintptr_t>(non_moving_space_->End()),
1189 non_moving_space_bitmap_,
1190 first_objs_non_moving_space_);
1191 // Update the vector one past the heap usage as it is required for black
1192 // allocated objects' post-compact address computation.
1193 uint32_t total_bytes;
1194 if (vector_len < vector_length_) {
1195 vector_len++;
1196 total_bytes = 0;
1197 } else {
1198 // Fetch the value stored in the last element before it gets overwritten by
1199 // std::exclusive_scan().
1200 total_bytes = chunk_info_vec_[vector_len - 1];
1201 }
1202 std::exclusive_scan(chunk_info_vec_ + black_dense_idx,
1203 chunk_info_vec_ + vector_len,
1204 chunk_info_vec_ + black_dense_idx,
1205 black_dense_idx * kOffsetChunkSize);
1206 total_bytes += chunk_info_vec_[vector_len - 1];
1207 post_compact_end_ = AlignUp(moving_space_begin_ + total_bytes, gPageSize);
1208 CHECK_EQ(post_compact_end_, moving_space_begin_ + moving_first_objs_count_ * gPageSize)
1209 << "moving_first_objs_count_:" << moving_first_objs_count_
1210 << " black_dense_idx:" << black_dense_idx << " vector_len:" << vector_len
1211 << " total_bytes:" << total_bytes
1212 << " black_dense_end:" << reinterpret_cast<void*>(black_dense_end_)
1213 << " chunk_info_per_page:" << chunk_info_per_page;
1214 black_objs_slide_diff_ = black_allocations_begin_ - post_compact_end_;
1215 // We shouldn't be consuming more space after compaction than pre-compaction.
1216 CHECK_GE(black_objs_slide_diff_, 0);
1217 for (size_t i = vector_len; i < vector_length_; i++) {
1218 DCHECK_EQ(chunk_info_vec_[i], 0u);
1219 }
1220 if (black_objs_slide_diff_ == 0) {
1221 // Regardless of the gc-type, there are no pages to be compacted. Ensure
1222 // that we don't shrink the mid-gen, which will become old-gen in
1223 // FinishPhase(), thereby possibly moving some objects back to young-gen,
1224 // which can cause memory corruption due to missing card marks.
1225 mid_gen_end_ = std::max(mid_gen_end_, black_dense_end_);
1226 mid_gen_end_ = std::min(mid_gen_end_, post_compact_end_);
1227 return false;
1228 }
1229 if (use_generational_) {
1230 // Current value of mid_gen_end_ represents end of 'pre-compacted' mid-gen,
1231 // which was done at the end of previous GC. Compute, 'post-compacted' end of
1232 // mid-gen, which will be consumed by old-gen at the end of this GC cycle.
1233 DCHECK_NE(mid_gen_end_, nullptr);
1234 mirror::Object* first_obj = nullptr;
1235 if (mid_gen_end_ < black_allocations_begin_) {
1236 ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1237 // Find the first live object in the young-gen.
1238 moving_space_bitmap_->VisitMarkedRange</*kVisitOnce=*/true>(
1239 reinterpret_cast<uintptr_t>(mid_gen_end_),
1240 reinterpret_cast<uintptr_t>(black_allocations_begin_),
1241 [&first_obj](mirror::Object* obj) { first_obj = obj; });
1242 }
1243 if (first_obj != nullptr) {
1244 mirror::Object* compacted_obj;
1245 if (reinterpret_cast<uint8_t*>(first_obj) >= old_gen_end_) {
1246 // post-compact address of the first live object in young-gen.
1247 compacted_obj = PostCompactOldObjAddr(first_obj);
1248 DCHECK_LT(reinterpret_cast<uint8_t*>(compacted_obj), post_compact_end_);
1249 } else {
1250 DCHECK(!young_gen_);
1251 compacted_obj = first_obj;
1252 }
1253 // It's important to page-align mid-gen boundary. However, that means
1254 // there could be an object overlapping that boundary. We will deal with
1255 // the consequences of that at different places. Aligning up is important
1256 // to ensure that we don't de-promote an object from old-gen back to
1257 // young-gen. Otherwise, we may skip dirtying card for such an object if
1258 // it contains native-roots to young-gen.
1259 mid_gen_end_ = AlignUp(reinterpret_cast<uint8_t*>(compacted_obj), gPageSize);
1260 // We need to ensure that for any object in old-gen, its class is also in
1261 // there (for the same reason as mentioned above in the black-dense case).
1262 // So adjust mid_gen_end_ accordingly, in the worst case all the way up
1263 // to post_compact_end_.
1264 auto iter = class_after_obj_map_.lower_bound(ObjReference::FromMirrorPtr(first_obj));
1265 for (; iter != class_after_obj_map_.end(); iter++) {
1266 // 'mid_gen_end_' is now post-compact, so need to compare with
1267 // post-compact addresses.
1268 compacted_obj =
1269 PostCompactAddress(iter->second.AsMirrorPtr(), old_gen_end_, moving_space_end_);
1270 // We cannot update the map with post-compact addresses yet as compaction-phase
1271 // expects pre-compacted addresses. So we will update in FinishPhase().
1272 if (reinterpret_cast<uint8_t*>(compacted_obj) < mid_gen_end_) {
1273 mirror::Object* klass = iter->first.AsMirrorPtr();
1274 DCHECK_LT(reinterpret_cast<uint8_t*>(klass), black_allocations_begin_);
1275 klass = PostCompactAddress(klass, old_gen_end_, moving_space_end_);
1276 // We only need to make sure that the class object doesn't move during
1277 // compaction, which can be ensured by just making its first word be
1278 // consumed into the old-gen.
1279 mid_gen_end_ =
1280 std::max(mid_gen_end_, reinterpret_cast<uint8_t*>(klass) + kObjectAlignment);
1281 mid_gen_end_ = AlignUp(mid_gen_end_, gPageSize);
1282 }
1283 }
1284 CHECK_LE(mid_gen_end_, post_compact_end_);
1285 } else {
1286 // Young-gen is empty.
1287 mid_gen_end_ = post_compact_end_;
1288 }
1289 DCHECK_LE(mid_gen_end_, post_compact_end_);
1290 // We need this temporary bitmap only when running in generational mode.
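// It has one bit for every kObjectAlignment-sized slot in [old_gen_end_, mid_gen_end_);
// the bits are set in SetBitForMidToOldPromotion() as objects are compacted into the
// range that becomes old-gen at the end of this GC.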
1291 if (old_gen_end_ < mid_gen_end_) {
1292 mid_to_old_promo_bit_vec_.reset(
1293 new BitVector((mid_gen_end_ - old_gen_end_) / kObjectAlignment,
1294 /*expandable=*/false,
1295 Allocator::GetCallocAllocator()));
1296 }
1297 }
1298 // How do we handle compaction of heap portion used for allocations after the
1299 // marking-pause?
1300 // All allocations after the marking-pause are considered black (reachable)
1301 // for this GC cycle. However, they need not be allocated contiguously as
1302 // different mutators use TLABs. So we will compact the heap till the point
1303 // where allocations took place before the marking-pause. And everything after
1304 // that will be slid with TLAB holes, and then TLAB info in TLS will be
1305 // appropriately updated in the pre-compaction pause.
1306 // The chunk-info vector entries for the post marking-pause allocations will
1307 // also be updated in the pre-compaction pause.
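// In short: [moving_space_begin_, black_allocations_begin_) is compacted densely using
// the live-words bitmap (CompactPage), whereas [black_allocations_begin_, end) is slid
// page-by-page with TLAB holes preserved (SlideBlackPage).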
1308
1309 if (!uffd_initialized_) {
1310 CreateUserfaultfd(/*post_fork=*/false);
1311 }
1312 return true;
1313 }
1314
1315 class MarkCompact::VerifyRootMarkedVisitor : public SingleRootVisitor {
1316 public:
1317 explicit VerifyRootMarkedVisitor(MarkCompact* collector) : collector_(collector) { }
1318
1319 void VisitRoot(mirror::Object* root, const RootInfo& info) override
1320 REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
1321 CHECK(collector_->IsMarked(root) != nullptr) << info.ToString();
1322 }
1323
1324 private:
1325 MarkCompact* const collector_;
1326 };
1327
1328 void MarkCompact::ReMarkRoots(Runtime* runtime) {
1329 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1330 DCHECK_EQ(thread_running_gc_, Thread::Current());
1331 Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1332 MarkNonThreadRoots(runtime);
1333 MarkConcurrentRoots(
1334 static_cast<VisitRootFlags>(kVisitRootFlagNewRoots | kVisitRootFlagStopLoggingNewRoots |
1335 kVisitRootFlagClearRootLog),
1336 runtime);
1337 if (kVerifyRootsMarked) {
1338 TimingLogger::ScopedTiming t2("(Paused)VerifyRoots", GetTimings());
1339 VerifyRootMarkedVisitor visitor(this);
1340 runtime->VisitRoots(&visitor);
1341 }
1342 }
1343
1344 void MarkCompact::MarkingPause() {
1345 TimingLogger::ScopedTiming t("(Paused)MarkingPause", GetTimings());
1346 Runtime* runtime = Runtime::Current();
1347 Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1348 {
1349 // Handle the dirty objects as we are a concurrent GC
1350 WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1351 {
1352 MutexLock mu2(thread_running_gc_, *Locks::runtime_shutdown_lock_);
1353 MutexLock mu3(thread_running_gc_, *Locks::thread_list_lock_);
1354 std::list<Thread*> thread_list = runtime->GetThreadList()->GetList();
1355 for (Thread* thread : thread_list) {
1356 thread->VisitRoots(this, static_cast<VisitRootFlags>(0));
1357 DCHECK_EQ(thread->GetThreadLocalGcBuffer(), nullptr);
1358 // Need to revoke all the thread-local allocation stacks since we will
1359 // swap the allocation stacks (below) and don't want anybody to allocate
1360 // into the live stack.
1361 thread->RevokeThreadLocalAllocationStack();
1362 bump_pointer_space_->RevokeThreadLocalBuffers(thread);
1363 }
1364 }
1365 ProcessMarkStack();
1366 // Fetch only the accumulated objects-allocated count as it is guaranteed to
1367 // be up-to-date after the TLAB revocation above.
1368 freed_objects_ += bump_pointer_space_->GetAccumulatedObjectsAllocated();
1369 // Capture 'end' of moving-space at this point. Every allocation beyond this
1370 // point will be considered as black.
1371 // Align-up to page boundary so that black allocations happen from next page
1372 // onwards. Also, it ensures that 'end' is aligned for card-table's
1373 // ClearCardRange().
1374 black_allocations_begin_ = bump_pointer_space_->AlignEnd(thread_running_gc_, gPageSize, heap_);
1375 DCHECK_ALIGNED_PARAM(black_allocations_begin_, gPageSize);
1376
1377 // Re-mark root set. Doesn't include thread-roots as they are already marked
1378 // above.
1379 ReMarkRoots(runtime);
1380 // Scan dirty objects.
1381 RecursiveMarkDirtyObjects(/*paused*/ true, accounting::CardTable::kCardDirty);
1382 {
1383 TimingLogger::ScopedTiming t2("SwapStacks", GetTimings());
1384 heap_->SwapStacks();
1385 live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
1386 }
1387 }
1388 // TODO: For PreSweepingGcVerification(), find correct strategy to visit/walk
1389 // objects in bump-pointer space when we have a mark-bitmap to indicate live
1390 // objects. At the same time we also need to be able to visit black allocations,
1391 // even though they are not marked in the bitmap. Without both of these we fail
1392 // pre-sweeping verification. We also leave windows open wherein a
1393 // VisitObjects/Walk on the space would either miss some objects or visit
1394 // unreachable ones. These windows occur while we are switching from the shared
1395 // mutator-lock to exclusive and vice-versa, starting from here until the compaction pause.
1396 // heap_->PreSweepingGcVerification(this);
1397
1398 // Disallow new system weaks to prevent a race which occurs when someone adds
1399 // a new system weak before we sweep them. Since this new system weak may not
1400 // be marked, the GC may incorrectly sweep it. This also fixes a race where
1401 // interning may attempt to return a strong reference to a string that is
1402 // about to be swept.
1403 runtime->DisallowNewSystemWeaks();
1404 // Enable the reference processing slow path, needs to be done with mutators
1405 // paused since there is no lock in the GetReferent fast path.
1406 heap_->GetReferenceProcessor()->EnableSlowPath();
1407 marking_done_ = true;
1408 }
1409
1410 void MarkCompact::SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) {
1411 TimingLogger::ScopedTiming t(paused ? "(Paused)SweepSystemWeaks" : "SweepSystemWeaks",
1412 GetTimings());
1413 ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
1414 runtime->SweepSystemWeaks(this);
1415 }
1416
1417 void MarkCompact::ProcessReferences(Thread* self) {
1418 WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
1419 GetHeap()->GetReferenceProcessor()->ProcessReferences(self, GetTimings());
1420 }
1421
1422 void MarkCompact::SweepArray(accounting::ObjectStack* obj_arr, bool swap_bitmaps) {
1423 TimingLogger::ScopedTiming t("SweepArray", GetTimings());
1424 std::vector<space::ContinuousSpace*> sweep_spaces;
1425 for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) {
1426 if (!space->IsAllocSpace() || space == bump_pointer_space_ ||
1427 immune_spaces_.ContainsSpace(space) || space->GetLiveBitmap() == nullptr) {
1428 continue;
1429 }
1430 sweep_spaces.push_back(space);
1431 }
1432 GarbageCollector::SweepArray(obj_arr, swap_bitmaps, &sweep_spaces);
1433 }
1434
1435 void MarkCompact::Sweep(bool swap_bitmaps) {
1436 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1437 if (young_gen_) {
1438 // Only sweep objects on the live stack.
1439 SweepArray(heap_->GetLiveStack(), /*swap_bitmaps=*/false);
1440 } else {
1441 // Ensure that nobody inserted objects in the live stack after we swapped the
1442 // stacks.
1443 CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size());
1444 {
1445 TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings());
1446 // Mark everything allocated since the last GC as live so that we can sweep
1447 // concurrently, knowing that new allocations won't be marked as live.
1448 accounting::ObjectStack* live_stack = heap_->GetLiveStack();
1449 heap_->MarkAllocStackAsLive(live_stack);
1450 live_stack->Reset();
1451 DCHECK(mark_stack_->IsEmpty());
1452 }
1453 for (const auto& space : GetHeap()->GetContinuousSpaces()) {
1454 if (space->IsContinuousMemMapAllocSpace() && space != bump_pointer_space_ &&
1455 !immune_spaces_.ContainsSpace(space)) {
1456 space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
1457 DCHECK(!alloc_space->IsZygoteSpace());
1458 TimingLogger::ScopedTiming split("SweepMallocSpace", GetTimings());
1459 RecordFree(alloc_space->Sweep(swap_bitmaps));
1460 }
1461 }
1462 SweepLargeObjects(swap_bitmaps);
1463 }
1464 }
1465
1466 void MarkCompact::SweepLargeObjects(bool swap_bitmaps) {
1467 space::LargeObjectSpace* los = heap_->GetLargeObjectsSpace();
1468 if (los != nullptr) {
1469 TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
1470 RecordFreeLOS(los->Sweep(swap_bitmaps));
1471 }
1472 }
1473
1474 void MarkCompact::ReclaimPhase() {
1475 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1476 DCHECK(thread_running_gc_ == Thread::Current());
1477 Runtime* const runtime = Runtime::Current();
1478 // Process the references concurrently.
1479 ProcessReferences(thread_running_gc_);
1480 // TODO: Try to merge this system-weak sweeping with the one while updating
1481 // references during the compaction pause.
1482 SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ false);
1483 runtime->AllowNewSystemWeaks();
1484 // Clean up class loaders after system weaks are swept since that is how we know if class
1485 // unloading occurred.
1486 runtime->GetClassLinker()->CleanupClassLoaders();
1487 {
1488 WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1489 // Reclaim unmarked objects.
1490 Sweep(false);
1491 // Swap the live and mark bitmaps for each space we modified. This is an
1492 // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
1493 // bitmaps.
1494 SwapBitmaps();
1495 // Unbind the live and mark bitmaps.
1496 GetHeap()->UnBindBitmaps();
1497 }
1498 // After sweeping and unbinding, we will need to use the non-moving space's
1499 // live-bitmap instead of its mark-bitmap.
1500 non_moving_space_bitmap_ = non_moving_space_->GetLiveBitmap();
1501 }
1502
1503 // We want to avoid checking, for every reference, whether it lies within the page
1504 // or not. This can be done if we know where in the page the holder object lies.
1505 // If it doesn't overlap either boundary then we can skip the checks.
1506 //
1507 // If kDirtyOldToMid = true, then check if the object contains any references
1508 // into young-gen, which will be mid-gen after this GC. This is required
1509 // as we mark and compact mid-gen again in next GC-cycle, and hence cards
1510 // need to be dirtied. Note that even black-allocations (the next young-gen)
1511 // will also have to be checked because the pages are being compacted and hence
1512 // the card corresponding to the compacted page needs to be dirtied.
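// kCheckBegin/kCheckEnd indicate whether the holder object overlaps the page's begin/end
// boundary, in which case references lying outside [begin, end) are skipped; they get
// updated when the neighbouring page is compacted.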
1513 template <bool kCheckBegin, bool kCheckEnd, bool kDirtyOldToMid>
1514 class MarkCompact::RefsUpdateVisitor {
1515 public:
1516 RefsUpdateVisitor(MarkCompact* collector,
1517 mirror::Object* obj,
1518 uint8_t* begin,
1519 uint8_t* end,
1520 accounting::CardTable* card_table = nullptr,
1521 mirror::Object* card_obj = nullptr)
1522 : RefsUpdateVisitor(collector, obj, begin, end, false) {
1523 DCHECK(!kCheckBegin || begin != nullptr);
1524 DCHECK(!kCheckEnd || end != nullptr);
1525 // We can skip checking each reference for objects whose cards are already dirty.
1526 if (kDirtyOldToMid && card_obj != nullptr) {
1527 dirty_card_ = card_table->IsDirty(card_obj);
1528 }
1529 }
1530
1531 RefsUpdateVisitor(
1532 MarkCompact* collector, mirror::Object* obj, uint8_t* begin, uint8_t* end, bool dirty_card)
1533 : collector_(collector),
1534 moving_space_begin_(collector->black_dense_end_),
1535 moving_space_end_(collector->moving_space_end_),
1536 young_gen_begin_(collector->mid_gen_end_),
1537 obj_(obj),
1538 begin_(begin),
1539 end_(end),
1540 dirty_card_(dirty_card) {}
1541
1542 bool ShouldDirtyCard() const { return dirty_card_; }
1543
1544 void operator()([[maybe_unused]] mirror::Object* old,
1545 MemberOffset offset,
1546 [[maybe_unused]] bool is_static) const ALWAYS_INLINE
1547 REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1548 bool update = true;
1549 if (kCheckBegin || kCheckEnd) {
1550 uint8_t* ref = reinterpret_cast<uint8_t*>(obj_) + offset.Int32Value();
1551 update = (!kCheckBegin || ref >= begin_) && (!kCheckEnd || ref < end_);
1552 }
1553 if (update) {
1554 mirror::Object* new_ref =
1555 collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
1556 CheckShouldDirtyCard(new_ref);
1557 }
1558 }
1559
1560 // For object arrays we don't need to check boundaries here as it's done in
1561 // VisitReferences().
1562 // TODO: Optimize reference updating using SIMD instructions. Object arrays
1563 // are perfect as all references are tightly packed.
1564 void operator()([[maybe_unused]] mirror::Object* old,
1565 MemberOffset offset,
1566 [[maybe_unused]] bool is_static,
1567 [[maybe_unused]] bool is_obj_array) const ALWAYS_INLINE
1568 REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1569 mirror::Object* new_ref =
1570 collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
1571 CheckShouldDirtyCard(new_ref);
1572 }
1573
1574 void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
1575 ALWAYS_INLINE
1576 REQUIRES_SHARED(Locks::mutator_lock_) {
1577 if (!root->IsNull()) {
1578 VisitRoot(root);
1579 }
1580 }
1581
1582 void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
1583 ALWAYS_INLINE
1584 REQUIRES_SHARED(Locks::mutator_lock_) {
1585 mirror::Object* new_ref = collector_->UpdateRoot(root, moving_space_begin_, moving_space_end_);
1586 CheckShouldDirtyCard(new_ref);
1587 }
1588
1589 private:
1590 inline void CheckShouldDirtyCard(mirror::Object* ref) const {
1591 if (kDirtyOldToMid && !dirty_card_) {
1592 // moving_space_end_ is young-gen's end.
1593 dirty_card_ = reinterpret_cast<uint8_t*>(ref) >= young_gen_begin_ &&
1594 reinterpret_cast<uint8_t*>(ref) < moving_space_end_;
1595 }
1596 }
1597
1598 MarkCompact* const collector_;
1599 uint8_t* const moving_space_begin_;
1600 uint8_t* const moving_space_end_;
1601 uint8_t* const young_gen_begin_;
1602 mirror::Object* const obj_;
1603 uint8_t* const begin_;
1604 uint8_t* const end_;
1605 mutable bool dirty_card_;
1606 };
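// Illustrative use (mirrors CompactPage below): for an object overlapping the page's
// start, references before 'start_addr' are skipped:
//   RefsUpdateVisitor</*kCheckBegin=*/true, /*kCheckEnd=*/false, kSetupForGenerational>
//       visitor(this, to_ref, start_addr, nullptr, card_table, post_compact_obj);
//   from_obj->VisitRefsForCompaction</*kFetchObjSize=*/true, /*kVisitNativeRoots=*/false>(
//       visitor, MemberOffset(offset_within_obj), MemberOffset(-1));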
1607
1608 inline void MarkCompact::SetBitForMidToOldPromotion(uint8_t* obj) {
1609 DCHECK(use_generational_);
1610 DCHECK_GE(obj, old_gen_end_);
1611 DCHECK_LT(obj, mid_gen_end_);
1612 // This doesn't need to be atomic as every thread only sets bits in the
1613 // bit_vector words corresponding to the page it is compacting.
1614 mid_to_old_promo_bit_vec_->SetBit((obj - old_gen_end_) / kObjectAlignment);
1615 }
1616
1617 bool MarkCompact::IsValidObject(mirror::Object* obj) const {
1618 mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
1619 if (!heap_->GetVerification()->IsValidHeapObjectAddress(klass)) {
1620 return false;
1621 }
1622 return heap_->GetVerification()->IsValidClassUnchecked<kWithFromSpaceBarrier>(
1623 obj->GetClass<kVerifyNone, kWithFromSpaceBarrier>());
1624 }
1625
1626 template <typename Callback>
1627 void MarkCompact::VerifyObject(mirror::Object* ref, Callback& callback) const {
1628 if (kIsDebugBuild) {
1629 mirror::Class* klass = ref->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1630 mirror::Class* pre_compact_klass = ref->GetClass<kVerifyNone, kWithoutReadBarrier>();
1631 mirror::Class* klass_klass = klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1632 mirror::Class* klass_klass_klass = klass_klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1633 if (HasAddress(pre_compact_klass) &&
1634 reinterpret_cast<uint8_t*>(pre_compact_klass) < black_allocations_begin_) {
1635 CHECK(moving_space_bitmap_->Test(pre_compact_klass))
1636 << "ref=" << ref
1637 << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1638 << " pre_compact_klass=" << pre_compact_klass
1639 << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1640 if (!young_gen_) {
1641 CHECK(live_words_bitmap_->Test(pre_compact_klass));
1642 }
1643 }
1644 if (!IsValidObject(ref)) {
1645 std::ostringstream oss;
1646 oss << "Invalid object: "
1647 << "ref=" << ref
1648 << " klass=" << klass
1649 << " klass_klass=" << klass_klass
1650 << " klass_klass_klass=" << klass_klass_klass
1651 << " pre_compact_klass=" << pre_compact_klass
1652 << " from_space_begin=" << static_cast<void*>(from_space_begin_)
1653 << " pre_compact_begin=" << static_cast<void*>(bump_pointer_space_->Begin())
1654 << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1655 << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1656
1657 // Call callback before dumping larger data like RAM and space dumps.
1658 callback(oss);
1659
1660 oss << " \nobject="
1661 << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(ref), 128)
1662 << " \nklass(from)="
1663 << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(klass), 128)
1664 << "spaces:\n";
1665 heap_->DumpSpaces(oss);
1666 LOG(FATAL) << oss.str();
1667 }
1668 }
1669 }
1670
1671 template <bool kSetupForGenerational>
1672 void MarkCompact::CompactPage(mirror::Object* obj,
1673 uint32_t offset,
1674 uint8_t* addr,
1675 uint8_t* to_space_addr,
1676 bool needs_memset_zero) {
1677 DCHECK_ALIGNED_PARAM(to_space_addr, gPageSize);
1678 DCHECK(moving_space_bitmap_->Test(obj)
1679 && live_words_bitmap_->Test(obj));
1680 DCHECK(live_words_bitmap_->Test(offset)) << "obj=" << obj
1681 << " offset=" << offset
1682 << " addr=" << static_cast<void*>(addr)
1683 << " black_allocs_begin="
1684 << static_cast<void*>(black_allocations_begin_)
1685 << " post_compact_addr="
1686 << static_cast<void*>(post_compact_end_);
1687 accounting::CardTable* card_table = heap_->GetCardTable();
1688 uint8_t* const start_addr = addr;
1689 // We need to scan the cards of the mid-gen (which is going to be consumed
1690 // into the old-gen after this GC) looking for dirty ones (dirtied after the
1691 // marking-pause and before the compaction pause), and dirty the corresponding
1692 // post-compact cards. We could have detected reference fields while updating
1693 // them in RefsUpdateVisitor, but that would not catch native-roots, so we
1694 // directly look at the pre-compact card-table.
1695 // NOTE: we may get some false-positives if the same address in the post-compact
1696 // heap has already been allocated as a TLAB and has had write-barriers invoked
1697 // on it. But that is not harmful.
1698 size_t cards_per_page = gPageSize >> accounting::CardTable::kCardShift;
1699 size_t dest_cards = 0;
1700 DCHECK(IsAligned<accounting::CardTable::kCardSize>(gPageSize));
1701 static_assert(sizeof(dest_cards) * kBitsPerByte >=
1702 kMaxPageSize / accounting::CardTable::kCardSize);
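// 'dest_cards' is a small bitmask with one bit per card of the destination page: a set
// bit means the corresponding post-compact card must be treated as dirty when visiting
// the objects copied into it (see the 'dest_cards & (1 << ...)' uses below). A dirty
// source card can set two adjacent bits when it straddles a destination card boundary.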
1703 // Number of distinct live-strides encountered on this page.
1704 size_t stride_count = 0;
1705 uint8_t* last_stride = addr;
1706 uint32_t last_stride_begin = 0;
1707 auto verify_obj_callback = [&](std::ostream& os) {
1708 os << " stride_count=" << stride_count << " last_stride=" << static_cast<void*>(last_stride)
1709 << " offset=" << offset << " start_addr=" << static_cast<void*>(start_addr);
1710 };
1711 live_words_bitmap_->VisitLiveStrides(
1712 offset,
1713 black_allocations_begin_,
1714 gPageSize,
1715 [&](uint32_t stride_begin, size_t stride_size, [[maybe_unused]] bool is_last)
1716 REQUIRES_SHARED(Locks::mutator_lock_) {
1717 size_t stride_in_bytes = stride_size * kAlignment;
1718 size_t stride_begin_bytes = stride_begin * kAlignment;
1719 DCHECK_LE(stride_in_bytes, gPageSize);
1720 last_stride_begin = stride_begin;
1721 DCHECK(IsAligned<kAlignment>(addr));
1722 memcpy(addr, from_space_begin_ + stride_begin_bytes, stride_in_bytes);
1723 if (kIsDebugBuild) {
1724 uint8_t* space_begin = bump_pointer_space_->Begin();
1725 // We can interpret the first word of the stride as an
1726 // obj only from second stride onwards, as the first
1727 // stride's first-object may have started on previous
1728 // page. The only exception is the first page of the
1729 // moving space.
1730 if (stride_count > 0 || stride_begin * kAlignment < gPageSize) {
1731 mirror::Object* o =
1732 reinterpret_cast<mirror::Object*>(space_begin + stride_begin * kAlignment);
1733 CHECK(live_words_bitmap_->Test(o)) << "ref=" << o;
1734 CHECK(moving_space_bitmap_->Test(o))
1735 << "ref=" << o << " bitmap: " << moving_space_bitmap_->DumpMemAround(o);
1736 VerifyObject(reinterpret_cast<mirror::Object*>(addr), verify_obj_callback);
1737 }
1738 }
1739 last_stride = addr;
1740 stride_count++;
1741 if (kSetupForGenerational) {
1742 // Card idx within the gPageSize sized destination page.
1743 size_t dest_card_idx = (addr - start_addr) >> accounting::CardTable::kCardShift;
1744 DCHECK_LT(dest_card_idx, cards_per_page);
1745 // Bytes remaining to fill in the current dest card.
1746 size_t dest_bytes_remaining = accounting::CardTable::kCardSize -
1747 (addr - start_addr) % accounting::CardTable::kCardSize;
1748 // Update 'addr' for next stride before starting to modify
1749 // 'stride_in_bytes' in the loops below.
1750 addr += stride_in_bytes;
1751 // Unconsumed bytes in the current src card.
1752 size_t src_card_bytes = accounting::CardTable::kCardSize -
1753 stride_begin_bytes % accounting::CardTable::kCardSize;
1754 src_card_bytes = std::min(src_card_bytes, stride_in_bytes);
1755 uint8_t* end_card = card_table->CardFromAddr(
1756 moving_space_begin_ + stride_begin_bytes + stride_in_bytes - 1);
1757 for (uint8_t* card =
1758 card_table->CardFromAddr(moving_space_begin_ + stride_begin_bytes);
1759 card <= end_card;
1760 card++) {
1761 if (*card == accounting::CardTable::kCardDirty) {
1762 // If the current src card will contribute to the next dest
1763 // card as well, then dirty the next one too.
1764 size_t val = dest_bytes_remaining < src_card_bytes ? 3 : 1;
1765 dest_cards |= val << dest_card_idx;
1766 }
1767 // Adjust destination card and its remaining bytes for next iteration.
1768 if (dest_bytes_remaining <= src_card_bytes) {
1769 dest_bytes_remaining =
1770 accounting::CardTable::kCardSize - (src_card_bytes - dest_bytes_remaining);
1771 dest_card_idx++;
1772 } else {
1773 dest_bytes_remaining -= src_card_bytes;
1774 }
1775 DCHECK_LE(dest_card_idx, cards_per_page);
1776 stride_in_bytes -= src_card_bytes;
1777 src_card_bytes = std::min(accounting::CardTable::kCardSize, stride_in_bytes);
1778 }
1779 } else {
1780 addr += stride_in_bytes;
1781 }
1782 });
1783 DCHECK_LT(last_stride, start_addr + gPageSize);
1784 DCHECK_GT(stride_count, 0u);
1785 size_t obj_size = 0;
1786 uint32_t offset_within_obj =
1787 offset * kAlignment - (reinterpret_cast<uint8_t*>(obj) - moving_space_begin_);
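// Byte offset within 'obj' (the first object of this page) at which the page begins;
// it is non-zero when 'obj' overlaps the previous to-space page.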
1788 // First object
1789 if (offset_within_obj > 0) {
1790 bool should_dirty_card;
1791 mirror::Object* to_ref = reinterpret_cast<mirror::Object*>(start_addr - offset_within_obj);
1792 mirror::Object* from_obj = GetFromSpaceAddr(obj);
1793 mirror::Object* post_compact_obj = nullptr;
1794 if (kSetupForGenerational) {
1795 post_compact_obj = PostCompactAddress(obj, black_dense_end_, moving_space_end_);
1796 }
1797 if (stride_count > 1) {
1798 RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ false, kSetupForGenerational> visitor(
1799 this, to_ref, start_addr, nullptr, card_table, post_compact_obj);
1800 obj_size =
1801 from_obj->VisitRefsForCompaction</*kFetchObjSize*/ true, /*kVisitNativeRoots*/ false>(
1802 visitor, MemberOffset(offset_within_obj), MemberOffset(-1));
1803 should_dirty_card = visitor.ShouldDirtyCard();
1804 } else {
1805 RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ true, kSetupForGenerational> visitor(
1806 this, to_ref, start_addr, start_addr + gPageSize, card_table, post_compact_obj);
1807 obj_size =
1808 from_obj->VisitRefsForCompaction</*kFetchObjSize*/ true, /*kVisitNativeRoots*/ false>(
1809 visitor,
1810 MemberOffset(offset_within_obj),
1811 MemberOffset(offset_within_obj + gPageSize));
1812 should_dirty_card = visitor.ShouldDirtyCard();
1813 }
1814 if (kSetupForGenerational && should_dirty_card) {
1815 card_table->MarkCard(post_compact_obj);
1816 }
1817 obj_size = RoundUp(obj_size, kAlignment);
1818 DCHECK_GT(obj_size, offset_within_obj)
1819 << "obj:" << obj
1820 << " class:" << from_obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1821 << " to_addr:" << to_ref
1822 << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1823 << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1824 << " offset:" << offset * kAlignment << " class-after-obj-iter:"
1825 << (class_after_obj_iter_ != class_after_obj_map_.rend()
1826 ? class_after_obj_iter_->first.AsMirrorPtr()
1827 : nullptr)
1828 << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1829 << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1830 << " offset-of-last-idx:"
1831 << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1832 << " first-obj-of-last-idx:"
1833 << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1834
1835 obj_size -= offset_within_obj;
1836 // If there is only one stride, then adjust last_stride_begin to the
1837 // end of the first object.
1838 if (stride_count == 1) {
1839 last_stride_begin += obj_size / kAlignment;
1840 }
1841 }
1842
1843 // Except for the last page being compacted, the pages will have addr ==
1844 // start_addr + gPageSize.
1845 uint8_t* const end_addr = addr;
1846 addr = start_addr;
1847 size_t bytes_done = obj_size;
1848 // All strides except the last one can be updated without any boundary
1849 // checks.
1850 DCHECK_LE(addr, last_stride);
1851 size_t bytes_to_visit = last_stride - addr;
1852 DCHECK_LE(bytes_to_visit, gPageSize);
1853 while (bytes_to_visit > bytes_done) {
1854 mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1855 VerifyObject(ref, verify_obj_callback);
1856 RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ false, kSetupForGenerational> visitor(
1857 this,
1858 ref,
1859 nullptr,
1860 nullptr,
1861 dest_cards & (1 << (bytes_done >> accounting::CardTable::kCardShift)));
1862 obj_size = ref->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1863 if (kSetupForGenerational) {
1864 SetBitForMidToOldPromotion(to_space_addr + bytes_done);
1865 if (visitor.ShouldDirtyCard()) {
1866 card_table->MarkCard(reinterpret_cast<mirror::Object*>(to_space_addr + bytes_done));
1867 }
1868 }
1869 obj_size = RoundUp(obj_size, kAlignment);
1870 bytes_done += obj_size;
1871 }
1872 // Last stride may have multiple objects in it and we don't know where the
1873 // last object which crosses the page boundary starts, therefore check
1874 // page-end in all of these objects. Also, we need to call
1875 // VisitRefsForCompaction() with from-space object as we fetch object size,
1876 // which in case of klass requires 'class_size_'.
1877 uint8_t* from_addr = from_space_begin_ + last_stride_begin * kAlignment;
1878 bytes_to_visit = end_addr - addr;
1879 DCHECK_LE(bytes_to_visit, gPageSize);
1880 while (bytes_to_visit > bytes_done) {
1881 mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1882 obj = reinterpret_cast<mirror::Object*>(from_addr);
1883 VerifyObject(ref, verify_obj_callback);
1884 RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true, kSetupForGenerational> visitor(
1885 this,
1886 ref,
1887 nullptr,
1888 start_addr + gPageSize,
1889 dest_cards & (1 << (bytes_done >> accounting::CardTable::kCardShift)));
1890 obj_size = obj->VisitRefsForCompaction(visitor,
1891 MemberOffset(0),
1892 MemberOffset(end_addr - (addr + bytes_done)));
1893 if (kSetupForGenerational) {
1894 SetBitForMidToOldPromotion(to_space_addr + bytes_done);
1895 if (visitor.ShouldDirtyCard()) {
1896 card_table->MarkCard(reinterpret_cast<mirror::Object*>(to_space_addr + bytes_done));
1897 }
1898 }
1899 obj_size = RoundUp(obj_size, kAlignment);
1900 DCHECK_GT(obj_size, 0u)
1901 << "from_addr:" << obj
1902 << " from-space-class:" << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1903 << " to_addr:" << ref
1904 << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1905 << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1906 << " offset:" << offset * kAlignment << " bytes_done:" << bytes_done
1907 << " class-after-obj-iter:"
1908 << (class_after_obj_iter_ != class_after_obj_map_.rend() ?
1909 class_after_obj_iter_->first.AsMirrorPtr() :
1910 nullptr)
1911 << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1912 << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1913 << " offset-of-last-idx:"
1914 << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1915 << " first-obj-of-last-idx:"
1916 << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1917
1918 from_addr += obj_size;
1919 bytes_done += obj_size;
1920 }
1921 // The last page that we compact may have some bytes left untouched at the
1922 // end; we should zero them as the kernel copies at page granularity.
1923 if (needs_memset_zero && UNLIKELY(bytes_done < gPageSize)) {
1924 std::memset(addr + bytes_done, 0x0, gPageSize - bytes_done);
1925 }
1926 }
1927
1928 // We store the starting point (pre_compact_page - first_obj) and first-chunk's
1929 // size. If more TLAB(s) started in this page, then those chunks are identified
1930 // using mark bitmap. All this info is prepared in UpdateMovingSpaceBlackAllocations().
1931 // If we find a set bit in the bitmap, then we copy the remaining page and then
1932 // use the bitmap to visit each object for updating references.
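// 'first_chunk_size' is the byte size of the first contiguous chunk of black allocations
// copied into this page; any further chunks on the page are located via the mark-bitmap.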
1933 void MarkCompact::SlideBlackPage(mirror::Object* first_obj,
1934 mirror::Object* next_page_first_obj,
1935 uint32_t first_chunk_size,
1936 uint8_t* const pre_compact_page,
1937 uint8_t* dest,
1938 bool needs_memset_zero) {
1939 DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
1940 size_t bytes_copied;
1941 uint8_t* src_addr = reinterpret_cast<uint8_t*>(GetFromSpaceAddr(first_obj));
1942 uint8_t* pre_compact_addr = reinterpret_cast<uint8_t*>(first_obj);
1943 uint8_t* const pre_compact_page_end = pre_compact_page + gPageSize;
1944 uint8_t* const dest_page_end = dest + gPageSize;
1945
1946 auto verify_obj_callback = [&] (std::ostream& os) {
1947 os << " first_obj=" << first_obj
1948 << " next_page_first_obj=" << next_page_first_obj
1949 << " first_chunk_sie=" << first_chunk_size
1950 << " dest=" << static_cast<void*>(dest)
1951 << " pre_compact_page="
1952 << static_cast<void* const>(pre_compact_page);
1953 };
1954 // We have an empty portion at the beginning of the page. Zero it.
1955 if (pre_compact_addr > pre_compact_page) {
1956 bytes_copied = pre_compact_addr - pre_compact_page;
1957 DCHECK_LT(bytes_copied, gPageSize);
1958 if (needs_memset_zero) {
1959 std::memset(dest, 0x0, bytes_copied);
1960 }
1961 dest += bytes_copied;
1962 } else {
1963 bytes_copied = 0;
1964 size_t offset = pre_compact_page - pre_compact_addr;
1965 pre_compact_addr = pre_compact_page;
1966 src_addr += offset;
1967 DCHECK(IsAlignedParam(src_addr, gPageSize));
1968 }
1969 // Copy the first chunk of live words
1970 std::memcpy(dest, src_addr, first_chunk_size);
1971 // Update references in the first chunk. Use object size to find next object.
1972 {
1973 size_t bytes_to_visit = first_chunk_size;
1974 size_t obj_size;
1975 // The first object started in some previous page. So we need to check the
1976 // beginning.
1977 DCHECK_LE(reinterpret_cast<uint8_t*>(first_obj), pre_compact_addr);
1978 size_t offset = pre_compact_addr - reinterpret_cast<uint8_t*>(first_obj);
1979 if (bytes_copied == 0 && offset > 0) {
1980 mirror::Object* to_obj = reinterpret_cast<mirror::Object*>(dest - offset);
1981 mirror::Object* from_obj = reinterpret_cast<mirror::Object*>(src_addr - offset);
1982 // If the next page's first-obj is in this page or nullptr, then we don't
1983 // need to check the end boundary.
1984 if (next_page_first_obj == nullptr
1985 || (first_obj != next_page_first_obj
1986 && reinterpret_cast<uint8_t*>(next_page_first_obj) <= pre_compact_page_end)) {
1987 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1988 to_obj,
1989 dest,
1990 nullptr);
1991 obj_size = from_obj->VisitRefsForCompaction<
1992 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1993 MemberOffset(offset),
1994 MemberOffset(-1));
1995 } else {
1996 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1997 to_obj,
1998 dest,
1999 dest_page_end);
2000 obj_size = from_obj->VisitRefsForCompaction<
2001 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
2002 MemberOffset(offset),
2003 MemberOffset(offset
2004 + gPageSize));
2005 if (first_obj == next_page_first_obj) {
2006 // First object is the only object on this page. So there's nothing else left to do.
2007 return;
2008 }
2009 }
2010 obj_size = RoundUp(obj_size, kAlignment);
2011 obj_size -= offset;
2012 dest += obj_size;
2013 bytes_to_visit -= obj_size;
2014 }
2015 bytes_copied += first_chunk_size;
2016 // If the last object in this page is next_page_first_obj, then we need to check the end boundary.
2017 bool check_last_obj = false;
2018 if (next_page_first_obj != nullptr
2019 && reinterpret_cast<uint8_t*>(next_page_first_obj) < pre_compact_page_end
2020 && bytes_copied == gPageSize) {
2021 size_t diff = pre_compact_page_end - reinterpret_cast<uint8_t*>(next_page_first_obj);
2022 DCHECK_LE(diff, gPageSize);
2023 DCHECK_LE(diff, bytes_to_visit);
2024 bytes_to_visit -= diff;
2025 check_last_obj = true;
2026 }
2027 while (bytes_to_visit > 0) {
2028 mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
2029 VerifyObject(dest_obj, verify_obj_callback);
2030 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(this,
2031 dest_obj,
2032 nullptr,
2033 nullptr);
2034 obj_size = dest_obj->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
2035 obj_size = RoundUp(obj_size, kAlignment);
2036 bytes_to_visit -= obj_size;
2037 dest += obj_size;
2038 }
2039 DCHECK_EQ(bytes_to_visit, 0u);
2040 if (check_last_obj) {
2041 mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
2042 VerifyObject(dest_obj, verify_obj_callback);
2043 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this,
2044 dest_obj,
2045 nullptr,
2046 dest_page_end);
2047 mirror::Object* obj = GetFromSpaceAddr(next_page_first_obj);
2048 obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
2049 MemberOffset(0),
2050 MemberOffset(dest_page_end - dest));
2051 return;
2052 }
2053 }
2054
2055 // Probably a TLAB finished on this page and/or a new TLAB started as well.
2056 if (bytes_copied < gPageSize) {
2057 src_addr += first_chunk_size;
2058 pre_compact_addr += first_chunk_size;
2059 // Use mark-bitmap to identify where objects are. First call
2060 // VisitMarkedRange for only the first marked bit. If found, zero all bytes
2061 // until that object and then call memcpy on the rest of the page.
2062 // Then call VisitMarkedRange for all marked bits *after* the one found in
2063 // this invocation. This time to visit references.
2064 uintptr_t start_visit = reinterpret_cast<uintptr_t>(pre_compact_addr);
2065 uintptr_t page_end = reinterpret_cast<uintptr_t>(pre_compact_page_end);
2066 mirror::Object* found_obj = nullptr;
2067 moving_space_bitmap_->VisitMarkedRange</*kVisitOnce*/true>(start_visit,
2068 page_end,
2069 [&found_obj](mirror::Object* obj) {
2070 found_obj = obj;
2071 });
2072 size_t remaining_bytes = gPageSize - bytes_copied;
2073 if (found_obj == nullptr) {
2074 if (needs_memset_zero) {
2075 // No more black objects in this page. Zero the remaining bytes and return.
2076 std::memset(dest, 0x0, remaining_bytes);
2077 }
2078 return;
2079 }
2080 // Copy everything in this page, which includes any zeroed regions
2081 // in-between.
2082 std::memcpy(dest, src_addr, remaining_bytes);
2083 DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
2084 moving_space_bitmap_->VisitMarkedRange(
2085 reinterpret_cast<uintptr_t>(found_obj) + mirror::kObjectHeaderSize,
2086 page_end,
2087 [&found_obj, pre_compact_addr, dest, this, verify_obj_callback] (mirror::Object* obj)
2088 REQUIRES_SHARED(Locks::mutator_lock_) {
2089 ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
2090 mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff);
2091 VerifyObject(ref, verify_obj_callback);
2092 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
2093 visitor(this, ref, nullptr, nullptr);
2094 ref->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
2095 MemberOffset(0),
2096 MemberOffset(-1));
2097 // Remember for next round.
2098 found_obj = obj;
2099 });
2100 // found_obj may have been updated in VisitMarkedRange. Visit the last found
2101 // object.
2102 DCHECK_GT(reinterpret_cast<uint8_t*>(found_obj), pre_compact_addr);
2103 DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
2104 ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
2105 mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest + diff);
2106 VerifyObject(dest_obj, verify_obj_callback);
2107 RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true> visitor(
2108 this, dest_obj, nullptr, dest_page_end);
2109 // Last object could overlap with next page. And if it happens to be a
2110 // class, then we may access something (like static-fields' offsets) which
2111 // is on the next page. Therefore, use from-space's reference.
2112 mirror::Object* obj = GetFromSpaceAddr(found_obj);
2113 obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2114 visitor, MemberOffset(0), MemberOffset(page_end - reinterpret_cast<uintptr_t>(found_obj)));
2115 }
2116 }
2117
2118 template <uint32_t kYieldMax = 5, uint64_t kSleepUs = 10>
2119 static void BackOff(uint32_t i) {
2120 // TODO: Consider adding x86 PAUSE and/or ARM YIELD here.
2121 if (i <= kYieldMax) {
2122 sched_yield();
2123 } else {
2124 // nanosleep is not in the async-signal-safe list, but bionic implements it
2125 // with a pure system call, so it should be fine.
2126 NanoSleep(kSleepUs * 1000 * (i - kYieldMax));
2127 }
2128 }
2129
2130 size_t MarkCompact::ZeropageIoctl(void* addr,
2131 size_t length,
2132 bool tolerate_eexist,
2133 bool tolerate_enoent) {
2134 int32_t backoff_count = -1;
2135 int32_t max_backoff = 10; // max native priority.
2136 struct uffdio_zeropage uffd_zeropage;
2137 DCHECK(IsAlignedParam(addr, gPageSize));
2138 uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr);
2139 uffd_zeropage.range.len = length;
2140 uffd_zeropage.mode = gUffdSupportsMmapTrylock ? UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK : 0;
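// With the trylock mode the kernel returns EAGAIN instead of sleeping on mmap_lock; the
// loop below backs off on such contention and eventually falls back to the blocking
// mode (mode = 0).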
2141 while (true) {
2142 uffd_zeropage.zeropage = 0;
2143 int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage);
2144 if (ret == 0) {
2145 DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(length));
2146 return length;
2147 } else if (errno == EAGAIN) {
2148 if (uffd_zeropage.zeropage > 0) {
2149 // Contention was observed after acquiring mmap_lock. But the first page
2150 // is already done, which is what we care about.
2151 DCHECK(IsAlignedParam(uffd_zeropage.zeropage, gPageSize));
2152 DCHECK_GE(uffd_zeropage.zeropage, static_cast<ssize_t>(gPageSize));
2153 return uffd_zeropage.zeropage;
2154 } else if (uffd_zeropage.zeropage < 0) {
2155 // mmap_read_trylock() failed due to contention. Back-off and retry.
2156 DCHECK_EQ(uffd_zeropage.zeropage, -EAGAIN);
2157 if (backoff_count == -1) {
2158 int prio = Thread::Current()->GetNativePriority();
2159 DCHECK(prio > 0 && prio <= 10) << prio;
2160 max_backoff -= prio;
2161 backoff_count = 0;
2162 }
2163 if (backoff_count < max_backoff) {
2164 // Using 3 to align 'normal' priority threads with sleep.
2165 BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
2166 } else {
2167 uffd_zeropage.mode = 0;
2168 }
2169 }
2170 } else if (tolerate_eexist && errno == EEXIST) {
2171 // Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
2172 // wouldn't be included in it.
2173 return uffd_zeropage.zeropage > 0 ? uffd_zeropage.zeropage + gPageSize : gPageSize;
2174 } else {
2175 CHECK(tolerate_enoent && errno == ENOENT)
2176 << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr;
2177 return 0;
2178 }
2179 }
2180 }
2181
2182 size_t MarkCompact::CopyIoctl(
2183 void* dst, void* buffer, size_t length, bool return_on_contention, bool tolerate_enoent) {
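// Returns the number of bytes whose mapping was ensured, which may be less than 'length'
// on contention, on EEXIST (the already-present page is counted), or on a tolerated ENOENT.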
2184 int32_t backoff_count = -1;
2185 int32_t max_backoff = 10; // max native priority.
2186 struct uffdio_copy uffd_copy;
2187 uffd_copy.mode = gUffdSupportsMmapTrylock ? UFFDIO_COPY_MODE_MMAP_TRYLOCK : 0;
2188 uffd_copy.src = reinterpret_cast<uintptr_t>(buffer);
2189 uffd_copy.dst = reinterpret_cast<uintptr_t>(dst);
2190 uffd_copy.len = length;
2191 uffd_copy.copy = 0;
2192 while (true) {
2193 int ret = ioctl(uffd_, UFFDIO_COPY, &uffd_copy);
2194 if (ret == 0) {
2195 DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(length));
2196 break;
2197 } else if (errno == EAGAIN) {
2198 // Contention observed.
2199 DCHECK_NE(uffd_copy.copy, 0);
2200 if (uffd_copy.copy > 0) {
2201 // Contention was observed after acquiring mmap_lock.
2202 DCHECK(IsAlignedParam(uffd_copy.copy, gPageSize));
2203 DCHECK_GE(uffd_copy.copy, static_cast<ssize_t>(gPageSize));
2204 break;
2205 } else {
2206 // mmap_read_trylock() failed due to contention.
2207 DCHECK_EQ(uffd_copy.copy, -EAGAIN);
2208 uffd_copy.copy = 0;
2209 if (return_on_contention) {
2210 break;
2211 }
2212 }
2213 if (backoff_count == -1) {
2214 int prio = Thread::Current()->GetNativePriority();
2215 DCHECK(prio > 0 && prio <= 10) << prio;
2216 max_backoff -= prio;
2217 backoff_count = 0;
2218 }
2219 if (backoff_count < max_backoff) {
2220 // Using 3 to align 'normal' priority threads with sleep.
2221 BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
2222 } else {
2223 uffd_copy.mode = 0;
2224 }
2225 } else if (errno == EEXIST) {
2226 DCHECK_NE(uffd_copy.copy, 0);
2227 if (uffd_copy.copy < 0) {
2228 uffd_copy.copy = 0;
2229 }
2230 // Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
2231 // wouldn't be included in it.
2232 uffd_copy.copy += gPageSize;
2233 break;
2234 } else {
2235 CHECK(tolerate_enoent && errno == ENOENT)
2236 << "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". src:" << buffer
2237 << " dst:" << dst;
2238 return uffd_copy.copy > 0 ? uffd_copy.copy : 0;
2239 }
2240 }
2241 return uffd_copy.copy;
2242 }
2243
2244 template <int kMode, typename CompactionFn>
2245 bool MarkCompact::DoPageCompactionWithStateChange(size_t page_idx,
2246 uint8_t* to_space_page,
2247 uint8_t* page,
2248 bool map_immediately,
2249 CompactionFn func) {
2250 uint32_t expected_state = static_cast<uint8_t>(PageState::kUnprocessed);
2251 uint32_t desired_state = static_cast<uint8_t>(map_immediately ? PageState::kProcessingAndMapping :
2252 PageState::kProcessing);
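// A page moves from kUnprocessed to kProcessing/kProcessingAndMapping and finally to
// kProcessed/kProcessedAndMapped. Only the thread that wins the CAS below compacts the
// page; a losing thread can rely on the winner to finish it.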
2253 // In the concurrent case (kMode != kFallbackMode) we need to ensure that the update
2254 // to moving_spaces_status_[page_idx] is released before the contents of the page are
2255 // made accessible to other threads.
2256 //
2257 // We need acquire ordering here to ensure that when the CAS fails, another thread
2258 // has completed processing the page, which is guaranteed by the release below.
2259 if (kMode == kFallbackMode || moving_pages_status_[page_idx].compare_exchange_strong(
2260 expected_state, desired_state, std::memory_order_acquire)) {
2261 func();
2262 if (kMode == kCopyMode) {
2263 if (map_immediately) {
2264 CopyIoctl(to_space_page,
2265 page,
2266 gPageSize,
2267 /*return_on_contention=*/false,
2268 /*tolerate_enoent=*/false);
2269 // Store is sufficient as no other thread could modify the status at this
2270 // point. Relaxed order is sufficient as the ioctl will act as a fence.
2271 moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2272 std::memory_order_relaxed);
2273 } else {
2274 // Add the src page's index in the status word.
2275 DCHECK(from_space_map_.HasAddress(page));
2276 DCHECK_LE(static_cast<size_t>(page - from_space_begin_),
2277 std::numeric_limits<uint32_t>::max());
2278 uint32_t store_val = page - from_space_begin_;
2279 DCHECK_EQ(store_val & kPageStateMask, 0u);
2280 store_val |= static_cast<uint8_t>(PageState::kProcessed);
2281 // Store is sufficient as no other thread would modify the status at this point.
2282 moving_pages_status_[page_idx].store(store_val, std::memory_order_release);
2283 }
2284 }
2285 return true;
2286 } else {
2287 // Only GC thread could have set the state to Processed.
2288 DCHECK_NE(expected_state, static_cast<uint8_t>(PageState::kProcessed));
2289 return false;
2290 }
2291 }
2292
2293 bool MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_idx_for_mapping) {
2294 // Thanks to sliding compaction, bump-pointer allocations, and reverse
2295 // compaction (see CompactMovingSpace) the logic here is pretty simple: find
2296 // the to-space page up to which compaction has finished; all the from-space
2297 // pages corresponding to it onwards can be freed. There are some corner
2298 // cases to be taken care of, which are described below.
2299 size_t idx = last_checked_reclaim_page_idx_;
2300 // Find the to-space page up to which the corresponding from-space pages can be
2301 // freed.
2302 for (; idx > cur_page_idx; idx--) {
2303 PageState state = GetMovingPageState(idx - 1);
2304 if (state == PageState::kMutatorProcessing) {
2305 // Some mutator is working on the page.
2306 break;
2307 }
2308 DCHECK(state >= PageState::kProcessed ||
2309 (state == PageState::kUnprocessed &&
2310 (mode == kFallbackMode || idx > moving_first_objs_count_)));
2311 }
2312 DCHECK_LE(idx, last_checked_reclaim_page_idx_);
2313 if (idx == last_checked_reclaim_page_idx_) {
2314 // Nothing to do.
2315 return false;
2316 }
2317
2318 uint8_t* reclaim_begin;
2319 uint8_t* idx_addr;
2320 // Calculate the first from-space page to be freed using 'idx'. If the
2321 // first-object of the idx'th to-space page started before the corresponding
2322 // from-space page, which is almost always the case in the compaction portion
2323 // of the moving-space, then it indicates that the subsequent pages that are
2324 // yet to be compacted will need the from-space pages. Therefore, find the page
2325 // (from the already compacted pages) whose first-object is different from
2326 // ours. All the from-space pages starting from that one are safe to be
2327 // removed. Please note that this iteration is not expected to be long in
2328 // normal cases as objects are smaller than page size.
2329 if (idx >= moving_first_objs_count_) {
2330 // black-allocated portion of the moving-space
2331 idx_addr = black_allocations_begin_ + (idx - moving_first_objs_count_) * gPageSize;
2332 reclaim_begin = idx_addr;
2333 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2334 if (first_obj != nullptr && reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
2335 size_t idx_len = moving_first_objs_count_ + black_page_count_;
2336 for (size_t i = idx + 1; i < idx_len; i++) {
2337 mirror::Object* obj = first_objs_moving_space_[i].AsMirrorPtr();
2338 // A null first-object indicates that the corresponding to-space page is
2339 // not used yet. So we can compute its from-space page and use that.
2340 if (obj != first_obj) {
2341 reclaim_begin = obj != nullptr
2342 ? AlignUp(reinterpret_cast<uint8_t*>(obj), gPageSize)
2343 : (black_allocations_begin_ + (i - moving_first_objs_count_) * gPageSize);
2344 break;
2345 }
2346 }
2347 }
2348 } else {
2349 DCHECK_GE(pre_compact_offset_moving_space_[idx], 0u);
2350 idx_addr = moving_space_begin_ + idx * gPageSize;
2351 if (idx_addr >= black_dense_end_) {
2352 idx_addr = moving_space_begin_ + pre_compact_offset_moving_space_[idx] * kAlignment;
2353 }
2354 reclaim_begin = idx_addr;
2355 DCHECK_LE(reclaim_begin, black_allocations_begin_);
2356 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2357 if (first_obj != nullptr) {
2358 if (reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
2359 DCHECK_LT(idx, moving_first_objs_count_);
2360 mirror::Object* obj = first_obj;
2361 for (size_t i = idx + 1; i < moving_first_objs_count_; i++) {
2362 obj = first_objs_moving_space_[i].AsMirrorPtr();
2363 if (obj == nullptr) {
2364 reclaim_begin = moving_space_begin_ + i * gPageSize;
2365 break;
2366 } else if (first_obj != obj) {
2367 DCHECK_LT(first_obj, obj);
2368 DCHECK_LT(reclaim_begin, reinterpret_cast<uint8_t*>(obj));
2369 reclaim_begin = reinterpret_cast<uint8_t*>(obj);
2370 break;
2371 }
2372 }
2373 if (obj == first_obj) {
2374 reclaim_begin = black_allocations_begin_;
2375 }
2376 }
2377 }
2378 reclaim_begin = AlignUp(reclaim_begin, gPageSize);
2379 }
2380
2381 DCHECK_NE(reclaim_begin, nullptr);
2382 DCHECK_ALIGNED_PARAM(reclaim_begin, gPageSize);
2383 DCHECK_ALIGNED_PARAM(last_reclaimed_page_, gPageSize);
2384 // Check if the 'class_after_obj_map_' map allows pages to be freed.
2385 for (; class_after_obj_iter_ != class_after_obj_map_.rend(); class_after_obj_iter_++) {
2386 mirror::Object* klass = class_after_obj_iter_->first.AsMirrorPtr();
2387 mirror::Class* from_klass = static_cast<mirror::Class*>(GetFromSpaceAddr(klass));
2388 // Check with class' end to ensure that, if required, the entire class survives.
2389 uint8_t* klass_end = reinterpret_cast<uint8_t*>(klass) + from_klass->SizeOf<kVerifyNone>();
2390 DCHECK_LE(klass_end, last_reclaimed_page_);
2391 if (reinterpret_cast<uint8_t*>(klass_end) >= reclaim_begin) {
2392 // Found a class which is in the reclaim range.
2393 if (reinterpret_cast<uint8_t*>(class_after_obj_iter_->second.AsMirrorPtr()) < idx_addr) {
2394 // Its lowest-address object is not compacted yet. Reclaim starting from
2395 // the end of this class.
2396 reclaim_begin = AlignUp(klass_end, gPageSize);
2397 } else {
2398 // Continue consuming pairs wherein the lowest address object has already
2399 // been compacted.
2400 continue;
2401 }
2402 }
2403 // All the remaining class (and thereby corresponding object) addresses are
2404 // lower than the reclaim range.
2405 break;
2406 }
2407 bool all_mapped = mode == kFallbackMode;
2408 ssize_t size = last_reclaimed_page_ - reclaim_begin;
2409 if (size > kMinFromSpaceMadviseSize) {
2410 // Map all the pages in the range.
2411 if (mode == kCopyMode && cur_page_idx < end_idx_for_mapping) {
2412 if (MapMovingSpacePages(cur_page_idx,
2413 end_idx_for_mapping,
2414 /*from_fault=*/false,
2415 /*return_on_contention=*/true,
2416 /*tolerate_enoent=*/false) == end_idx_for_mapping - cur_page_idx) {
2417 all_mapped = true;
2418 }
2419 } else {
2420 // This is for the black-allocation pages, so that the madvise is not missed.
2421 all_mapped = true;
2422 }
2423 // If not all pages are mapped, then take it as a hint that mmap_lock is
2424 // contended and hence don't madvise as that also needs the same lock.
2425 if (all_mapped) {
2426 // Retain a few pages for subsequent compactions.
2427 const ssize_t gBufferPages = 4 * gPageSize;
2428 DCHECK_LT(gBufferPages, kMinFromSpaceMadviseSize);
2429 size -= gBufferPages;
2430 uint8_t* addr = last_reclaimed_page_ - size;
2431 CHECK_EQ(madvise(addr + from_space_slide_diff_, size, MADV_DONTNEED), 0)
2432 << "madvise of from-space failed: " << strerror(errno);
2433 last_reclaimed_page_ = addr;
2434 cur_reclaimable_page_ = addr;
2435 }
2436 }
2437 last_reclaimable_page_ = std::min(reclaim_begin, last_reclaimable_page_);
2438 last_checked_reclaim_page_idx_ = idx;
2439 return all_mapped;
2440 }
2441
2442 template <int kMode>
2443 void MarkCompact::CompactMovingSpace(uint8_t* page) {
2444 // For every page we have a starting object, which may have started in some
2445 // preceding page, and an offset within that object from where we must start
2446 // copying.
2447 // Consult the live-words bitmap to copy all contiguously live words at a
2448 // time. These words may constitute multiple objects. To avoid the need for
2449 // consulting mark-bitmap to find where does the next live object start, we
2450 // use the object-size returned by VisitRefsForCompaction.
2451 //
2452 // We do the compaction in reverse direction so that the pages containing
2453 // TLAB and latest allocations are processed first.
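// The three loops below handle, in order: (1) black-allocated pages, which are slid via
// SlideBlackPage(); (2) pages between black_dense_end_ and black_allocations_begin_,
// compacted via CompactPage(); and (3) the black-dense prefix below black_dense_end_,
// whose references are updated in place via UpdateNonMovingPage().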
2454 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2455 size_t page_status_arr_len = moving_first_objs_count_ + black_page_count_;
2456 size_t idx = page_status_arr_len;
2457 size_t black_dense_end_idx = (black_dense_end_ - moving_space_begin_) / gPageSize;
2458 uint8_t* to_space_end = moving_space_begin_ + page_status_arr_len * gPageSize;
2459 uint8_t* pre_compact_page = black_allocations_begin_ + (black_page_count_ * gPageSize);
2460
2461 DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
2462
2463 // These variables are maintained by FreeFromSpacePages().
2464 last_reclaimed_page_ = pre_compact_page;
2465 last_reclaimable_page_ = last_reclaimed_page_;
2466 cur_reclaimable_page_ = last_reclaimed_page_;
2467 last_checked_reclaim_page_idx_ = idx;
2468 class_after_obj_iter_ = class_after_obj_map_.rbegin();
2469 // Allocated-black pages
2470 mirror::Object* next_page_first_obj = nullptr;
2471 while (idx > moving_first_objs_count_) {
2472 idx--;
2473 pre_compact_page -= gPageSize;
2474 to_space_end -= gPageSize;
2475 if (kMode == kFallbackMode) {
2476 page = to_space_end;
2477 }
2478 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2479 uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[idx];
2480 if (first_obj != nullptr) {
2481 DoPageCompactionWithStateChange<kMode>(idx,
2482 to_space_end,
2483 page,
2484 /*map_immediately=*/true,
2485 [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2486 SlideBlackPage(first_obj,
2487 next_page_first_obj,
2488 first_chunk_size,
2489 pre_compact_page,
2490 page,
2491 kMode == kCopyMode);
2492 });
2493 // We are sliding here, so no point attempting to madvise for every
2494 // page. Wait for enough pages to be done.
2495 if (idx % DivideByPageSize(kMinFromSpaceMadviseSize) == 0) {
2496 FreeFromSpacePages(idx, kMode, /*end_idx_for_mapping=*/0);
2497 }
2498 }
2499 next_page_first_obj = first_obj;
2500 }
2501 DCHECK_EQ(pre_compact_page, black_allocations_begin_);
2502 // Reserved page to be used if we can't find any reclaimable page for processing.
2503 uint8_t* reserve_page = page;
2504 size_t end_idx_for_mapping = idx;
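  // In kCopyMode, from-space pages that have already been reclaimed (between
  // last_reclaimable_page_ and cur_reclaimable_page_) are reused as temporary
  // destination buffers, so their contents can be batch-mapped later. The
  // reserve_page, by contrast, must be mapped immediately because it is reused
  // for every page that cannot get a reclaimable buffer (see map_immediately below).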
2505 while (idx > black_dense_end_idx) {
2506 idx--;
2507 to_space_end -= gPageSize;
2508 if (kMode == kFallbackMode) {
2509 page = to_space_end;
2510 } else {
2511 DCHECK_EQ(kMode, kCopyMode);
2512 if (cur_reclaimable_page_ > last_reclaimable_page_) {
2513 cur_reclaimable_page_ -= gPageSize;
2514 page = cur_reclaimable_page_ + from_space_slide_diff_;
2515 } else {
2516 page = reserve_page;
2517 }
2518 }
2519 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2520 bool success = DoPageCompactionWithStateChange<kMode>(
2521 idx,
2522 to_space_end,
2523 page,
2524 /*map_immediately=*/page == reserve_page,
2525 [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2526 if (use_generational_ && to_space_end < mid_gen_end_) {
2527 CompactPage</*kSetupForGenerational=*/true>(first_obj,
2528 pre_compact_offset_moving_space_[idx],
2529 page,
2530 to_space_end,
2531 kMode == kCopyMode);
2532 } else {
2533 CompactPage</*kSetupForGenerational=*/false>(first_obj,
2534 pre_compact_offset_moving_space_[idx],
2535 page,
2536 to_space_end,
2537 kMode == kCopyMode);
2538 }
2539 });
2540 if (kMode == kCopyMode && (!success || page == reserve_page) && end_idx_for_mapping - idx > 1) {
2541       // Map the pages at the following addresses, as they can't be mapped along with
2542       // the yet-to-be-compacted pages, whose src-side pages won't be contiguous.
2543 MapMovingSpacePages(idx + 1,
2544 end_idx_for_mapping,
2545 /*from_fault=*/false,
2546 /*return_on_contention=*/true,
2547 /*tolerate_enoent=*/false);
2548 }
2549 if (FreeFromSpacePages(idx, kMode, end_idx_for_mapping)) {
2550 end_idx_for_mapping = idx;
2551 }
2552 }
2553 while (idx > 0) {
2554 idx--;
2555 to_space_end -= gPageSize;
2556 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2557 if (first_obj != nullptr) {
2558 DoPageCompactionWithStateChange<kMode>(
2559 idx,
2560 to_space_end,
2561 to_space_end + from_space_slide_diff_,
2562 /*map_immediately=*/false,
2563 [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2564 if (use_generational_) {
2565 UpdateNonMovingPage</*kSetupForGenerational=*/true>(
2566 first_obj, to_space_end, from_space_slide_diff_, moving_space_bitmap_);
2567 } else {
2568 UpdateNonMovingPage</*kSetupForGenerational=*/false>(
2569 first_obj, to_space_end, from_space_slide_diff_, moving_space_bitmap_);
2570 }
2571 if (kMode == kFallbackMode) {
2572 memcpy(to_space_end, to_space_end + from_space_slide_diff_, gPageSize);
2573 }
2574 });
2575 } else {
2576 // The page has no reachable object on it. Just declare it mapped.
2577 // Mutators shouldn't step on this page, which is asserted in sigbus
2578 // handler.
2579 DCHECK_EQ(moving_pages_status_[idx].load(std::memory_order_relaxed),
2580 static_cast<uint8_t>(PageState::kUnprocessed));
2581 moving_pages_status_[idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2582 std::memory_order_release);
2583 }
2584 if (FreeFromSpacePages(idx, kMode, end_idx_for_mapping)) {
2585 end_idx_for_mapping = idx;
2586 }
2587 }
2588   // Map one last time to finish anything that is left.
2589 if (kMode == kCopyMode && end_idx_for_mapping > 0) {
2590 MapMovingSpacePages(idx,
2591 end_idx_for_mapping,
2592 /*from_fault=*/false,
2593 /*return_on_contention=*/false,
2594 /*tolerate_enoent=*/false);
2595 }
2596 DCHECK_EQ(to_space_end, bump_pointer_space_->Begin());
2597 }
2598
2599 size_t MarkCompact::MapMovingSpacePages(size_t start_idx,
2600 size_t arr_len,
2601 bool from_fault,
2602 bool return_on_contention,
2603 bool tolerate_enoent) {
2604 DCHECK_LT(start_idx, arr_len);
2605 size_t arr_idx = start_idx;
2606 bool wait_for_unmapped = false;
2607 while (arr_idx < arr_len) {
2608 size_t map_count = 0;
2609 uint32_t cur_state = moving_pages_status_[arr_idx].load(std::memory_order_acquire);
2610     // Find a contiguous range that can be mapped with a single ioctl.
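    // Each word in moving_pages_status_ packs the PageState (in the bits selected
    // by kPageStateMask) together with the page's from-space offset (remaining
    // bits), so pages can be batched only when they are kProcessed and their
    // from-space offsets are consecutive.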
2611 for (uint32_t i = arr_idx, from_page = cur_state & ~kPageStateMask; i < arr_len;
2612 i++, map_count++, from_page += gPageSize) {
2613 uint32_t s = moving_pages_status_[i].load(std::memory_order_acquire);
2614 uint32_t cur_from_page = s & ~kPageStateMask;
2615 if (GetPageStateFromWord(s) != PageState::kProcessed || cur_from_page != from_page) {
2616 break;
2617 }
2618 }
2619
2620 if (map_count == 0) {
2621 if (from_fault) {
2622 bool mapped = GetPageStateFromWord(cur_state) == PageState::kProcessedAndMapped;
2623 return mapped ? 1 : 0;
2624 }
2625 // Skip the pages that this thread cannot map.
2626 for (; arr_idx < arr_len; arr_idx++) {
2627 PageState s = GetMovingPageState(arr_idx);
2628 if (s == PageState::kProcessed) {
2629 break;
2630 } else if (s != PageState::kProcessedAndMapped) {
2631 wait_for_unmapped = true;
2632 }
2633 }
2634 } else {
2635 uint32_t from_space_offset = cur_state & ~kPageStateMask;
2636 uint8_t* to_space_start = moving_space_begin_ + arr_idx * gPageSize;
2637 uint8_t* from_space_start = from_space_begin_ + from_space_offset;
2638 DCHECK_ALIGNED_PARAM(to_space_start, gPageSize);
2639 DCHECK_ALIGNED_PARAM(from_space_start, gPageSize);
2640 size_t mapped_len = CopyIoctl(to_space_start,
2641 from_space_start,
2642 map_count * gPageSize,
2643 return_on_contention,
2644 tolerate_enoent);
2645 for (size_t l = 0; l < mapped_len; l += gPageSize, arr_idx++) {
2646 // Store is sufficient as anyone storing is doing it with the same value.
2647 moving_pages_status_[arr_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
2648 std::memory_order_release);
2649 }
2650 if (from_fault) {
2651 return DivideByPageSize(mapped_len);
2652 }
2653       // The COPY ioctl can also return a smaller length if a page was found to be
2654       // already mapped, but that doesn't count as contention.
2655 if (return_on_contention && DivideByPageSize(mapped_len) < map_count && errno != EEXIST) {
2656 return arr_idx - start_idx;
2657 }
2658 }
2659 }
2660 if (wait_for_unmapped) {
2661 for (size_t i = start_idx; i < arr_len; i++) {
2662 PageState s = GetMovingPageState(i);
2663 DCHECK_GT(s, PageState::kProcessed);
2664 uint32_t backoff_count = 0;
2665 while (s != PageState::kProcessedAndMapped) {
2666 BackOff(backoff_count++);
2667 s = GetMovingPageState(i);
2668 }
2669 }
2670 }
2671 return arr_len - start_idx;
2672 }
2673
2674 template <bool kSetupForGenerational>
2675 void MarkCompact::UpdateNonMovingPage(mirror::Object* first,
2676 uint8_t* page,
2677 ptrdiff_t from_space_diff,
2678 accounting::ContinuousSpaceBitmap* bitmap) {
2679 DCHECK_LT(reinterpret_cast<uint8_t*>(first), page + gPageSize);
2680 accounting::CardTable* card_table = heap_->GetCardTable();
2681 mirror::Object* curr_obj = first;
2682 uint8_t* from_page = page + from_space_diff;
2683 uint8_t* from_page_end = from_page + gPageSize;
2684 uint8_t* scan_begin =
2685 std::max(reinterpret_cast<uint8_t*>(first) + mirror::kObjectHeaderSize, page);
2686 // For every object found in the page, visit the previous object. This ensures
2687 // that we can visit without checking page-end boundary.
2688 // Call VisitRefsForCompaction with from-space read-barrier as the klass object and
2689 // super-class loads require it.
2690 // TODO: Set kVisitNativeRoots to false once we implement concurrent
2691 // compaction
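  // Note: obj_visitor below updates curr_obj (the previously seen object) and
  // only then advances curr_obj to next_obj; the last object of the page is
  // handled separately after the scan, where the page-end boundary is known.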
2692 auto obj_visitor = [&](mirror::Object* next_obj) {
2693 if (curr_obj != nullptr) {
2694 mirror::Object* from_obj =
2695 reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff);
2696 bool should_dirty_card;
2697 if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2698 RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ false, kSetupForGenerational> visitor(
2699 this, from_obj, from_page, from_page_end, card_table, curr_obj);
2700 MemberOffset begin_offset(page - reinterpret_cast<uint8_t*>(curr_obj));
2701 // Native roots shouldn't be visited as they are done when this
2702 // object's beginning was visited in the preceding page.
2703 from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>(
2704 visitor, begin_offset, MemberOffset(-1));
2705 should_dirty_card = visitor.ShouldDirtyCard();
2706 } else {
2707 RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ false, kSetupForGenerational>
2708 visitor(this, from_obj, from_page, from_page_end, card_table, curr_obj);
2709 from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2710 visitor, MemberOffset(0), MemberOffset(-1));
2711 should_dirty_card = visitor.ShouldDirtyCard();
2712 }
2713 if (kSetupForGenerational && should_dirty_card) {
2714 card_table->MarkCard(curr_obj);
2715 }
2716 }
2717 curr_obj = next_obj;
2718 };
2719
2720 if (young_gen_) {
2721 DCHECK(bitmap->Test(first));
2722 // If the first-obj is covered by the same card which also covers the first
2723 // word of the page, then it's important to set curr_obj to nullptr to avoid
2724 // updating the references twice.
2725 if (card_table->IsClean(first) ||
2726 card_table->CardFromAddr(first) == card_table->CardFromAddr(scan_begin)) {
2727 curr_obj = nullptr;
2728 }
2729 // We cannot acquire heap-bitmap-lock here as this function is called from
2730 // SIGBUS handler. But it's safe as the bitmap passed to Scan function
2731 // can't get modified until this GC cycle is finished.
2732 FakeMutexLock mu(*Locks::heap_bitmap_lock_);
2733 card_table->Scan</*kClearCard=*/false>(
2734 bitmap, scan_begin, page + gPageSize, obj_visitor, accounting::CardTable::kCardAged2);
2735 } else {
2736 bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(scan_begin),
2737 reinterpret_cast<uintptr_t>(page + gPageSize),
2738 obj_visitor);
2739 }
2740
2741 if (curr_obj != nullptr) {
2742 bool should_dirty_card;
2743 mirror::Object* from_obj =
2744 reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff);
2745 MemberOffset end_offset(page + gPageSize - reinterpret_cast<uint8_t*>(curr_obj));
2746 if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2747 RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ true, kSetupForGenerational> visitor(
2748 this, from_obj, from_page, from_page_end, card_table, curr_obj);
2749 from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>(
2750 visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset);
2751 should_dirty_card = visitor.ShouldDirtyCard();
2752 } else {
2753 RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true, kSetupForGenerational> visitor(
2754 this, from_obj, from_page, from_page_end, card_table, curr_obj);
2755 from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2756 visitor, MemberOffset(0), end_offset);
2757 should_dirty_card = visitor.ShouldDirtyCard();
2758 }
2759 if (kSetupForGenerational && should_dirty_card) {
2760 card_table->MarkCard(curr_obj);
2761 }
2762 }
2763 }
2764
2765 void MarkCompact::UpdateNonMovingSpace() {
2766 TimingLogger::ScopedTiming t("(Paused)UpdateNonMovingSpace", GetTimings());
2767 // Iterating in reverse ensures that the class pointer in objects which span
2768   // across more than one page gets updated last. This is necessary for
2769 // VisitRefsForCompaction() to work correctly.
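  // (With reverse iteration, the page holding an object's header, and hence its
  // class pointer, is processed only after the pages holding its tail.)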
2770 // TODO: If and when we make non-moving space update concurrent, implement a
2771 // mechanism to remember class pointers for such objects off-heap and pass it
2772 // to VisitRefsForCompaction().
2773 uint8_t* page = non_moving_space_->Begin() + non_moving_first_objs_count_ * gPageSize;
2774 for (ssize_t i = non_moving_first_objs_count_ - 1; i >= 0; i--) {
2775 mirror::Object* obj = first_objs_non_moving_space_[i].AsMirrorPtr();
2776 page -= gPageSize;
2777 // null means there are no objects on the page to update references.
2778 if (obj != nullptr) {
2779 if (use_generational_) {
2780 UpdateNonMovingPage</*kSetupForGenerational=*/true>(
2781 obj, page, /*from_space_diff=*/0, non_moving_space_bitmap_);
2782 } else {
2783 UpdateNonMovingPage</*kSetupForGenerational=*/false>(
2784 obj, page, /*from_space_diff=*/0, non_moving_space_bitmap_);
2785 }
2786 }
2787 }
2788 }
2789
2790 void MarkCompact::UpdateMovingSpaceBlackAllocations() {
2791 // For sliding black pages, we need the first-object, which overlaps with the
2792 // first byte of the page. Additionally, we compute the size of first chunk of
2793   // black objects. This will suffice for most black pages. Unlike compaction
2794 // pages, here we don't need to pre-compute the offset within first-obj from
2795 // where sliding has to start. That can be calculated using the pre-compact
2796 // address of the page. Therefore, to save space, we store the first chunk's
2797 // size in black_alloc_pages_first_chunk_size_ array.
2798 // For the pages which may have holes after the first chunk, which could happen
2799 // if a new TLAB starts in the middle of the page, we mark the objects in
2800 // the mark-bitmap. So, if the first-chunk size is smaller than gPageSize,
2801 // then we use the mark-bitmap for the remainder of the page.
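  // Illustrative example (assuming 4 KiB pages): if a black page begins with a
  // 3 KiB run of contiguous black objects followed by the unused tail of a TLAB,
  // and a new TLAB starts 3.5 KiB into the page, then
  // black_alloc_pages_first_chunk_size_ for that page is 3 KiB (< gPageSize) and
  // the new TLAB's objects get their mark-bits set so that SlideBlackPage() can
  // locate them past the hole.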
2802 uint8_t* const begin = bump_pointer_space_->Begin();
2803 uint8_t* black_allocs = black_allocations_begin_;
2804 DCHECK_LE(begin, black_allocs);
2805 size_t consumed_blocks_count = 0;
2806 size_t first_block_size;
2807   // Needed only for a debug check at the end of the function. Hopefully the
2808   // compiler will eliminate it otherwise.
2809 size_t num_blocks = 0;
2810 // Get the list of all blocks allocated in the bump-pointer space.
2811 std::vector<size_t>* block_sizes = bump_pointer_space_->GetBlockSizes(thread_running_gc_,
2812 &first_block_size);
2813 DCHECK_LE(first_block_size, (size_t)(black_allocs - begin));
2814 if (block_sizes != nullptr) {
2815 size_t black_page_idx = moving_first_objs_count_;
2816 uint8_t* block_end = begin + first_block_size;
2817 uint32_t remaining_chunk_size = 0;
2818 uint32_t first_chunk_size = 0;
2819 mirror::Object* first_obj = nullptr;
2820 num_blocks = block_sizes->size();
2821 for (size_t block_size : *block_sizes) {
2822 block_end += block_size;
2823 // Skip the blocks that are prior to the black allocations. These will be
2824 // merged with the main-block later.
2825 if (black_allocs >= block_end) {
2826 consumed_blocks_count++;
2827 continue;
2828 }
2829 mirror::Object* obj = reinterpret_cast<mirror::Object*>(black_allocs);
2830 bool set_mark_bit = remaining_chunk_size > 0;
2831 // We don't know how many objects are allocated in the current block. When we hit
2832       // a null class pointer, assume it's the end. This works as every block is expected to
2833 // have objects allocated linearly using bump-pointer.
2834 // BumpPointerSpace::Walk() also works similarly.
2835 while (black_allocs < block_end
2836 && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
2837 // Try to keep instructions which access class instance together to
2838 // avoid reloading the pointer from object.
2839 size_t obj_size = obj->SizeOf();
2840 bytes_scanned_ += obj_size;
2841 obj_size = RoundUp(obj_size, kAlignment);
2842 UpdateClassAfterObjectMap(obj);
2843 if (first_obj == nullptr) {
2844 first_obj = obj;
2845 }
2846 // We only need the mark-bitmap in the pages wherein a new TLAB starts in
2847 // the middle of the page.
2848 if (set_mark_bit) {
2849 moving_space_bitmap_->Set(obj);
2850 }
2851 // Handle objects which cross page boundary, including objects larger
2852 // than page size.
2853 if (remaining_chunk_size + obj_size >= gPageSize) {
2854 set_mark_bit = false;
2855 first_chunk_size += gPageSize - remaining_chunk_size;
2856 remaining_chunk_size += obj_size;
2857 // We should not store first-object and remaining_chunk_size if there were
2858 // unused bytes before this TLAB, in which case we must have already
2859 // stored the values (below).
2860 if (black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2861 black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2862 first_objs_moving_space_[black_page_idx].Assign(first_obj);
2863 }
2864 black_page_idx++;
2865 remaining_chunk_size -= gPageSize;
2866 // Consume an object larger than page size.
2867 while (remaining_chunk_size >= gPageSize) {
2868 black_alloc_pages_first_chunk_size_[black_page_idx] = gPageSize;
2869 first_objs_moving_space_[black_page_idx].Assign(obj);
2870 black_page_idx++;
2871 remaining_chunk_size -= gPageSize;
2872 }
2873 first_obj = remaining_chunk_size > 0 ? obj : nullptr;
2874 first_chunk_size = remaining_chunk_size;
2875 } else {
2876 DCHECK_LE(first_chunk_size, remaining_chunk_size);
2877 first_chunk_size += obj_size;
2878 remaining_chunk_size += obj_size;
2879 }
2880 black_allocs += obj_size;
2881 obj = reinterpret_cast<mirror::Object*>(black_allocs);
2882 }
2883 DCHECK_LE(black_allocs, block_end);
2884 DCHECK_LT(remaining_chunk_size, gPageSize);
2885 // consume the unallocated portion of the block
2886 if (black_allocs < block_end) {
2887 // first-chunk of the current page ends here. Store it.
2888 if (first_chunk_size > 0 && black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2889 black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2890 first_objs_moving_space_[black_page_idx].Assign(first_obj);
2891 }
2892 first_chunk_size = 0;
2893 first_obj = nullptr;
2894 size_t page_remaining = gPageSize - remaining_chunk_size;
2895 size_t block_remaining = block_end - black_allocs;
2896 if (page_remaining <= block_remaining) {
2897 block_remaining -= page_remaining;
2898 // current page and the subsequent empty pages in the block
2899 black_page_idx += 1 + DivideByPageSize(block_remaining);
2900 remaining_chunk_size = ModuloPageSize(block_remaining);
2901 } else {
2902 remaining_chunk_size += block_remaining;
2903 }
2904 black_allocs = block_end;
2905 }
2906 }
2907 if (black_page_idx < DivideByPageSize(bump_pointer_space_->Size())) {
2908 // Store the leftover first-chunk, if any, and update page index.
2909 if (black_alloc_pages_first_chunk_size_[black_page_idx] > 0) {
2910 black_page_idx++;
2911 } else if (first_chunk_size > 0) {
2912 black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2913 first_objs_moving_space_[black_page_idx].Assign(first_obj);
2914 black_page_idx++;
2915 }
2916 }
2917 black_page_count_ = black_page_idx - moving_first_objs_count_;
2918 delete block_sizes;
2919 }
2920 // Update bump-pointer space by consuming all the pre-black blocks into the
2921 // main one.
2922 bump_pointer_space_->SetBlockSizes(thread_running_gc_,
2923 post_compact_end_ - begin,
2924 consumed_blocks_count);
2925 if (kIsDebugBuild) {
2926 size_t moving_space_size = bump_pointer_space_->Size();
2927 size_t los_size = 0;
2928 if (heap_->GetLargeObjectsSpace()) {
2929 los_size = heap_->GetLargeObjectsSpace()->GetBytesAllocated();
2930 }
2931 // The moving-space size is already updated to post-compact size in SetBlockSizes above.
2932     // Also, bytes-allocated has already been adjusted with the large-object space's freed-bytes
2933 // in Sweep(), but not with moving-space freed-bytes.
2934 CHECK_GE(heap_->GetBytesAllocated() - black_objs_slide_diff_, moving_space_size + los_size)
2935 << " moving-space size:" << moving_space_size
2936 << " moving-space bytes-freed:" << black_objs_slide_diff_
2937 << " large-object-space size:" << los_size
2938 << " large-object-space bytes-freed:" << GetCurrentIteration()->GetFreedLargeObjectBytes()
2939 << " num-tlabs-merged:" << consumed_blocks_count
2940 << " main-block-size:" << (post_compact_end_ - begin)
2941 << " total-tlabs-moving-space:" << num_blocks;
2942 }
2943 }
2944
2945 void MarkCompact::UpdateNonMovingSpaceBlackAllocations() {
2946 accounting::ObjectStack* stack = heap_->GetAllocationStack();
2947 const StackReference<mirror::Object>* limit = stack->End();
2948 uint8_t* const space_begin = non_moving_space_->Begin();
2949 size_t num_pages = DivideByPageSize(non_moving_space_->Capacity());
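  // For every black-allocated object in the non-moving space: mark it in the
  // live bitmap and make sure the first-object entry of each page the object
  // overlaps points at it (unless an earlier object on that page is already
  // recorded).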
2950 for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) {
2951 mirror::Object* obj = it->AsMirrorPtr();
2952 if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) {
2953 non_moving_space_bitmap_->Set(obj);
2954 if (!use_generational_) {
2955 // Clear so that we don't try to set the bit again in the next GC-cycle.
2956 it->Clear();
2957 }
2958 size_t idx = DivideByPageSize(reinterpret_cast<uint8_t*>(obj) - space_begin);
2959 uint8_t* page_begin = AlignDown(reinterpret_cast<uint8_t*>(obj), gPageSize);
2960 mirror::Object* first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr();
2961 if (first_obj == nullptr
2962 || (obj < first_obj && reinterpret_cast<uint8_t*>(first_obj) > page_begin)) {
2963 first_objs_non_moving_space_[idx].Assign(obj);
2964 }
2965 if (++idx == num_pages) {
2966 continue;
2967 }
2968 mirror::Object* next_page_first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr();
2969 uint8_t* next_page_begin = page_begin + gPageSize;
2970 if (next_page_first_obj == nullptr
2971 || reinterpret_cast<uint8_t*>(next_page_first_obj) > next_page_begin) {
2972 size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
2973 uint8_t* obj_end = reinterpret_cast<uint8_t*>(obj) + obj_size;
2974 while (next_page_begin < obj_end) {
2975 first_objs_non_moving_space_[idx++].Assign(obj);
2976 next_page_begin += gPageSize;
2977 }
2978 }
2979 // update first_objs count in case we went past non_moving_first_objs_count_
2980 non_moving_first_objs_count_ = std::max(non_moving_first_objs_count_, idx);
2981 }
2982 }
2983 }
2984
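// Visitor used during the compaction pause to update references held by
// objects in immune spaces (image/zygote) to their post-compact addresses.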
2985 class MarkCompact::ImmuneSpaceUpdateObjVisitor {
2986 public:
2987   explicit ImmuneSpaceUpdateObjVisitor(MarkCompact* collector) : collector_(collector) {}
2988
2989   void operator()(mirror::Object* obj) const ALWAYS_INLINE REQUIRES(Locks::mutator_lock_) {
2990 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(collector_,
2991 obj,
2992 /*begin_*/nullptr,
2993 /*end_*/nullptr);
2994 obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2995 visitor, MemberOffset(0), MemberOffset(-1));
2996 }
2997
2998   static void Callback(mirror::Object* obj, void* arg) REQUIRES(Locks::mutator_lock_) {
2999 reinterpret_cast<ImmuneSpaceUpdateObjVisitor*>(arg)->operator()(obj);
3000 }
3001
3002 private:
3003 MarkCompact* const collector_;
3004 };
3005
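// Visits the class-tables of class-loaders and updates their GC roots
// (excluding classes, which are updated concurrently) to post-compact
// addresses.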
3006 class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor {
3007 public:
3008   explicit ClassLoaderRootsUpdater(MarkCompact* collector)
3009 : collector_(collector),
3010 moving_space_begin_(collector->black_dense_end_),
3011 moving_space_end_(collector->moving_space_end_) {}
3012
3013   void Visit(ObjPtr<mirror::ClassLoader> class_loader) override
3014 REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) {
3015 ClassTable* const class_table = class_loader->GetClassTable();
3016 if (class_table != nullptr) {
3017 // Classes are updated concurrently.
3018 class_table->VisitRoots(*this, /*skip_classes=*/true);
3019 }
3020 }
3021
3022   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
3023 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
3024 if (!root->IsNull()) {
3025 VisitRoot(root);
3026 }
3027 }
3028
3029   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
3030 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
3031 collector_->UpdateRoot(
3032 root, moving_space_begin_, moving_space_end_, RootInfo(RootType::kRootVMInternal));
3033 }
3034
3035 private:
3036 MarkCompact* collector_;
3037 uint8_t* const moving_space_begin_;
3038 uint8_t* const moving_space_end_;
3039 };
3040
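// Updates GC roots embedded in a linear-alloc page (GcRoot arrays, ArtMethod/
// ArtField arrays, dex-cache arrays, etc.) so that references into the moving
// space point to their post-compact addresses.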
3041 class MarkCompact::LinearAllocPageUpdater {
3042 public:
3043   explicit LinearAllocPageUpdater(MarkCompact* collector)
3044 : collector_(collector),
3045 moving_space_begin_(collector->black_dense_end_),
3046 moving_space_end_(collector->moving_space_end_),
3047 last_page_touched_(false) {}
3048
3049 // Update a page in multi-object arena.
3050   void MultiObjectArena(uint8_t* page_begin, uint8_t* first_obj)
3051 REQUIRES_SHARED(Locks::mutator_lock_) {
3052 DCHECK(first_obj != nullptr);
3053 DCHECK_ALIGNED_PARAM(page_begin, gPageSize);
3054 uint8_t* page_end = page_begin + gPageSize;
3055 uint32_t obj_size;
3056 for (uint8_t* byte = first_obj; byte < page_end;) {
3057 TrackingHeader* header = reinterpret_cast<TrackingHeader*>(byte);
3058 obj_size = header->GetSize();
3059 if (UNLIKELY(obj_size == 0)) {
3060 // No more objects in this page to visit.
3061 last_page_touched_ = byte >= page_begin;
3062 return;
3063 }
3064 uint8_t* obj = byte + sizeof(TrackingHeader);
3065 uint8_t* obj_end = byte + obj_size;
3066 if (header->Is16Aligned()) {
3067 obj = AlignUp(obj, 16);
3068 }
3069 uint8_t* begin_boundary = std::max(obj, page_begin);
3070 uint8_t* end_boundary = std::min(obj_end, page_end);
3071 if (begin_boundary < end_boundary) {
3072 VisitObject(header->GetKind(), obj, begin_boundary, end_boundary);
3073 }
3074 if (ArenaAllocator::IsRunningOnMemoryTool()) {
3075 obj_size += ArenaAllocator::kMemoryToolRedZoneBytes;
3076 }
3077 byte += RoundUp(obj_size, LinearAlloc::kAlignment);
3078 }
3079 last_page_touched_ = true;
3080 }
3081
3082 // This version is only used for cases where the entire page is filled with
3083 // GC-roots. For example, class-table and intern-table.
3084   void SingleObjectArena(uint8_t* page_begin, size_t page_size)
3085 REQUIRES_SHARED(Locks::mutator_lock_) {
3086 static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>));
3087 DCHECK_ALIGNED(page_begin, kAlignment);
3088 // Least significant bits are used by class-table.
3089 static constexpr uint32_t kMask = kObjectAlignment - 1;
3090 size_t num_roots = page_size / sizeof(GcRoot<mirror::Object>);
3091 uint32_t* root_ptr = reinterpret_cast<uint32_t*>(page_begin);
3092 for (size_t i = 0; i < num_roots; root_ptr++, i++) {
3093 uint32_t word = *root_ptr;
3094 if (word != 0) {
3095 uint32_t lsbs = word & kMask;
3096 word &= ~kMask;
3097 VisitRootIfNonNull(reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(&word));
3098 *root_ptr = word | lsbs;
3099 last_page_touched_ = true;
3100 }
3101 }
3102 }
3103
3104   bool WasLastPageTouched() const { return last_page_touched_; }
3105
3106   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
3107 ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
3108 if (!root->IsNull()) {
3109 VisitRoot(root);
3110 }
3111 }
3112
3113   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
3114 ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
3115 mirror::Object* old_ref = root->AsMirrorPtr();
3116 DCHECK_NE(old_ref, nullptr);
3117 if (MarkCompact::HasAddress(old_ref, moving_space_begin_, moving_space_end_)) {
3118 mirror::Object* new_ref = old_ref;
3119 if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) {
3120 new_ref = collector_->PostCompactBlackObjAddr(old_ref);
3121 } else if (collector_->live_words_bitmap_->Test(old_ref)) {
3122 DCHECK(collector_->moving_space_bitmap_->Test(old_ref))
3123 << "ref:" << old_ref << " root:" << root;
3124 new_ref = collector_->PostCompactOldObjAddr(old_ref);
3125 }
3126 if (old_ref != new_ref) {
3127 root->Assign(new_ref);
3128 }
3129 }
3130 }
3131
3132 private:
3133   void VisitObject(LinearAllocKind kind,
3134 void* obj,
3135 uint8_t* start_boundary,
3136 uint8_t* end_boundary) const ALWAYS_INLINE
3137 REQUIRES_SHARED(Locks::mutator_lock_) {
3138 switch (kind) {
3139 case LinearAllocKind::kNoGCRoots:
3140 break;
3141 case LinearAllocKind::kGCRootArray:
3142 {
3143 GcRoot<mirror::Object>* root = reinterpret_cast<GcRoot<mirror::Object>*>(start_boundary);
3144 GcRoot<mirror::Object>* last = reinterpret_cast<GcRoot<mirror::Object>*>(end_boundary);
3145 for (; root < last; root++) {
3146 VisitRootIfNonNull(root->AddressWithoutBarrier());
3147 }
3148 }
3149 break;
3150 case LinearAllocKind::kArtMethodArray:
3151 {
3152 LengthPrefixedArray<ArtMethod>* array = static_cast<LengthPrefixedArray<ArtMethod>*>(obj);
3153           // Old methods are clobbered in debug builds. Check the size to confirm whether the
3154           // array has any GC roots to visit. See ClassLinker::LinkMethodsHelper::ClobberOldMethods().
3155 if (array->size() > 0) {
3156 if (collector_->pointer_size_ == PointerSize::k64) {
3157 ArtMethod::VisitArrayRoots<PointerSize::k64>(
3158 *this, start_boundary, end_boundary, array);
3159 } else {
3160 DCHECK_EQ(collector_->pointer_size_, PointerSize::k32);
3161 ArtMethod::VisitArrayRoots<PointerSize::k32>(
3162 *this, start_boundary, end_boundary, array);
3163 }
3164 }
3165 }
3166 break;
3167 case LinearAllocKind::kArtMethod:
3168 ArtMethod::VisitRoots(*this, start_boundary, end_boundary, static_cast<ArtMethod*>(obj));
3169 break;
3170 case LinearAllocKind::kArtFieldArray:
3171 ArtField::VisitArrayRoots(*this,
3172 start_boundary,
3173 end_boundary,
3174 static_cast<LengthPrefixedArray<ArtField>*>(obj));
3175 break;
3176 case LinearAllocKind::kDexCacheArray:
3177 {
3178 mirror::DexCachePair<mirror::Object>* first =
3179 reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(start_boundary);
3180 mirror::DexCachePair<mirror::Object>* last =
3181 reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(end_boundary);
3182 mirror::DexCache::VisitDexCachePairRoots(*this, first, last);
3183 }
3184 }
3185 }
3186
3187 MarkCompact* const collector_;
3188 // Cache to speed up checking if GC-root is in moving space or not.
3189 uint8_t* const moving_space_begin_;
3190 uint8_t* const moving_space_end_;
3191 // Whether the last page was touched or not.
3192 bool last_page_touched_ = false;
3193 };
3194
3195 void MarkCompact::UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) {
3196 // If the process is debuggable then redefinition is allowed, which may mean
3197   // pre-zygote-fork class-tables may have pointers to classes in the moving-space.
3198   // So visit classes from class-sets that are not in the linear-alloc arena-pool.
3199 if (UNLIKELY(runtime->IsJavaDebuggableAtInit())) {
3200 ClassLinker* linker = runtime->GetClassLinker();
3201 ClassLoaderRootsUpdater updater(this);
3202 GcVisitedArenaPool* pool = static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
3203 auto cond = [this, pool, immune_class_table_only](ClassTable::ClassSet& set) -> bool {
3204 if (!set.empty()) {
3205 return immune_class_table_only ?
3206 immune_spaces_.ContainsObject(reinterpret_cast<mirror::Object*>(&*set.begin())) :
3207 !pool->Contains(reinterpret_cast<void*>(&*set.begin()));
3208 }
3209 return false;
3210 };
3211 linker->VisitClassTables([cond, &updater](ClassTable* table)
3212 REQUIRES_SHARED(Locks::mutator_lock_) {
3213 table->VisitClassesIfConditionMet(cond, updater);
3214 });
3215 ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
3216 linker->GetBootClassTable()->VisitClassesIfConditionMet(cond, updater);
3217 }
3218 }
3219
3220 void MarkCompact::CompactionPause() {
3221 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3222 Runtime* runtime = Runtime::Current();
3223 if (kIsDebugBuild) {
3224 DCHECK_EQ(thread_running_gc_, Thread::Current());
3225 // TODO(Simulator): Test that this should not operate on the simulated stack when the simulator
3226 // supports mark compact.
3227 stack_low_addr_ = thread_running_gc_->GetStackEnd<kNativeStackType>();
3228 stack_high_addr_ = reinterpret_cast<char*>(stack_low_addr_)
3229 + thread_running_gc_->GetUsableStackSize<kNativeStackType>();
3230 }
3231 {
3232 TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings());
3233 ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
3234     // Refresh data-structures to catch up on allocations that may have
3235     // happened since the marking-phase pause.
3236 // There could be several TLABs that got allocated since marking pause. We
3237 // don't want to compact them and instead update the TLAB info in TLS and
3238 // let mutators continue to use the TLABs.
3239 // We need to set all the bits in live-words bitmap corresponding to allocated
3240 // objects. Also, we need to find the objects that are overlapping with
3241 // page-begin boundaries. Unlike objects allocated before
3242 // black_allocations_begin_, which can be identified via mark-bitmap, we can get
3243 // this info only via walking the space past black_allocations_begin_, which
3244 // involves fetching object size.
3245 // TODO: We can reduce the time spent on this in a pause by performing one
3246 // round of this concurrently prior to the pause.
3247 UpdateMovingSpaceBlackAllocations();
3248 // Iterate over the allocation_stack_, for every object in the non-moving
3249 // space:
3250 // 1. Mark the object in live bitmap
3251 // 2. Erase the object from allocation stack
3252 // 3. In the corresponding page, if the first-object vector needs updating
3253 // then do so.
3254 UpdateNonMovingSpaceBlackAllocations();
3255 // This store is visible to mutator (or uffd worker threads) as the mutator
3256 // lock's unlock guarantees that.
3257 compacting_ = true;
3258 // Start updating roots and system weaks now.
3259 heap_->GetReferenceProcessor()->UpdateRoots(this);
3260 }
3261 {
3262     // TODO: Updating immune spaces has to happen either before or after
3263 // remapping pre-compact pages to from-space. And depending on when it's
3264 // done, we have to invoke VisitRefsForCompaction() with or without
3265 // read-barrier.
3266 TimingLogger::ScopedTiming t2("(Paused)UpdateImmuneSpaces", GetTimings());
3267 accounting::CardTable* const card_table = heap_->GetCardTable();
3268 for (auto& space : immune_spaces_.GetSpaces()) {
3269 DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
3270 accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
3271 accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
3272       // Having a zygote-space indicates that the first zygote fork has taken
3273 // place and that the classes/dex-caches in immune-spaces may have allocations
3274 // (ArtMethod/ArtField arrays, dex-cache array, etc.) in the
3275 // non-userfaultfd visited private-anonymous mappings. Visit them here.
3276 ImmuneSpaceUpdateObjVisitor visitor(this);
3277 if (table != nullptr) {
3278 table->ProcessCards();
3279 table->VisitObjects(ImmuneSpaceUpdateObjVisitor::Callback, &visitor);
3280 } else {
3281 WriterMutexLock wmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
3282 card_table->Scan<false>(
3283 live_bitmap,
3284 space->Begin(),
3285 space->Limit(),
3286 visitor,
3287 accounting::CardTable::kCardDirty - 1);
3288 }
3289 }
3290 }
3291
3292 {
3293 TimingLogger::ScopedTiming t2("(Paused)UpdateRoots", GetTimings());
3294 runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
3295 runtime->VisitNonThreadRoots(this);
3296 {
3297 ClassLinker* linker = runtime->GetClassLinker();
3298 ClassLoaderRootsUpdater updater(this);
3299 ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
3300 linker->VisitClassLoaders(&updater);
3301 linker->GetBootClassTable()->VisitRoots(updater, /*skip_classes=*/true);
3302 }
3303 SweepSystemWeaks(thread_running_gc_, runtime, /*paused=*/true);
3304
3305 bool has_zygote_space = heap_->HasZygoteSpace();
3306 GcVisitedArenaPool* arena_pool =
3307 static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
3308 // Update immune/pre-zygote class-tables in case class redefinition took
3309     // place. Pre-zygote class-tables that are not in immune spaces are updated
3310 // below if we are in fallback-mode or if there is no zygote space. So in
3311 // that case only visit class-tables that are there in immune-spaces.
3312 UpdateClassTableClasses(runtime, uffd_ == kFallbackMode || !has_zygote_space);
3313
3314 // Acquire arena-pool's lock, which should be released after the pool is
3315 // userfaultfd registered. This is to ensure that no new arenas are
3316 // allocated and used in between. Since they will not be captured in
3317 // linear_alloc_arenas_ below, we will miss updating their pages. The same
3318 // reason also applies to new allocations within the existing arena which
3319 // may change last_byte.
3320     // Since we are in an STW pause, this shouldn't happen anyway, but holding
3321 // the lock confirms it.
3322 // TODO (b/305779657): Replace with ExclusiveTryLock() and assert that it
3323 // doesn't fail once it is available for ReaderWriterMutex.
3324 WriterMutexLock pool_wmu(thread_running_gc_, arena_pool->GetLock());
3325
3326 // TODO: Find out why it's not sufficient to visit native roots of immune
3327 // spaces, and why all the pre-zygote fork arenas have to be linearly updated.
3328 // Is it possible that some native root starts getting pointed to by some object
3329 // in moving space after fork? Or are we missing a write-barrier somewhere
3330 // when a native root is updated?
3331 auto arena_visitor = [this](uint8_t* page_begin, uint8_t* first_obj, size_t page_size)
3332 REQUIRES_SHARED(Locks::mutator_lock_) {
3333 LinearAllocPageUpdater updater(this);
3334 if (first_obj != nullptr) {
3335 updater.MultiObjectArena(page_begin, first_obj);
3336 } else {
3337 updater.SingleObjectArena(page_begin, page_size);
3338 }
3339 };
3340 if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) {
3341 // Besides fallback-mode, visit linear-alloc space in the pause for zygote
3342 // processes prior to first fork (that's when zygote space gets created).
3343 if (kIsDebugBuild && IsValidFd(uffd_)) {
3344 // All arenas allocated so far are expected to be pre-zygote fork.
3345 arena_pool->ForEachAllocatedArena(
3346 [](const TrackedArena& arena)
3347 REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); });
3348 }
3349 arena_pool->VisitRoots(arena_visitor);
3350 } else {
3351 // Inform the arena-pool that compaction is going on. So the TrackedArena
3352 // objects corresponding to the arenas that are freed shouldn't be deleted
3353 // immediately. We will do that in FinishPhase(). This is to avoid ABA
3354 // problem.
3355 arena_pool->DeferArenaFreeing();
3356 arena_pool->ForEachAllocatedArena(
3357 [this, arena_visitor, has_zygote_space](const TrackedArena& arena)
3358 REQUIRES_SHARED(Locks::mutator_lock_) {
3359 // The pre-zygote fork arenas are not visited concurrently in the
3360 // zygote children processes. The native roots of the dirty objects
3361 // are visited during immune space visit below.
3362 if (!arena.IsPreZygoteForkArena()) {
3363 uint8_t* last_byte = arena.GetLastUsedByte();
3364 auto ret = linear_alloc_arenas_.insert({&arena, last_byte});
3365 CHECK(ret.second);
3366 } else if (!arena.IsSingleObjectArena() || !has_zygote_space) {
3367 // Pre-zygote class-table and intern-table don't need to be updated.
3368 // TODO: Explore the possibility of using /proc/self/pagemap to
3369 // fetch which pages in these arenas are private-dirty and then only
3370 // visit those pages. To optimize it further, we can keep all
3371 // pre-zygote arenas in a single memory range so that just one read
3372 // from pagemap is sufficient.
3373 arena.VisitRoots(arena_visitor);
3374 }
3375 });
3376 }
3377 // Release order wrt to mutator threads' SIGBUS handler load.
3378 sigbus_in_progress_count_[0].store(0, std::memory_order_relaxed);
3379 sigbus_in_progress_count_[1].store(0, std::memory_order_release);
3380 app_slow_path_start_time_ = MilliTime();
3381 KernelPreparation();
3382 }
3383
3384 UpdateNonMovingSpace();
3385 // fallback mode
3386 if (uffd_ == kFallbackMode) {
3387 CompactMovingSpace<kFallbackMode>(nullptr);
3388
3389 int32_t freed_bytes = black_objs_slide_diff_;
3390 bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
3391 RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
3392 } else {
3393 DCHECK_EQ(compaction_buffer_counter_.load(std::memory_order_relaxed), 1);
3394 }
3395 stack_low_addr_ = nullptr;
3396 }
3397
3398 void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr, uint8_t* from_addr, size_t map_size) {
3399 int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED;
3400 if (gHaveMremapDontunmap) {
3401 mremap_flags |= MREMAP_DONTUNMAP;
3402 }
3403
3404 void* ret = mremap(to_addr, map_size, map_size, mremap_flags, from_addr);
3405 CHECK_EQ(ret, static_cast<void*>(from_addr))
3406 << "mremap to move pages failed: " << strerror(errno)
3407 << ". space-addr=" << reinterpret_cast<void*>(to_addr) << " size=" << PrettySize(map_size);
3408
3409 if (!gHaveMremapDontunmap) {
3410 // Without MREMAP_DONTUNMAP the source mapping is unmapped by mremap. So mmap
3411 // the moving space again.
3412 int mmap_flags = MAP_FIXED;
3413     // Use MAP_FIXED_NOREPLACE so that if someone else reserves the 'to_addr'
3414     // mapping in the meantime (which can happen when MREMAP_DONTUNMAP isn't
3415     // available), we don't unmap someone else's mapping and cause crashes
3416     // elsewhere.
3417 mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
3418 ret = mmap(to_addr, map_size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
3419 CHECK_EQ(ret, static_cast<void*>(to_addr))
3420 << "mmap for moving space failed: " << strerror(errno);
3421 }
3422 }
3423
3424 void MarkCompact::KernelPreparation() {
3425 TimingLogger::ScopedTiming t("(Paused)KernelPreparation", GetTimings());
3426 uint8_t* moving_space_begin = bump_pointer_space_->Begin();
3427 size_t moving_space_size = bump_pointer_space_->Capacity();
3428 size_t moving_space_register_sz = (moving_first_objs_count_ + black_page_count_) * gPageSize;
3429 DCHECK_LE(moving_space_register_sz, moving_space_size);
3430
3431 KernelPrepareRangeForUffd(moving_space_begin, from_space_begin_, moving_space_size);
3432
3433 if (IsValidFd(uffd_)) {
3434 if (moving_space_register_sz > 0) {
3435 // mremap clears 'anon_vma' field of anonymous mappings. If we
3436 // uffd-register only the used portion of the space, then the vma gets
3437 // split (between used and unused portions) and as soon as pages are
3438 // mapped to the vmas, they get different `anon_vma` assigned, which
3439 // ensures that the two vmas cannot merge after we uffd-unregister the
3440 // used portion. OTOH, registering the entire space avoids the split, but
3441 // unnecessarily causes userfaults on allocations.
3442 // By faulting-in a page we force the kernel to allocate 'anon_vma' *before*
3443 // the vma-split in uffd-register. This ensures that when we unregister
3444 // the used portion after compaction, the two split vmas merge. This is
3445 // necessary for the mremap of the next GC cycle to not fail due to having
3446 // more than one vma in the source range.
3447 //
3448 // Fault in address aligned to PMD size so that in case THP is enabled,
3449 // we don't mistakenly fault a page in beginning portion that will be
3450 // registered with uffd. If the alignment takes us beyond the space, then
3451 // fault the first page and madvise it.
3452 size_t pmd_size = Heap::GetPMDSize();
3453 uint8_t* fault_in_addr = AlignUp(moving_space_begin + moving_space_register_sz, pmd_size);
3454 if (bump_pointer_space_->Contains(reinterpret_cast<mirror::Object*>(fault_in_addr))) {
3455 *const_cast<volatile uint8_t*>(fault_in_addr) = 0;
3456 } else {
3457 DCHECK_ALIGNED_PARAM(moving_space_begin, gPageSize);
3458 *const_cast<volatile uint8_t*>(moving_space_begin) = 0;
3459 madvise(moving_space_begin, pmd_size, MADV_DONTNEED);
3460 }
3461 // Register the moving space with userfaultfd.
3462 RegisterUffd(moving_space_begin, moving_space_register_sz);
3463 // madvise ensures that if any page gets mapped (only possible if some
3464       // thread is reading the page(s) without trying to make sense of the contents,
3465       // as we hold the mutator-lock exclusively) between mremap and uffd-registration, then
3466 // it gets zapped so that the map is empty and ready for userfaults. If
3467 // we could mremap after uffd-registration (like in case of linear-alloc
3468 // space below) then we wouldn't need it. But since we don't register the
3469 // entire space, we can't do that.
3470 madvise(moving_space_begin, moving_space_register_sz, MADV_DONTNEED);
3471 }
3472 // Prepare linear-alloc for concurrent compaction.
3473 for (auto& data : linear_alloc_spaces_data_) {
3474 DCHECK_EQ(static_cast<ssize_t>(data.shadow_.Size()), data.end_ - data.begin_);
3475 // There could be threads running in suspended mode when the compaction
3476 // pause is being executed. In order to make the userfaultfd setup atomic,
3477 // the registration has to be done *before* moving the pages to shadow map.
3478 RegisterUffd(data.begin_, data.shadow_.Size());
3479 KernelPrepareRangeForUffd(data.begin_, data.shadow_.Begin(), data.shadow_.Size());
3480 }
3481 }
3482 }
3483
3484 bool MarkCompact::SigbusHandler(siginfo_t* info) {
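  // Tracks this thread's participation in SIGBUS handling. The constructor tries
  // to register on sigbus_in_progress_count_[0] first and falls back to index 1
  // if compaction has already been flagged done on index 0; faults registered on
  // index 1 tolerate ENOENT. If both counters have the done bit set, idx_ ends up
  // as 2 and IsCompactionDone() returns true, so no work is attempted.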
3485 class ScopedInProgressCount {
3486 public:
3487 explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
3488 // Increment the count only if compaction is not done yet.
3489 for (idx_ = 0; idx_ < 2; idx_++) {
3490 SigbusCounterType prev =
3491 collector_->sigbus_in_progress_count_[idx_].load(std::memory_order_relaxed);
3492 while ((prev & kSigbusCounterCompactionDoneMask) == 0) {
3493 if (collector_->sigbus_in_progress_count_[idx_].compare_exchange_strong(
3494 prev, prev + 1, std::memory_order_acquire)) {
3495 DCHECK_LT(prev, kSigbusCounterCompactionDoneMask - 1);
3496 return;
3497 }
3498 }
3499 }
3500 }
3501
3502 bool TolerateEnoent() const { return idx_ == 1; }
3503
3504 bool IsCompactionDone() const { return idx_ == 2; }
3505
3506 ~ScopedInProgressCount() {
3507 if (idx_ < 2) {
3508 collector_->sigbus_in_progress_count_[idx_].fetch_sub(1, std::memory_order_release);
3509 }
3510 }
3511
3512 private:
3513 MarkCompact* const collector_;
3514 uint8_t idx_;
3515 };
3516
3517 if (info->si_code != BUS_ADRERR) {
3518 // Userfaultfd raises SIGBUS with BUS_ADRERR. All other causes can't be
3519 // handled here.
3520 return false;
3521 }
3522
3523 ScopedInProgressCount spc(this);
3524 uint8_t* fault_page = AlignDown(reinterpret_cast<uint8_t*>(info->si_addr), gPageSize);
3525 if (!spc.IsCompactionDone()) {
3526 if (HasAddress(reinterpret_cast<mirror::Object*>(fault_page))) {
3527 Thread* self = Thread::Current();
3528 Locks::mutator_lock_->AssertSharedHeld(self);
3529 size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
3530 ConcurrentlyProcessMovingPage(fault_page,
3531 self->GetThreadLocalGcBuffer(),
3532 nr_moving_space_used_pages,
3533 spc.TolerateEnoent());
3534 return true;
3535 } else {
3536 // Find the linear-alloc space containing fault-addr
3537 for (auto& data : linear_alloc_spaces_data_) {
3538 if (data.begin_ <= fault_page && data.end_ > fault_page) {
3539 ConcurrentlyProcessLinearAllocPage(fault_page, spc.TolerateEnoent());
3540 return true;
3541 }
3542 }
3543 // Fault address doesn't belong to either moving-space or linear-alloc.
3544 return false;
3545 }
3546 } else {
3547     // We may spuriously get a SIGBUS fault that was initiated before the
3548     // compaction finished but ends up here. In that case, if the fault
3549 // address is valid then consider it handled.
3550 return HasAddress(reinterpret_cast<mirror::Object*>(fault_page)) ||
3551 linear_alloc_spaces_data_.end() !=
3552 std::find_if(linear_alloc_spaces_data_.begin(),
3553 linear_alloc_spaces_data_.end(),
3554 [fault_page](const LinearAllocSpaceData& data) {
3555 return data.begin_ <= fault_page && data.end_ > fault_page;
3556 });
3557 }
3558 }
3559
3560 void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page,
3561 uint8_t* buf,
3562 size_t nr_moving_space_used_pages,
3563 bool tolerate_enoent) {
3564 Thread* self = Thread::Current();
3565 uint8_t* unused_space_begin = moving_space_begin_ + nr_moving_space_used_pages * gPageSize;
3566 DCHECK(IsAlignedParam(unused_space_begin, gPageSize));
3567 if (fault_page >= unused_space_begin) {
3568 // There is a race which allows more than one thread to install a
3569 // zero-page. But we can tolerate that. So absorb the EEXIST returned by
3570 // the ioctl and move on.
3571 ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, tolerate_enoent);
3572 return;
3573 }
3574 size_t page_idx = DivideByPageSize(fault_page - moving_space_begin_);
3575 DCHECK_LT(page_idx, moving_first_objs_count_ + black_page_count_);
3576 mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr();
3577 if (first_obj == nullptr) {
3578 DCHECK_GT(fault_page, post_compact_end_);
3579 // Install zero-page in the entire remaining tlab to avoid multiple ioctl invocations.
3580 uint8_t* end = AlignDown(self->GetTlabEnd(), gPageSize);
3581 if (fault_page < self->GetTlabStart() || fault_page >= end) {
3582 end = fault_page + gPageSize;
3583 }
3584 size_t end_idx = page_idx + DivideByPageSize(end - fault_page);
3585 size_t length = 0;
3586 for (size_t idx = page_idx; idx < end_idx; idx++, length += gPageSize) {
3587 uint32_t cur_state = moving_pages_status_[idx].load(std::memory_order_acquire);
3588 if (cur_state != static_cast<uint8_t>(PageState::kUnprocessed)) {
3589 DCHECK_EQ(cur_state, static_cast<uint8_t>(PageState::kProcessedAndMapped));
3590 break;
3591 }
3592 }
3593 if (length > 0) {
3594 length = ZeropageIoctl(fault_page, length, /*tolerate_eexist=*/true, tolerate_enoent);
3595 for (size_t len = 0, idx = page_idx; len < length; idx++, len += gPageSize) {
3596 moving_pages_status_[idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
3597 std::memory_order_release);
3598 }
3599 }
3600 return;
3601 }
3602
3603 uint32_t raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3604 uint32_t backoff_count = 0;
3605 PageState state;
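  // State machine for this page, as encoded in moving_pages_status_:
  //   kUnprocessed          -> CAS to kMutatorProcessing and compact/update it here;
  //   kProcessed            -> processed by the GC thread but not yet mapped; map it
  //                            via MapMovingSpacePages();
  //   k*Processing/Mapping  -> another thread is working on it; back off and re-read;
  //   kProcessedAndMapped   -> nothing to do.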
3606 while (true) {
3607 state = GetPageStateFromWord(raw_state);
3608 if (state == PageState::kProcessing || state == PageState::kMutatorProcessing ||
3609 state == PageState::kProcessingAndMapping || state == PageState::kProcessedAndMapping) {
3610 // Wait for the page to be mapped (by gc-thread or some mutator) before returning.
3611 // The wait is not expected to be long as the read state indicates that the other
3612 // thread is actively working on the page.
3613 BackOff(backoff_count++);
3614 raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3615 } else if (state == PageState::kProcessedAndMapped) {
3616 // Nothing to do.
3617 break;
3618 } else {
3619 // Acquire order to ensure we don't start writing to a page, which could
3620 // be shared, before the CAS is successful.
3621 if (state == PageState::kUnprocessed &&
3622 moving_pages_status_[page_idx].compare_exchange_strong(
3623 raw_state,
3624 static_cast<uint8_t>(PageState::kMutatorProcessing),
3625 std::memory_order_acquire)) {
3626 if (fault_page < black_dense_end_) {
3627 if (use_generational_) {
3628 UpdateNonMovingPage</*kSetupForGenerational=*/true>(
3629 first_obj, fault_page, from_space_slide_diff_, moving_space_bitmap_);
3630 } else {
3631 UpdateNonMovingPage</*kSetupForGenerational=*/false>(
3632 first_obj, fault_page, from_space_slide_diff_, moving_space_bitmap_);
3633 }
3634 buf = fault_page + from_space_slide_diff_;
3635 } else {
3636 if (UNLIKELY(buf == nullptr)) {
3637 uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed);
3638 // The buffer-map is one page bigger as the first buffer is used by GC-thread.
3639 CHECK_LE(idx, kMutatorCompactionBufferCount);
3640 buf = compaction_buffers_map_.Begin() + idx * gPageSize;
3641 DCHECK(compaction_buffers_map_.HasAddress(buf));
3642 self->SetThreadLocalGcBuffer(buf);
3643 }
3644
3645 if (fault_page < post_compact_end_) {
3646 // The page has to be compacted.
3647 if (use_generational_ && fault_page < mid_gen_end_) {
3648 CompactPage</*kSetupGenerational=*/true>(first_obj,
3649 pre_compact_offset_moving_space_[page_idx],
3650 buf,
3651 fault_page,
3652 /*needs_memset_zero=*/true);
3653 } else {
3654 CompactPage</*kSetupGenerational=*/false>(first_obj,
3655 pre_compact_offset_moving_space_[page_idx],
3656 buf,
3657 fault_page,
3658 /*needs_memset_zero=*/true);
3659 }
3660 } else {
3661 DCHECK_NE(first_obj, nullptr);
3662 DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u);
3663 uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_);
3664 uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx];
3665 mirror::Object* next_page_first_obj = nullptr;
3666 if (page_idx + 1 < moving_first_objs_count_ + black_page_count_) {
3667 next_page_first_obj = first_objs_moving_space_[page_idx + 1].AsMirrorPtr();
3668 }
3669 DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
3670 SlideBlackPage(first_obj,
3671 next_page_first_obj,
3672 first_chunk_size,
3673 pre_compact_page,
3674 buf,
3675 /*needs_memset_zero=*/true);
3676 }
3677 }
3678 // Nobody else would simultaneously modify this page's state so an
3679 // atomic store is sufficient. Use 'release' order to guarantee that
3680 // loads/stores to the page are finished before this store. Since the
3681 // mutator used its own buffer for the processing, there is no reason to
3682 // put its index in the status of the page. Also, the mutator is going
3683 // to immediately map the page, so that info is not needed.
3684 moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapping),
3685 std::memory_order_release);
3686 CopyIoctl(fault_page, buf, gPageSize, /*return_on_contention=*/false, tolerate_enoent);
3687 // Store is sufficient as no other thread modifies the status at this stage.
3688 moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
3689 std::memory_order_release);
3690 break;
3691 }
3692 state = GetPageStateFromWord(raw_state);
3693 if (state == PageState::kProcessed) {
3694 size_t arr_len = moving_first_objs_count_ + black_page_count_;
3695 // The page is processed but not mapped. We should map it. The release
3696 // order used in MapMovingSpacePages will ensure that the increment to
3697 // moving_compaction_in_progress is done first.
3698 if (MapMovingSpacePages(page_idx,
3699 arr_len,
3700 /*from_fault=*/true,
3701 /*return_on_contention=*/false,
3702 tolerate_enoent) > 0) {
3703 break;
3704 }
3705 raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3706 }
3707 }
3708 }
3709 }
3710
3711 bool MarkCompact::MapUpdatedLinearAllocPages(uint8_t* start_page,
3712 uint8_t* start_shadow_page,
3713 Atomic<PageState>* state,
3714 size_t length,
3715 bool free_pages,
3716 bool single_ioctl,
3717 bool tolerate_enoent) {
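  // Maps all kProcessed shadow pages in [start_page, start_page + length) into
  // the linear-alloc space with batched COPY ioctls, skipping pages claimed by
  // other threads, and optionally (free_pages) releases the shadow pages once
  // everything is known to be mapped.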
3718 DCHECK_ALIGNED_PARAM(length, gPageSize);
3719 Atomic<PageState>* madv_state = state;
3720 size_t madv_len = length;
3721 uint8_t* madv_start = start_shadow_page;
3722 bool check_state_for_madv = false;
3723 uint8_t* end_page = start_page + length;
3724 while (start_page < end_page) {
3725 size_t map_len = 0;
3726 // Find a contiguous range of pages that we can map in a single ioctl.
3727 for (Atomic<PageState>* cur_state = state;
3728 map_len < length && cur_state->load(std::memory_order_acquire) == PageState::kProcessed;
3729 map_len += gPageSize, cur_state++) {
3730 // No body.
3731 }
3732
3733 if (map_len == 0) {
3734 if (single_ioctl) {
3735 return state->load(std::memory_order_relaxed) == PageState::kProcessedAndMapped;
3736 }
3737 // Skip all the pages that this thread can't map.
3738 while (length > 0) {
3739 PageState s = state->load(std::memory_order_relaxed);
3740 if (s == PageState::kProcessed) {
3741 break;
3742 }
3743 // If we find any page which is being processed or mapped (only possible by mutators),
3744 // then we need to re-check the page-state and, if needed, wait for the state to change
3745 // to 'mapped' before the shadow pages are reclaimed.
3746 check_state_for_madv |= s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped;
3747 state++;
3748 length -= gPageSize;
3749 start_shadow_page += gPageSize;
3750 start_page += gPageSize;
3751 }
3752 } else {
3753 map_len = CopyIoctl(start_page,
3754 start_shadow_page,
3755 map_len,
3756 /*return_on_contention=*/false,
3757 tolerate_enoent);
3758 DCHECK_NE(map_len, 0u);
3759 // Declare that the pages are ready to be accessed. Store is sufficient
3760 // as any thread will be storing the same value.
3761 for (size_t l = 0; l < map_len; l += gPageSize, state++) {
3762 PageState s = state->load(std::memory_order_relaxed);
3763 DCHECK(s == PageState::kProcessed || s == PageState::kProcessedAndMapped) << "state:" << s;
3764 state->store(PageState::kProcessedAndMapped, std::memory_order_release);
3765 }
3766 if (single_ioctl) {
3767 break;
3768 }
3769 start_page += map_len;
3770 start_shadow_page += map_len;
3771 length -= map_len;
3772 // state is already updated above.
3773 }
3774 }
3775 if (free_pages) {
3776 if (check_state_for_madv) {
3777 // Wait until all the pages are mapped before releasing them. This needs to be
3778 // checked only if some mutators were found concurrently mapping pages earlier.
3779 for (size_t l = 0; l < madv_len; l += gPageSize, madv_state++) {
3780 uint32_t backoff_count = 0;
3781 PageState s = madv_state->load(std::memory_order_relaxed);
3782 while (s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped) {
3783 BackOff(backoff_count++);
3784 s = madv_state->load(std::memory_order_relaxed);
3785 }
3786 }
3787 }
3788 ZeroAndReleaseMemory(madv_start, madv_len);
3789 }
3790 return true;
3791 }
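
// The loop above batches contiguous kProcessed pages into a single userfaultfd
// copy. A minimal sketch of the underlying UFFDIO_COPY call (outside ART's
// CopyIoctl() wrapper; 'uffd', 'dst_run', 'shadow_run' and 'run_len' are
// illustrative names, not members of this class):
//
//   struct uffdio_copy copy;
//   copy.dst = reinterpret_cast<uintptr_t>(dst_run);     // faulting (registered) range
//   copy.src = reinterpret_cast<uintptr_t>(shadow_run);  // already-updated shadow pages
//   copy.len = run_len;                                  // page-aligned batch length
//   copy.mode = 0;
//   if (ioctl(uffd, UFFDIO_COPY, &copy) == -1) {
//     // Per the userfaultfd documentation, copy.copy then holds either the
//     // number of bytes copied before the failure or a negated errno, so a
//     // caller can resume the batch from copy.dst + copy.copy.
//   }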
3792
ConcurrentlyProcessLinearAllocPage(uint8_t * fault_page,bool tolerate_enoent)3793 void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool tolerate_enoent) {
3794 auto arena_iter = linear_alloc_arenas_.end();
3795 {
3796 TrackedArena temp_arena(fault_page);
3797 arena_iter = linear_alloc_arenas_.upper_bound(&temp_arena);
3798 arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter)
3799 : linear_alloc_arenas_.end();
3800 }
3801 // Unlike ProcessLinearAlloc(), we don't need to hold the arena-pool's lock here
3802 // because a thread trying to access the page (and thereby causing this
3803 // userfault) confirms that nobody can delete the corresponding arena and
3804 // release its pages.
3805 // NOTE: A memory range may be recycled several times during a compaction
3806 // cycle, thereby potentially causing a userfault on the same page several
3807 // times. That's not a problem as all of them (except possibly the first one)
3808 // only require us to map a zero-page, which we do without updating
3809 // the 'state_arr'.
3810 if (arena_iter == linear_alloc_arenas_.end() ||
3811 arena_iter->first->IsWaitingForDeletion() ||
3812 arena_iter->second <= fault_page) {
3813 // Fault page isn't in any of the arenas that existed before we started
3814 // compaction. So map zeropage and return.
3815 ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, tolerate_enoent);
3816 } else {
3817 // Find the linear-alloc space containing fault-page
3818 LinearAllocSpaceData* space_data = nullptr;
3819 for (auto& data : linear_alloc_spaces_data_) {
3820 if (data.begin_ <= fault_page && fault_page < data.end_) {
3821 space_data = &data;
3822 break;
3823 }
3824 }
3825 DCHECK_NE(space_data, nullptr);
3826 ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_;
3827 size_t page_idx = DivideByPageSize(fault_page - space_data->begin_);
3828 Atomic<PageState>* state_arr =
3829 reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
3830 PageState state = state_arr[page_idx].load(std::memory_order_acquire);
3831 uint32_t backoff_count = 0;
3832 while (true) {
3833 switch (state) {
3834 case PageState::kUnprocessed: {
3835 // Acquire order to ensure we don't start writing to shadow map, which is
3836 // shared, before the CAS is successful.
3837 if (state_arr[page_idx].compare_exchange_strong(
3838 state, PageState::kProcessing, std::memory_order_acquire)) {
3839 LinearAllocPageUpdater updater(this);
3840 uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page);
3841 // null first_obj indicates that it's a page from an arena for the
3842 // intern-table/class-table, so a first object isn't required.
3843 if (first_obj != nullptr) {
3844 updater.MultiObjectArena(fault_page + diff, first_obj + diff);
3845 } else {
3846 updater.SingleObjectArena(fault_page + diff, gPageSize);
3847 }
3848 if (updater.WasLastPageTouched()) {
3849 state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
3850 state = PageState::kProcessed;
3851 continue;
3852 } else {
3853 // If the page wasn't touched, then it means it is empty and
3854 // is most likely not present on the shadow-side. Furthermore,
3855 // since the shadow is also userfaultfd-registered, the copy
3856 // ioctl fails as the copy-from-user in the kernel would itself
3857 // cause a userfault. Instead, just map a zeropage, which is not
3858 // only correct but also efficient as it avoids an unnecessary
3859 // memcpy in the kernel.
3860 if (ZeropageIoctl(fault_page,
3861 gPageSize,
3862 /*tolerate_eexist=*/false,
3863 tolerate_enoent)) {
3864 state_arr[page_idx].store(PageState::kProcessedAndMapped,
3865 std::memory_order_release);
3866 }
3867 return;
3868 }
3869 }
3870 }
3871 continue;
3872 case PageState::kProcessed:
3873 // Map as many pages as possible in a single ioctl, without spending
3874 // time freeing pages.
3875 if (MapUpdatedLinearAllocPages(fault_page,
3876 fault_page + diff,
3877 state_arr + page_idx,
3878 space_data->end_ - fault_page,
3879 /*free_pages=*/false,
3880 /*single_ioctl=*/true,
3881 tolerate_enoent)) {
3882 return;
3883 }
3884 // fault_page was not mapped by this thread (some other thread claimed
3885 // it). Wait for it to be mapped before returning.
3886 FALLTHROUGH_INTENDED;
3887 case PageState::kProcessing:
3888 case PageState::kProcessingAndMapping:
3889 case PageState::kProcessedAndMapping:
3890 // Wait for the page to be mapped before returning.
3891 BackOff(backoff_count++);
3892 state = state_arr[page_idx].load(std::memory_order_acquire);
3893 continue;
3894 case PageState::kMutatorProcessing:
3895 LOG(FATAL) << "Unreachable";
3896 UNREACHABLE();
3897 case PageState::kProcessedAndMapped:
3898 // Somebody else took care of the page.
3899 return;
3900 }
3901 break;
3902 }
3903 }
3904 }
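
// Summarizing the per-page state machine that the fault handler above and
// ProcessLinearAlloc() below cooperate on (happy path only; the arrows are the
// atomic transitions performed in the code, not an exhaustive list):
//
//   kUnprocessed --CAS--> kProcessing --store--> kProcessed --copy ioctl--> kProcessedAndMapped
//
// Untouched (empty) pages short-circuit from kProcessing straight to
// kProcessedAndMapped via a zeropage ioctl instead of the copy ioctl.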
3905
ProcessLinearAlloc()3906 void MarkCompact::ProcessLinearAlloc() {
3907 GcVisitedArenaPool* arena_pool =
3908 static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
3909 DCHECK_EQ(thread_running_gc_, Thread::Current());
3910 uint8_t* unmapped_range_start = nullptr;
3911 uint8_t* unmapped_range_end = nullptr;
3912 // Pointer to the linear-alloc space containing the current arena in the loop
3913 // below. Also helps in ensuring that two arenas, which are contiguous in
3914 // address space but are from different linear-alloc spaces, are not coalesced
3915 // into one range for mapping purposes.
3916 LinearAllocSpaceData* space_data = nullptr;
3917 Atomic<PageState>* state_arr = nullptr;
3918 ptrdiff_t diff = 0;
3919
3920 auto map_pages = [&]() {
3921 DCHECK_NE(diff, 0);
3922 DCHECK_NE(space_data, nullptr);
3923 DCHECK_GE(unmapped_range_start, space_data->begin_);
3924 DCHECK_LT(unmapped_range_start, space_data->end_);
3925 DCHECK_GT(unmapped_range_end, space_data->begin_);
3926 DCHECK_LE(unmapped_range_end, space_data->end_);
3927 DCHECK_LT(unmapped_range_start, unmapped_range_end);
3928 DCHECK_ALIGNED_PARAM(unmapped_range_end - unmapped_range_start, gPageSize);
3929 size_t page_idx = DivideByPageSize(unmapped_range_start - space_data->begin_);
3930 MapUpdatedLinearAllocPages(unmapped_range_start,
3931 unmapped_range_start + diff,
3932 state_arr + page_idx,
3933 unmapped_range_end - unmapped_range_start,
3934 /*free_pages=*/true,
3935 /*single_ioctl=*/false,
3936 /*tolerate_enoent=*/false);
3937 };
3938 for (auto& pair : linear_alloc_arenas_) {
3939 const TrackedArena* arena = pair.first;
3940 size_t arena_size = arena->Size();
3941 uint8_t* arena_begin = arena->Begin();
3942 // linear_alloc_arenas_ is sorted on arena-begin. So we will get all arenas
3943 // in that order.
3944 DCHECK_LE(unmapped_range_end, arena_begin);
3945 DCHECK(space_data == nullptr || arena_begin > space_data->begin_)
3946 << "space-begin:" << static_cast<void*>(space_data->begin_)
3947 << " arena-begin:" << static_cast<void*>(arena_begin);
3948 if (space_data == nullptr || space_data->end_ <= arena_begin) {
3949 // Map the processed arenas as we are switching to another space.
3950 if (space_data != nullptr && unmapped_range_end != nullptr) {
3951 map_pages();
3952 unmapped_range_end = nullptr;
3953 }
3954 // Find the linear-alloc space containing the arena
3955 LinearAllocSpaceData* curr_space_data = space_data;
3956 for (auto& data : linear_alloc_spaces_data_) {
3957 if (data.begin_ <= arena_begin && arena_begin < data.end_) {
3958 // Since arenas are sorted, the next space should be higher in address
3959 // order than the current one.
3960 DCHECK(space_data == nullptr || data.begin_ >= space_data->end_);
3961 diff = data.shadow_.Begin() - data.begin_;
3962 state_arr = reinterpret_cast<Atomic<PageState>*>(data.page_status_map_.Begin());
3963 space_data = &data;
3964 break;
3965 }
3966 }
3967 CHECK_NE(space_data, curr_space_data)
3968 << "Couldn't find space for arena-begin:" << static_cast<void*>(arena_begin);
3969 }
3970 // Map the processed arenas if we found a hole within the current space.
3971 if (unmapped_range_end != nullptr && unmapped_range_end < arena_begin) {
3972 map_pages();
3973 unmapped_range_end = nullptr;
3974 }
3975 if (unmapped_range_end == nullptr) {
3976 unmapped_range_start = unmapped_range_end = arena_begin;
3977 }
3978 DCHECK_NE(unmapped_range_start, nullptr);
3979 // It's ok to include all arenas in the unmapped range. Since the
3980 // corresponding state bytes will be kUnprocessed, we will skip calling
3981 // ioctl and madvise on arenas which are waiting to be deleted.
3982 unmapped_range_end += arena_size;
3983 {
3984 // Acquire the arena-pool's lock (in shared-mode) so that the arena being updated
3985 // does not get deleted at the same time. If this critical section turns out to be
3986 // too long and impacts mutator response time, then we can get rid of this lock by
3987 // holding onto the memory ranges of all arenas deleted (since the compaction pause)
3988 // until compaction finishes.
3989 ReaderMutexLock rmu(thread_running_gc_, arena_pool->GetLock());
3990 // If any arenas were freed since the compaction pause, then skip
3991 // visiting them.
3992 if (arena->IsWaitingForDeletion()) {
3993 continue;
3994 }
3995 uint8_t* last_byte = pair.second;
3996 DCHECK_ALIGNED_PARAM(last_byte, gPageSize);
3997 auto visitor = [space_data, last_byte, diff, this, state_arr](
3998 uint8_t* page_begin,
3999 uint8_t* first_obj,
4000 size_t page_size) REQUIRES_SHARED(Locks::mutator_lock_) {
4001 // No need to process pages past last_byte as they already have updated
4002 // gc-roots, if any.
4003 if (page_begin >= last_byte) {
4004 return;
4005 }
4006 LinearAllocPageUpdater updater(this);
4007 size_t page_idx = DivideByPageSize(page_begin - space_data->begin_);
4008 DCHECK_LT(page_idx, space_data->page_status_map_.Size());
4009 PageState expected_state = PageState::kUnprocessed;
4010 // Acquire order to ensure that we don't start accessing the shadow page,
4011 // which is shared with other threads, prior to the CAS. Also, for the same
4012 // reason, we use 'release' order for changing the state to 'processed'.
4013 if (state_arr[page_idx].compare_exchange_strong(
4014 expected_state, PageState::kProcessing, std::memory_order_acquire)) {
4015 // null first_obj indicates that it's a page from an arena for the
4016 // intern-table/class-table, so a first object isn't required.
4017 if (first_obj != nullptr) {
4018 updater.MultiObjectArena(page_begin + diff, first_obj + diff);
4019 } else {
4020 DCHECK_EQ(page_size, gPageSize);
4021 updater.SingleObjectArena(page_begin + diff, page_size);
4022 }
4023 expected_state = PageState::kProcessing;
4024 // Store is sufficient as no other thread could be modifying it. Use
4025 // release order to ensure that the writes to the shadow page are
4026 // committed to memory before the state change becomes visible.
4027 if (updater.WasLastPageTouched()) {
4028 state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
4029 } else {
4030 // See comment in ConcurrentlyProcessLinearAllocPage() with same situation.
4031 ZeropageIoctl(
4032 page_begin, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
4033 // Ioctl will act as release fence.
4034 state_arr[page_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
4035 }
4036 }
4037 };
4038
4039 arena->VisitRoots(visitor);
4040 }
4041 }
4042 if (unmapped_range_end > unmapped_range_start) {
4043 // Map remaining pages.
4044 map_pages();
4045 }
4046 }
4047
RegisterUffd(void * addr,size_t size)4048 void MarkCompact::RegisterUffd(void* addr, size_t size) {
4049 DCHECK(IsValidFd(uffd_));
4050 struct uffdio_register uffd_register;
4051 uffd_register.range.start = reinterpret_cast<uintptr_t>(addr);
4052 uffd_register.range.len = size;
4053 uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
4054 CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
4055 << "ioctl_userfaultfd: register failed: " << strerror(errno)
4056 << ". start:" << static_cast<void*>(addr) << " len:" << PrettySize(size);
4057 }
4058
4059 // TODO: at some point we may want to tolerate certain error conditions (like ENOMEM
4060 // when we unregister the unused portion of the moving-space). Implement support
4061 // for that.
UnregisterUffd(uint8_t * start,size_t len)4062 void MarkCompact::UnregisterUffd(uint8_t* start, size_t len) {
4063 DCHECK(IsValidFd(uffd_));
4064 struct uffdio_range range;
4065 range.start = reinterpret_cast<uintptr_t>(start);
4066 range.len = len;
4067 CHECK_EQ(ioctl(uffd_, UFFDIO_UNREGISTER, &range), 0)
4068 << "ioctl_userfaultfd: unregister failed: " << strerror(errno)
4069 << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len);
4070 }
4071
CompactionPhase()4072 void MarkCompact::CompactionPhase() {
4073 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4074 {
4075 int32_t freed_bytes = black_objs_slide_diff_;
4076 bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
4077 RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
4078 }
4079
4080 CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin());
4081
4082 ProcessLinearAlloc();
4083
4084 auto wait_for_compaction_counter = [this](size_t idx) {
4085 SigbusCounterType count = sigbus_in_progress_count_[idx].fetch_or(
4086 kSigbusCounterCompactionDoneMask, std::memory_order_acq_rel);
4087 // Wait for SIGBUS handlers already in play.
4088 for (uint32_t i = 0; count > 0; i++) {
4089 BackOff(i);
4090 count = sigbus_in_progress_count_[idx].load(std::memory_order_acquire);
4091 count &= ~kSigbusCounterCompactionDoneMask;
4092 }
4093 };
4094 // Set compaction-done bit in the first counter to indicate that gc-thread
4095 // is done compacting and mutators should stop incrementing this counter.
4096 // Mutators should tolerate ENOENT after this. This helps avoid priority
4097 // inversion in case mutators need to map zero-pages after compaction is
4098 // finished but before gc-thread manages to unregister the spaces.
4099 wait_for_compaction_counter(0);
4100
4101 // Unregister moving-space
4102 size_t moving_space_size = bump_pointer_space_->Capacity();
4103 size_t used_size = (moving_first_objs_count_ + black_page_count_) * gPageSize;
4104 if (used_size > 0) {
4105 UnregisterUffd(bump_pointer_space_->Begin(), used_size);
4106 }
4107 // Unregister linear-alloc spaces
4108 for (auto& data : linear_alloc_spaces_data_) {
4109 DCHECK_EQ(data.end_ - data.begin_, static_cast<ssize_t>(data.shadow_.Size()));
4110 UnregisterUffd(data.begin_, data.shadow_.Size());
4111 }
4112 GetCurrentIteration()->SetAppSlowPathDurationMs(MilliTime() - app_slow_path_start_time_);
4113
4114 // Set compaction-done bit in the second counter to indicate that gc-thread
4115 // is done unregistering the spaces and therefore mutators, if in SIGBUS,
4116 // should return without attempting to map the faulted page. When the mutator
4117 // accesses the address again, it will succeed. Once this counter is 0,
4118 // the gc-thread can safely initialize/madvise the data structures.
4119 wait_for_compaction_counter(1);
4120
4121 // Release all of the memory taken by moving-space's from-map
4122 from_space_map_.MadviseDontNeedAndZero();
4123 // mprotect(PROT_NONE) all maps except to-space in debug-mode to catch any unexpected accesses.
4124 DCHECK_EQ(mprotect(from_space_begin_, moving_space_size, PROT_NONE), 0)
4125 << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno);
4126
4127 // madvise the linear-alloc spaces' page-status arrays. Note that we don't need to
4128 // madvise the shadow-map as the pages from it were reclaimed in
4129 // ProcessLinearAlloc() after arenas were mapped.
4130 for (auto& data : linear_alloc_spaces_data_) {
4131 data.page_status_map_.MadviseDontNeedAndZero();
4132 }
4133 }
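
// The wait_for_compaction_counter() handshake above pairs with the mutator side
// roughly as follows (a simplified sketch of the protocol, not the actual
// SIGBUS-handler code; 'counter' stands for one sigbus_in_progress_count_ slot):
//
//   SigbusCounterType old = counter.fetch_add(1, std::memory_order_acquire);
//   if (old & kSigbusCounterCompactionDoneMask) {
//     counter.fetch_sub(1, std::memory_order_acq_rel);
//     return;  // This stage is already over; the retried access will succeed.
//   }
//   // ...handle the fault...
//   counter.fetch_sub(1, std::memory_order_acq_rel);
//
// The GC thread's fetch_or() of the done-bit thus acts as a "no new entries"
// gate, and spinning until the low bits reach zero drains the handlers that
// entered before the gate closed.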
4134
4135 template <size_t kBufferSize>
4136 class MarkCompact::ThreadRootsVisitor : public RootVisitor {
4137 public:
ThreadRootsVisitor(MarkCompact * mark_compact,Thread * const self)4138 explicit ThreadRootsVisitor(MarkCompact* mark_compact, Thread* const self)
4139 : mark_compact_(mark_compact), self_(self) {}
4140
~ThreadRootsVisitor()4141 ~ThreadRootsVisitor() {
4142 Flush();
4143 }
4144
VisitRoots(mirror::Object *** roots,size_t count,const RootInfo & info)4145 void VisitRoots(mirror::Object*** roots,
4146 size_t count,
4147 [[maybe_unused]] const RootInfo& info) override
4148 REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
4149 for (size_t i = 0; i < count; i++) {
4150 mirror::Object* obj = *roots[i];
4151 if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
4152 Push(obj);
4153 }
4154 }
4155 }
4156
VisitRoots(mirror::CompressedReference<mirror::Object> ** roots,size_t count,const RootInfo & info)4157 void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
4158 size_t count,
4159 [[maybe_unused]] const RootInfo& info) override
4160 REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
4161 for (size_t i = 0; i < count; i++) {
4162 mirror::Object* obj = roots[i]->AsMirrorPtr();
4163 if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
4164 Push(obj);
4165 }
4166 }
4167 }
4168
4169 private:
Flush()4170 void Flush() REQUIRES_SHARED(Locks::mutator_lock_)
4171 REQUIRES(Locks::heap_bitmap_lock_) {
4172 StackReference<mirror::Object>* start;
4173 StackReference<mirror::Object>* end;
4174 {
4175 MutexLock mu(self_, mark_compact_->lock_);
4176 // Loop here because even after expanding once it may not be sufficient to
4177 // accommodate all references. That's almost impossible, but there is no harm
4178 // in implementing it this way.
4179 while (!mark_compact_->mark_stack_->BumpBack(idx_, &start, &end)) {
4180 mark_compact_->ExpandMarkStack();
4181 }
4182 }
4183 while (idx_ > 0) {
4184 *start++ = roots_[--idx_];
4185 }
4186 DCHECK_EQ(start, end);
4187 }
4188
Push(mirror::Object * obj)4189 void Push(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
4190 REQUIRES(Locks::heap_bitmap_lock_) {
4191 if (UNLIKELY(idx_ >= kBufferSize)) {
4192 Flush();
4193 }
4194 roots_[idx_++].Assign(obj);
4195 }
4196
4197 StackReference<mirror::Object> roots_[kBufferSize];
4198 size_t idx_ = 0;
4199 MarkCompact* const mark_compact_;
4200 Thread* const self_;
4201 };
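
// ThreadRootsVisitor batches up to kBufferSize roots per acquisition of the
// mark-stack lock (see Flush() above). It is used from the checkpoint below;
// a minimal usage sketch mirroring CheckpointMarkThreadRoots::Run():
//
//   ThreadRootsVisitor</*kBufferSize=*/20> visitor(mark_compact, self);
//   thread->VisitRoots(&visitor, kVisitRootFlagAllRoots);
//   // The destructor flushes whatever is still buffered.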
4202
4203 class MarkCompact::CheckpointMarkThreadRoots : public Closure {
4204 public:
CheckpointMarkThreadRoots(MarkCompact * mark_compact)4205 explicit CheckpointMarkThreadRoots(MarkCompact* mark_compact) : mark_compact_(mark_compact) {}
4206
Run(Thread * thread)4207 void Run(Thread* thread) override NO_THREAD_SAFETY_ANALYSIS {
4208 ScopedTrace trace("Marking thread roots");
4209 // Note: self is not necessarily equal to thread since thread may be
4210 // suspended.
4211 Thread* const self = Thread::Current();
4212 CHECK(thread == self
4213 || thread->IsSuspended()
4214 || thread->GetState() == ThreadState::kWaitingPerformingGc)
4215 << thread->GetState() << " thread " << thread << " self " << self;
4216 {
4217 ThreadRootsVisitor</*kBufferSize*/ 20> visitor(mark_compact_, self);
4218 thread->VisitRoots(&visitor, kVisitRootFlagAllRoots);
4219 }
4220 // Clear page-buffer to prepare for compaction phase.
4221 thread->SetThreadLocalGcBuffer(nullptr);
4222
4223 // If thread is a running mutator, then act on behalf of the garbage
4224 // collector. See the code in ThreadList::RunCheckpoint.
4225 mark_compact_->GetBarrier().Pass(self);
4226 }
4227
4228 private:
4229 MarkCompact* const mark_compact_;
4230 };
4231
MarkRootsCheckpoint(Thread * self,Runtime * runtime)4232 void MarkCompact::MarkRootsCheckpoint(Thread* self, Runtime* runtime) {
4233 // We revoke TLABs later during the paused round of marking.
4234 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4235 CheckpointMarkThreadRoots check_point(this);
4236 ThreadList* thread_list = runtime->GetThreadList();
4237 gc_barrier_.Init(self, 0);
4238 // Request that the checkpoint be run on all threads, returning a count of the threads that
4239 // must run through the barrier, including self.
4240 size_t barrier_count = thread_list->RunCheckpoint(&check_point);
4241 // Release locks then wait for all mutator threads to pass the barrier.
4242 // If there are no threads to wait for, which implies that all the checkpoint functions have
4243 // finished, then there is no need to release locks.
4244 if (barrier_count == 0) {
4245 return;
4246 }
4247 Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
4248 Locks::mutator_lock_->SharedUnlock(self);
4249 {
4250 ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
4251 gc_barrier_.Increment(self, barrier_count);
4252 }
4253 Locks::mutator_lock_->SharedLock(self);
4254 Locks::heap_bitmap_lock_->ExclusiveLock(self);
4255 ProcessMarkStack();
4256 }
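
// The barrier choreography above follows ART's checkpoint pattern:
// RunCheckpoint() returns how many threads will run the closure asynchronously,
// each closure calls GetBarrier().Pass(self) when done (see
// CheckpointMarkThreadRoots::Run() above), and Increment(self, barrier_count)
// blocks until all of those Pass() calls have arrived. A compressed sketch:
//
//   // GC thread:                               // each checkpointed thread:
//   size_t n = thread_list->RunCheckpoint(&cp); //   ...visit its roots...
//   gc_barrier_.Increment(self, n);  // waits   //   barrier.Pass(self);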
4257
MarkNonThreadRoots(Runtime * runtime)4258 void MarkCompact::MarkNonThreadRoots(Runtime* runtime) {
4259 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4260 runtime->VisitNonThreadRoots(this);
4261 ProcessMarkStack();
4262 }
4263
MarkConcurrentRoots(VisitRootFlags flags,Runtime * runtime)4264 void MarkCompact::MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime) {
4265 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4266 runtime->VisitConcurrentRoots(this, flags);
4267 ProcessMarkStack();
4268 }
4269
RevokeAllThreadLocalBuffers()4270 void MarkCompact::RevokeAllThreadLocalBuffers() {
4271 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4272 bump_pointer_space_->RevokeAllThreadLocalBuffers();
4273 }
4274
4275 class MarkCompact::ScanObjectVisitor {
4276 public:
ScanObjectVisitor(MarkCompact * const mark_compact)4277 explicit ScanObjectVisitor(MarkCompact* const mark_compact) ALWAYS_INLINE
4278 : mark_compact_(mark_compact) {}
4279
operator ()(ObjPtr<mirror::Object> obj) const4280 void operator()(ObjPtr<mirror::Object> obj) const
4281 ALWAYS_INLINE
4282 REQUIRES(Locks::heap_bitmap_lock_)
4283 REQUIRES_SHARED(Locks::mutator_lock_) {
4284 mark_compact_->ScanObject</*kUpdateLiveWords*/ false>(obj.Ptr());
4285 }
4286
4287 private:
4288 MarkCompact* const mark_compact_;
4289 };
4290
UpdateAndMarkModUnion()4291 void MarkCompact::UpdateAndMarkModUnion() {
4292 accounting::CardTable* const card_table = heap_->GetCardTable();
4293 for (const auto& space : immune_spaces_.GetSpaces()) {
4294 const char* name = space->IsZygoteSpace()
4295 ? "UpdateAndMarkZygoteModUnionTable"
4296 : "UpdateAndMarkImageModUnionTable";
4297 DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space;
4298 TimingLogger::ScopedTiming t(name, GetTimings());
4299 accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
4300 if (table != nullptr) {
4301 // UpdateAndMarkReferences() doesn't visit Reference-type objects. But
4302 // that's fine because these objects are immutable enough (referent can
4303 // only be cleared) and hence the only referents they can have are intra-space.
4304 table->UpdateAndMarkReferences(this);
4305 } else {
4306 // No mod-union table, scan all dirty/aged cards in the corresponding
4307 // card-table. This can only occur for app images.
4308 card_table->Scan</*kClearCard*/ false>(space->GetMarkBitmap(),
4309 space->Begin(),
4310 space->End(),
4311 ScanObjectVisitor(this),
4312 gc::accounting::CardTable::kCardAged);
4313 }
4314 }
4315 }
4316
ScanOldGenObjects()4317 void MarkCompact::ScanOldGenObjects() {
4318 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4319 accounting::CardTable* const card_table = heap_->GetCardTable();
4320 // Moving space
4321 card_table->Scan</*kClearCard=*/false>(moving_space_bitmap_,
4322 moving_space_begin_,
4323 old_gen_end_,
4324 ScanObjectVisitor(this),
4325 gc::accounting::CardTable::kCardAged2);
4326 ProcessMarkStack();
4327 // Non-moving space
4328 card_table->Scan</*kClearCard=*/false>(non_moving_space_bitmap_,
4329 non_moving_space_->Begin(),
4330 non_moving_space_->End(),
4331 ScanObjectVisitor(this),
4332 gc::accounting::CardTable::kCardAged2);
4333 ProcessMarkStack();
4334 }
4335
MarkReachableObjects()4336 void MarkCompact::MarkReachableObjects() {
4337 UpdateAndMarkModUnion();
4338 // Recursively mark all the non-image bits set in the mark bitmap.
4339 ProcessMarkStack();
4340 if (young_gen_) {
4341 // If an object overlaps the old-gen boundary, we need to visit it
4342 // to make sure that we don't miss the references in the mid-gen area, and
4343 // also to update the corresponding liveness info.
4344 if (old_gen_end_ > moving_space_begin_) {
4345 uintptr_t old_gen_end = reinterpret_cast<uintptr_t>(old_gen_end_);
4346 mirror::Object* obj = moving_space_bitmap_->FindPrecedingObject(old_gen_end - kAlignment);
4347 if (obj != nullptr) {
4348 size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>();
4349 if (reinterpret_cast<uintptr_t>(obj) + RoundUp(obj_size, kAlignment) > old_gen_end) {
4350 ScanObject</*kUpdateLiveWords=*/true>(obj);
4351 }
4352 }
4353 }
4354 ScanOldGenObjects();
4355 }
4356 }
4357
ScanDirtyObjects(bool paused,uint8_t minimum_age)4358 void MarkCompact::ScanDirtyObjects(bool paused, uint8_t minimum_age) {
4359 accounting::CardTable* card_table = heap_->GetCardTable();
4360 for (const auto& space : heap_->GetContinuousSpaces()) {
4361 const char* name = nullptr;
4362 switch (space->GetGcRetentionPolicy()) {
4363 case space::kGcRetentionPolicyNeverCollect:
4364 name = paused ? "(Paused)ScanGrayImmuneSpaceObjects" : "ScanGrayImmuneSpaceObjects";
4365 break;
4366 case space::kGcRetentionPolicyFullCollect:
4367 name = paused ? "(Paused)ScanGrayZygoteSpaceObjects" : "ScanGrayZygoteSpaceObjects";
4368 break;
4369 case space::kGcRetentionPolicyAlwaysCollect:
4370 DCHECK(space == bump_pointer_space_ || space == non_moving_space_);
4371 name = paused ? "(Paused)ScanGrayAllocSpaceObjects" : "ScanGrayAllocSpaceObjects";
4372 break;
4373 }
4374 TimingLogger::ScopedTiming t(name, GetTimings());
4375 if (paused && use_generational_ &&
4376 space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
4377 DCHECK_EQ(minimum_age, accounting::CardTable::kCardDirty);
4378 auto mod_visitor = [](uint8_t* card, uint8_t cur_val) {
4379 DCHECK_EQ(cur_val, accounting::CardTable::kCardDirty);
4380 *card = accounting::CardTable::kCardAged;
4381 };
4382
4383 card_table->Scan</*kClearCard=*/false>(space->GetMarkBitmap(),
4384 space->Begin(),
4385 space->End(),
4386 ScanObjectVisitor(this),
4387 mod_visitor,
4388 minimum_age);
4389 } else {
4390 card_table->Scan</*kClearCard=*/false>(space->GetMarkBitmap(),
4391 space->Begin(),
4392 space->End(),
4393 ScanObjectVisitor(this),
4394 minimum_age);
4395 }
4396 ProcessMarkStack();
4397 }
4398 }
4399
RecursiveMarkDirtyObjects(bool paused,uint8_t minimum_age)4400 void MarkCompact::RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age) {
4401 ScanDirtyObjects(paused, minimum_age);
4402 CHECK(mark_stack_->IsEmpty());
4403 }
4404
MarkRoots(VisitRootFlags flags)4405 void MarkCompact::MarkRoots(VisitRootFlags flags) {
4406 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4407 Runtime* runtime = Runtime::Current();
4408 // Make sure that the checkpoint which collects the stack roots is the first
4409 // one capturing GC-roots. As this one is supposed to find the address,
4410 // everything allocated after that (during this marking phase) will be
4411 // considered 'marked'.
4412 MarkRootsCheckpoint(thread_running_gc_, runtime);
4413 MarkNonThreadRoots(runtime);
4414 MarkConcurrentRoots(flags, runtime);
4415 }
4416
PreCleanCards()4417 void MarkCompact::PreCleanCards() {
4418 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4419 CHECK(!Locks::mutator_lock_->IsExclusiveHeld(thread_running_gc_));
4420 // Age the card-table before the thread-stack scanning checkpoint in MarkRoots()
4421 // as it ensures that there are no in-progress write barriers which started
4422 // prior to aging the card-table.
4423 PrepareForMarking(/*pre_marking=*/false);
4424 MarkRoots(static_cast<VisitRootFlags>(kVisitRootFlagClearRootLog | kVisitRootFlagNewRoots));
4425 RecursiveMarkDirtyObjects(/*paused*/ false, accounting::CardTable::kCardDirty - 1);
4426 }
4427
4428 // In a concurrent marking algorithm, if we are not using a write/read barrier, as
4429 // in this case, then we need a stop-the-world (STW) round in the end to mark
4430 // objects which were written to concurrently while marking was being
4431 // performed.
4432 // In order to minimize the pause time, we could take one of the two approaches:
4433 // 1. Keep repeating concurrent marking of dirty cards until the time spent goes
4434 // below a threshold.
4435 // 2. Do two rounds concurrently and then attempt a paused one. If we figure
4436 // that it's taking too long, then resume mutators and retry.
4437 //
4438 // Given the non-trivial fixed overhead of running a round (card table and root
4439 // scan), it might be better to go with approach 2.
MarkingPhase()4440 void MarkCompact::MarkingPhase() {
4441 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4442 DCHECK_EQ(thread_running_gc_, Thread::Current());
4443 WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
4444 MaybeClampGcStructures();
4445 PrepareForMarking(/*pre_marking=*/true);
4446 MarkZygoteLargeObjects();
4447 MarkRoots(
4448 static_cast<VisitRootFlags>(kVisitRootFlagAllRoots | kVisitRootFlagStartLoggingNewRoots));
4449 MarkReachableObjects();
4450 // Pre-clean dirtied cards to reduce pauses.
4451 PreCleanCards();
4452
4453 // Set up reference processing and forward soft references once before enabling
4454 // slow path (in MarkingPause)
4455 ReferenceProcessor* rp = GetHeap()->GetReferenceProcessor();
4456 bool clear_soft_references = GetCurrentIteration()->GetClearSoftReferences();
4457 rp->Setup(thread_running_gc_, this, /*concurrent=*/ true, clear_soft_references);
4458 if (!clear_soft_references) {
4459 // Forward as many SoftReferences as possible before inhibiting reference access.
4460 rp->ForwardSoftReferences(GetTimings());
4461 }
4462 }
4463
4464 class MarkCompact::RefFieldsVisitor {
4465 public:
RefFieldsVisitor(MarkCompact * const mark_compact)4466 ALWAYS_INLINE RefFieldsVisitor(MarkCompact* const mark_compact)
4467 : mark_compact_(mark_compact),
4468 young_gen_begin_(mark_compact->mid_gen_end_),
4469 young_gen_end_(mark_compact->moving_space_end_),
4470 dirty_card_(false),
4471 // Ideally we should only check for objects outside young-gen. However,
4472 // the boundary of young-gen can change later in PrepareForCompaction()
4473 // as we need the mid-gen-end to be page-aligned. Since most of the
4474 // objects don't have native-roots, it's not too costly to check all
4475 // objects being visited during marking.
4476 check_native_roots_to_young_gen_(mark_compact->use_generational_) {}
4477
ShouldDirtyCard() const4478 bool ShouldDirtyCard() const { return dirty_card_; }
4479
operator ()(mirror::Object * obj,MemberOffset offset,bool is_static) const4480 ALWAYS_INLINE void operator()(mirror::Object* obj,
4481 MemberOffset offset,
4482 [[maybe_unused]] bool is_static) const
4483 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4484 if (kCheckLocks) {
4485 Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
4486 Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
4487 }
4488 mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset);
4489 mark_compact_->MarkObject(ref, obj, offset);
4490 }
4491
operator ()(ObjPtr<mirror::Class> klass,ObjPtr<mirror::Reference> ref) const4492 void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const ALWAYS_INLINE
4493 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4494 mark_compact_->DelayReferenceReferent(klass, ref);
4495 }
4496
VisitRootIfNonNull(mirror::CompressedReference<mirror::Object> * root) const4497 void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
4498 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4499 if (!root->IsNull()) {
4500 VisitRoot(root);
4501 }
4502 }
4503
VisitRoot(mirror::CompressedReference<mirror::Object> * root) const4504 void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
4505 REQUIRES(Locks::heap_bitmap_lock_)
4506 REQUIRES_SHARED(Locks::mutator_lock_) {
4507 if (kCheckLocks) {
4508 Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
4509 Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
4510 }
4511 mirror::Object* ref = root->AsMirrorPtr();
4512 mark_compact_->MarkObject(ref);
4513 if (check_native_roots_to_young_gen_) {
4514 dirty_card_ |= reinterpret_cast<uint8_t*>(ref) >= young_gen_begin_ &&
4515 reinterpret_cast<uint8_t*>(ref) < young_gen_end_;
4516 }
4517 }
4518
4519 private:
4520 MarkCompact* const mark_compact_;
4521 uint8_t* const young_gen_begin_;
4522 uint8_t* const young_gen_end_;
4523 mutable bool dirty_card_;
4524 const bool check_native_roots_to_young_gen_;
4525 };
4526
4527 template <size_t kAlignment>
LiveBytesInBitmapWord(size_t chunk_idx) const4528 size_t MarkCompact::LiveWordsBitmap<kAlignment>::LiveBytesInBitmapWord(size_t chunk_idx) const {
4529 const size_t index = chunk_idx * kBitmapWordsPerVectorWord;
4530 size_t words = 0;
4531 for (uint32_t i = 0; i < kBitmapWordsPerVectorWord; i++) {
4532 words += POPCOUNT(Bitmap::Begin()[index + i]);
4533 }
4534 return words * kAlignment;
4535 }
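
// Worked example for the computation above (the concrete word values are
// illustrative; each set bit in the live-words bitmap stands for kAlignment
// live bytes):
//
//   uint32_t w0 = 0x000000FFu;  // 8 set bits
//   uint32_t w1 = 0x0000000Fu;  // 4 set bits
//   size_t live_bytes = (POPCOUNT(w0) + POPCOUNT(w1)) * kAlignment;  // 12 * kAlignment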
4536
UpdateLivenessInfo(mirror::Object * obj,size_t obj_size)4537 void MarkCompact::UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) {
4538 DCHECK(obj != nullptr);
4539 DCHECK_EQ(obj_size, obj->SizeOf<kDefaultVerifyFlags>());
4540 uintptr_t obj_begin = reinterpret_cast<uintptr_t>(obj);
4541 UpdateClassAfterObjectMap(obj);
4542 size_t size = RoundUp(obj_size, kAlignment);
4543 uintptr_t bit_index = live_words_bitmap_->SetLiveWords(obj_begin, size);
4544 size_t chunk_idx =
4545 (obj_begin - reinterpret_cast<uintptr_t>(moving_space_begin_)) / kOffsetChunkSize;
4546 // Compute the bit-index within the chunk-info vector word.
4547 bit_index %= kBitsPerVectorWord;
4548 size_t first_chunk_portion = std::min(size, (kBitsPerVectorWord - bit_index) * kAlignment);
4549 chunk_info_vec_[chunk_idx] += first_chunk_portion;
4550 DCHECK_LE(chunk_info_vec_[chunk_idx], kOffsetChunkSize)
4551 << "first_chunk_portion:" << first_chunk_portion
4552 << " obj-size:" << RoundUp(obj_size, kAlignment);
4553 chunk_idx++;
4554 DCHECK_LE(first_chunk_portion, size);
4555 for (size -= first_chunk_portion; size > kOffsetChunkSize; size -= kOffsetChunkSize) {
4556 DCHECK_EQ(chunk_info_vec_[chunk_idx], 0u);
4557 chunk_info_vec_[chunk_idx++] = kOffsetChunkSize;
4558 }
4559 chunk_info_vec_[chunk_idx] += size;
4560 DCHECK_LE(chunk_info_vec_[chunk_idx], kOffsetChunkSize)
4561 << "size:" << size << " obj-size:" << RoundUp(obj_size, kAlignment);
4562 }
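
// A worked example of the chunk accounting above (numbers purely illustrative;
// assume kOffsetChunkSize == 256 just for the arithmetic): an object of aligned
// size 600 whose starting bit leaves room for only 100 bytes in its first chunk
// contributes
//
//   chunk_info_vec_[i]   += 100;  // first_chunk_portion
//   chunk_info_vec_[i+1]  = 256;  // one full middle chunk
//   chunk_info_vec_[i+2] += 244;  // remainder (100 + 256 + 244 == 600)
//
// so no entry ever exceeds kOffsetChunkSize, which is what the DCHECKs verify.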
4563
4564 template <bool kUpdateLiveWords>
ScanObject(mirror::Object * obj)4565 void MarkCompact::ScanObject(mirror::Object* obj) {
4566 mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
4567 // TODO(lokeshgidra): Remove the following condition once b/373609505 is fixed.
4568 if (UNLIKELY(klass == nullptr)) {
4569 // It was seen in the ConcurrentCopying GC that, after a small wait, reloading
4570 // the class pointer yields a valid class object. So as a workaround,
4571 // we can continue execution and log an error that this happened.
4572 for (size_t i = 0; i < 1000; i++) {
4573 // Wait for 1ms at a time. Don't wait for more than 1 second in total.
4574 usleep(1000);
4575 klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
4576 if (klass != nullptr) {
4577 break;
4578 }
4579 }
4580 if (klass == nullptr) {
4581 // It must be heap corruption.
4582 LOG(FATAL_WITHOUT_ABORT) << "klass pointer for obj: " << obj << " found to be null."
4583 << " black_dense_end: " << static_cast<void*>(black_dense_end_)
4584 << " mid_gen_end: " << static_cast<void*>(mid_gen_end_)
4585 << " prev_post_compact_end: " << prev_post_compact_end_
4586 << " prev_black_allocations_begin: " << prev_black_allocations_begin_
4587 << " prev_black_dense_end: " << prev_black_dense_end_
4588 << " prev_gc_young: " << prev_gc_young_
4589 << " prev_gc_performed_compaction: "
4590 << prev_gc_performed_compaction_;
4591 heap_->GetVerification()->LogHeapCorruption(
4592 obj, mirror::Object::ClassOffset(), klass, /*fatal=*/true);
4593 }
4594 }
4595 // The size of `obj` is used both here (to update `bytes_scanned_`) and in
4596 // `UpdateLivenessInfo`. As fetching this value can be expensive, do it once
4597 // here and pass that information to `UpdateLivenessInfo`.
4598 size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>();
4599 bytes_scanned_ += obj_size;
4600
4601 RefFieldsVisitor visitor(this);
4602 DCHECK(IsMarked(obj)) << "Scanning marked object " << obj << "\n" << heap_->DumpSpaces();
4603 if (kUpdateLiveWords && HasAddress(obj)) {
4604 UpdateLivenessInfo(obj, obj_size);
4605 freed_objects_--;
4606 }
4607 obj->VisitReferences(visitor, visitor);
4608 // Old-gen cards for objects containing references to mid-gen need to be kept
4609 // dirty for re-scan in the next GC cycle. We take care of that mostly during
4610 // the compaction-phase as that enables us to implicitly take care of
4611 // black-allocated objects as well. Unfortunately, since we don't visit
4612 // native-roots during compaction, that has to be captured during marking.
4613 //
4614 // Note that we can't dirty the cards right away because then we would wrongly
4615 // age them during the re-scan of this marking-phase, and thereby may lose them
4616 // by the end of the GC cycle.
4617 if (visitor.ShouldDirtyCard()) {
4618 dirty_cards_later_vec_.push_back(obj);
4619 }
4620 }
4621
4622 // Scan anything that's on the mark stack.
ProcessMarkStack()4623 void MarkCompact::ProcessMarkStack() {
4624 // TODO: eventually get rid of this as we now call this function quite a few times.
4625 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4626 // TODO: try prefetch like in CMS
4627 while (!mark_stack_->IsEmpty()) {
4628 mirror::Object* obj = mark_stack_->PopBack();
4629 DCHECK(obj != nullptr);
4630 ScanObject</*kUpdateLiveWords*/ true>(obj);
4631 }
4632 }
4633
ExpandMarkStack()4634 void MarkCompact::ExpandMarkStack() {
4635 const size_t new_size = mark_stack_->Capacity() * 2;
4636 std::vector<StackReference<mirror::Object>> temp(mark_stack_->Begin(),
4637 mark_stack_->End());
4638 mark_stack_->Resize(new_size);
4639 for (auto& ref : temp) {
4640 mark_stack_->PushBack(ref.AsMirrorPtr());
4641 }
4642 DCHECK(!mark_stack_->IsFull());
4643 }
4644
PushOnMarkStack(mirror::Object * obj)4645 inline void MarkCompact::PushOnMarkStack(mirror::Object* obj) {
4646 if (UNLIKELY(mark_stack_->IsFull())) {
4647 ExpandMarkStack();
4648 }
4649 mark_stack_->PushBack(obj);
4650 }
4651
MarkObjectNonNull(mirror::Object * obj,mirror::Object * holder,MemberOffset offset)4652 inline void MarkCompact::MarkObjectNonNull(mirror::Object* obj,
4653 mirror::Object* holder,
4654 MemberOffset offset) {
4655 DCHECK(obj != nullptr);
4656 if (MarkObjectNonNullNoPush</*kParallel*/false>(obj, holder, offset)) {
4657 PushOnMarkStack(obj);
4658 }
4659 }
4660
4661 template <bool kParallel>
MarkObjectNonNullNoPush(mirror::Object * obj,mirror::Object * holder,MemberOffset offset)4662 inline bool MarkCompact::MarkObjectNonNullNoPush(mirror::Object* obj,
4663 mirror::Object* holder,
4664 MemberOffset offset) {
4665 // We expect most of the references to be in bump-pointer space, so try that
4666 // first to keep the cost of this function minimal.
4667 if (LIKELY(HasAddress(obj))) {
4668 // If obj is in old-gen (during young-gc) then we shouldn't add it to
4669 // mark-stack to limit marking to young generation.
4670 if (young_gen_ && reinterpret_cast<uint8_t*>(obj) < old_gen_end_) {
4671 DCHECK(moving_space_bitmap_->Test(obj));
4672 return false;
4673 }
4674 return kParallel ? !moving_space_bitmap_->AtomicTestAndSet(obj)
4675 : !moving_space_bitmap_->Set(obj);
4676 } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4677 return kParallel ? !non_moving_space_bitmap_->AtomicTestAndSet(obj)
4678 : !non_moving_space_bitmap_->Set(obj);
4679 } else if (immune_spaces_.ContainsObject(obj)) {
4680 DCHECK(IsMarked(obj) != nullptr);
4681 return false;
4682 } else {
4683 // Must be a large-object space, otherwise it's a case of heap corruption.
4684 if (!IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment())) {
4685 // Objects in large-object space are aligned to the large-object alignment.
4686 // So if we have an object which doesn't belong to any space and is not
4687 // aligned to that alignment either, then it's memory corruption.
4688 // TODO: implement protect/unprotect in bump-pointer space.
4689 heap_->GetVerification()->LogHeapCorruption(holder, offset, obj, /*fatal*/ true);
4690 }
4691 DCHECK_NE(heap_->GetLargeObjectsSpace(), nullptr)
4692 << "ref=" << obj
4693 << " doesn't belong to any of the spaces and large object space doesn't exist";
4694 accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4695 DCHECK(los_bitmap->HasAddress(obj));
4696 if (kParallel) {
4697 los_bitmap->AtomicTestAndSet(obj);
4698 } else {
4699 los_bitmap->Set(obj);
4700 }
4701 // We only have primitive arrays and strings in the large object space, so
4702 // there is no reason to push onto the mark-stack.
4703 DCHECK(obj->IsString() || (obj->IsArrayInstance() && !obj->IsObjectArray()));
4704 return false;
4705 }
4706 }
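
// The kParallel variants above rely on the bitmap's test-and-set returning the
// previous bit value so that exactly one marking thread wins the right to push
// the object. The idiom, sketched on a plain atomic word (illustrative only;
// ART's bitmaps wrap this in AtomicTestAndSet()):
//
//   bool TestAndSetBit(std::atomic<uintptr_t>& word, size_t bit) {
//     uintptr_t mask = uintptr_t{1} << bit;
//     return (word.fetch_or(mask, std::memory_order_relaxed) & mask) != 0;  // true == was already set
//   }
//   // if (!TestAndSetBit(word, bit)) PushOnMarkStack(obj);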
4707
MarkObject(mirror::Object * obj,mirror::Object * holder,MemberOffset offset)4708 inline void MarkCompact::MarkObject(mirror::Object* obj,
4709 mirror::Object* holder,
4710 MemberOffset offset) {
4711 if (obj != nullptr) {
4712 MarkObjectNonNull(obj, holder, offset);
4713 }
4714 }
4715
MarkObject(mirror::Object * obj)4716 mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) {
4717 MarkObject(obj, nullptr, MemberOffset(0));
4718 return obj;
4719 }
4720
MarkHeapReference(mirror::HeapReference<mirror::Object> * obj,bool do_atomic_update)4721 void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
4722 [[maybe_unused]] bool do_atomic_update) {
4723 MarkObject(obj->AsMirrorPtr(), nullptr, MemberOffset(0));
4724 }
4725
VisitRoots(mirror::Object *** roots,size_t count,const RootInfo & info)4726 void MarkCompact::VisitRoots(mirror::Object*** roots,
4727 size_t count,
4728 const RootInfo& info) {
4729 if (compacting_) {
4730 uint8_t* moving_space_begin = black_dense_end_;
4731 uint8_t* moving_space_end = moving_space_end_;
4732 for (size_t i = 0; i < count; ++i) {
4733 UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
4734 }
4735 } else {
4736 for (size_t i = 0; i < count; ++i) {
4737 MarkObjectNonNull(*roots[i]);
4738 }
4739 }
4740 }
4741
VisitRoots(mirror::CompressedReference<mirror::Object> ** roots,size_t count,const RootInfo & info)4742 void MarkCompact::VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
4743 size_t count,
4744 const RootInfo& info) {
4745 // TODO: do we need to check if the root is null or not?
4746 if (compacting_) {
4747 uint8_t* moving_space_begin = black_dense_end_;
4748 uint8_t* moving_space_end = moving_space_end_;
4749 for (size_t i = 0; i < count; ++i) {
4750 UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
4751 }
4752 } else {
4753 for (size_t i = 0; i < count; ++i) {
4754 MarkObjectNonNull(roots[i]->AsMirrorPtr());
4755 }
4756 }
4757 }
4758
IsMarked(mirror::Object * obj)4759 mirror::Object* MarkCompact::IsMarked(mirror::Object* obj) {
4760 if (HasAddress(obj)) {
4761 const bool is_black = reinterpret_cast<uint8_t*>(obj) >= black_allocations_begin_;
4762 if (compacting_) {
4763 if (is_black) {
4764 return PostCompactBlackObjAddr(obj);
4765 } else if (moving_space_bitmap_->Test(obj)) {
4766 if (reinterpret_cast<uint8_t*>(obj) < black_dense_end_) {
4767 return obj;
4768 } else {
4769 return PostCompactOldObjAddr(obj);
4770 }
4771 } else {
4772 return nullptr;
4773 }
4774 }
4775 return (is_black || moving_space_bitmap_->Test(obj)) ? obj : nullptr;
4776 } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4777 if (non_moving_space_bitmap_->Test(obj)) {
4778 return obj;
4779 }
4780 } else if (immune_spaces_.ContainsObject(obj)) {
4781 return obj;
4782 } else {
4783 DCHECK(heap_->GetLargeObjectsSpace())
4784 << "ref=" << obj
4785 << " doesn't belong to any of the spaces and large object space doesn't exist";
4786 accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4787 if (los_bitmap->HasAddress(obj)) {
4788 DCHECK(IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment()));
4789 if (los_bitmap->Test(obj)) {
4790 return obj;
4791 }
4792 } else {
4793 // The given obj is not in any of the known spaces, so return null. This could
4794 // happen, for instance, in interpreter caches wherein a concurrent update
4795 // to the cache could result in obj being a non-reference. This is
4796 // tolerable because SweepInterpreterCaches only updates if the given
4797 // object has moved, which can't be the case for a non-reference.
4798 return nullptr;
4799 }
4800 }
4801 return marking_done_ && IsOnAllocStack(obj) ? obj : nullptr;
4802 }
4803
IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object> * obj,bool do_atomic_update)4804 bool MarkCompact::IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
4805 [[maybe_unused]] bool do_atomic_update) {
4806 mirror::Object* ref = obj->AsMirrorPtr();
4807 if (ref == nullptr) {
4808 return true;
4809 }
4810 return IsMarked(ref);
4811 }
4812
4813 // Process the 'referent' field in a java.lang.ref.Reference. If the referent
4814 // has not yet been marked, put it on the appropriate list in the heap for later
4815 // processing.
DelayReferenceReferent(ObjPtr<mirror::Class> klass,ObjPtr<mirror::Reference> ref)4816 void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
4817 ObjPtr<mirror::Reference> ref) {
4818 heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, this);
4819 }
4820
4821 template <typename Visitor>
4822 class MarkCompact::VisitReferencesVisitor {
4823 public:
VisitReferencesVisitor(Visitor visitor)4824 explicit VisitReferencesVisitor(Visitor visitor) : visitor_(visitor) {}
4825
operator ()(mirror::Object * obj,MemberOffset offset,bool is_static) const4826 ALWAYS_INLINE void operator()(mirror::Object* obj,
4827 MemberOffset offset,
4828 [[maybe_unused]] bool is_static) const
4829 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4830 visitor_(obj->GetFieldObject<mirror::Object>(offset));
4831 }
4832
operator ()(ObjPtr<mirror::Class> klass,ObjPtr<mirror::Reference> ref) const4833 ALWAYS_INLINE void operator()([[maybe_unused]] ObjPtr<mirror::Class> klass,
4834 ObjPtr<mirror::Reference> ref) const
4835 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4836 visitor_(ref.Ptr());
4837 }
4838
VisitRootIfNonNull(mirror::CompressedReference<mirror::Object> * root) const4839 void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
4840 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4841 if (!root->IsNull()) {
4842 VisitRoot(root);
4843 }
4844 }
4845
VisitRoot(mirror::CompressedReference<mirror::Object> * root) const4846 void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
4847 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
4848 visitor_(root->AsMirrorPtr());
4849 }
4850
4851 private:
4852 Visitor visitor_;
4853 };
4854
VerifyNoMissingCardMarks()4855 void MarkCompact::VerifyNoMissingCardMarks() {
4856 if (kVerifyNoMissingCardMarks) {
4857 accounting::CardTable* card_table = heap_->GetCardTable();
4858 auto obj_visitor = [&](mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) {
4859 bool found = false;
4860 VisitReferencesVisitor visitor(
4861 [begin = old_gen_end_, end = moving_space_end_, &found](mirror::Object* ref) {
4862 found |= ref >= reinterpret_cast<mirror::Object*>(begin) &&
4863 ref < reinterpret_cast<mirror::Object*>(end);
4864 });
4865 obj->VisitReferences</*kVisitNativeRoots=*/true>(visitor, visitor);
4866 if (found) {
4867 size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
4868 if (!card_table->IsDirty(obj) &&
4869 reinterpret_cast<uint8_t*>(obj) + obj_size <= old_gen_end_) {
4870 std::ostringstream oss;
4871 obj->DumpReferences</*kDumpNativeRoots=*/true>(oss, /*dump_type_of=*/true);
4872 LOG(FATAL_WITHOUT_ABORT)
4873 << "Object " << obj << " (" << obj->PrettyTypeOf()
4874 << ") has references to mid-gen/young-gen:"
4875 << "\n obj-size = " << obj_size
4876 << "\n old-gen-end = " << static_cast<void*>(old_gen_end_)
4877 << "\n mid-gen-end = " << static_cast<void*>(mid_gen_end_) << "\n references =\n"
4878 << oss.str();
4879 heap_->GetVerification()->LogHeapCorruption(
4880 /*holder=*/nullptr, MemberOffset(0), obj, /*fatal=*/true);
4881 }
4882 }
4883 };
4884 moving_space_bitmap_->VisitMarkedRange(reinterpret_cast<uintptr_t>(moving_space_begin_),
4885 reinterpret_cast<uintptr_t>(old_gen_end_),
4886 obj_visitor);
4887 }
4888 }
4889
VerifyPostGCObjects(bool performed_compaction,uint8_t * mark_bitmap_clear_end)4890 void MarkCompact::VerifyPostGCObjects(bool performed_compaction, uint8_t* mark_bitmap_clear_end) {
4891 if (kVerifyPostGCObjects) {
4892 mirror::Object* last_visited_obj = nullptr;
4893 auto obj_visitor =
4894 [&](mirror::Object* obj, bool verify_bitmap = false) REQUIRES_SHARED(Locks::mutator_lock_) {
4895 std::vector<mirror::Object*> invalid_refs;
4896 if (verify_bitmap && !moving_space_bitmap_->Test(obj)) {
4897 LOG(FATAL) << "Obj " << obj << " (" << obj->PrettyTypeOf()
4898 << ") doesn't have mark-bit set"
4899 << "\n prev-black-dense-end = " << static_cast<void*>(prev_black_dense_end_)
4900 << "\n old-gen-end = " << static_cast<void*>(old_gen_end_)
4901 << "\n mid-gen-end = " << static_cast<void*>(mid_gen_end_);
4902 }
4903 VisitReferencesVisitor visitor(
4904 [verification = heap_->GetVerification(), &invalid_refs](mirror::Object* ref)
4905 REQUIRES_SHARED(Locks::mutator_lock_) {
4906 if (ref != nullptr && !verification->IsValidObject(ref)) {
4907 invalid_refs.push_back(ref);
4908 }
4909 });
4910 obj->VisitReferences</*kVisitNativeRoots=*/true>(visitor, visitor);
4911 if (!invalid_refs.empty()) {
4912 std::ostringstream oss;
4913 for (mirror::Object* ref : invalid_refs) {
4914 oss << ref << " ";
4915 }
4916 LOG(FATAL_WITHOUT_ABORT)
4917 << "Object " << obj << " (" << obj->PrettyTypeOf() << ") has invalid references:\n"
4918 << oss.str() << "\ncard = " << static_cast<int>(heap_->GetCardTable()->GetCard(obj))
4919 << "\n prev-black-dense-end = " << static_cast<void*>(prev_black_dense_end_)
4920 << "\n old-gen-end = " << static_cast<void*>(old_gen_end_)
4921 << "\n mid-gen-end = " << static_cast<void*>(mid_gen_end_)
4922 << "\n black-allocations-begin = " << static_cast<void*>(black_allocations_begin_);
4923
4924 // Calling PrettyTypeOf() on a stale reference mostly results in segfault.
4925 oss.str("");
4926 obj->DumpReferences</*kDumpNativeRoots=*/true>(oss, /*dump_type_of=*/false);
4927 LOG(FATAL_WITHOUT_ABORT) << "\n references =\n" << oss.str();
4928
4929 heap_->GetVerification()->LogHeapCorruption(
4930 /*holder=*/nullptr, MemberOffset(0), obj, /*fatal=*/true);
4931 }
4932 last_visited_obj = obj;
4933 };
4934 non_moving_space_bitmap_->VisitAllMarked(obj_visitor);
4935 last_visited_obj = nullptr;
4936 // We should verify all objects that have survived, which means old and mid-gen.
4937 // Objects that were promoted to old-gen and mid-gen in this GC cycle are tightly
4938 // packed, except if compaction was not performed. So we use object size to walk
4939 // the heap and also verify that the mark-bit is set in the tightly packed portion.
4940 moving_space_bitmap_->VisitMarkedRange(
4941 reinterpret_cast<uintptr_t>(moving_space_begin_),
4942 reinterpret_cast<uintptr_t>(performed_compaction ? prev_black_dense_end_
4943 : mark_bitmap_clear_end),
4944 obj_visitor);
4945 if (performed_compaction) {
4946 mirror::Object* obj = last_visited_obj;
4947 if (obj == nullptr || AlignUp(reinterpret_cast<uint8_t*>(obj) + obj->SizeOf(), kAlignment) <
4948 prev_black_dense_end_) {
4949 obj = reinterpret_cast<mirror::Object*>(prev_black_dense_end_);
4950 }
4951 while (reinterpret_cast<uint8_t*>(obj) < mid_gen_end_ && obj->GetClass() != nullptr) {
4952 // Objects in mid-gen will not have their corresponding mark-bits set.
4953 obj_visitor(obj, reinterpret_cast<void*>(obj) < black_dense_end_);
4954 uintptr_t next = reinterpret_cast<uintptr_t>(obj) + obj->SizeOf();
4955 obj = reinterpret_cast<mirror::Object*>(RoundUp(next, kAlignment));
4956 }
4957 }
4958 }
4959 }
4960
FinishPhase(bool performed_compaction)4961 void MarkCompact::FinishPhase(bool performed_compaction) {
4962 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4963 GetCurrentIteration()->SetScannedBytes(bytes_scanned_);
4964 bool is_zygote = Runtime::Current()->IsZygote();
4965 compacting_ = false;
4966 marking_done_ = false;
4967 uint8_t* mark_bitmap_clear_end = black_dense_end_;
4968 LOG(DEBUG) << "ART-GC black_dense_end:" << static_cast<void*>(black_dense_end_)
4969 << " mid_gen_end:" << static_cast<void*>(mid_gen_end_)
4970 << " post_compact_end:" << static_cast<void*>(post_compact_end_)
4971 << " black_allocations_begin:" << static_cast<void*>(black_allocations_begin_)
4972 << " young:" << young_gen_ << " performed_compaction:" << performed_compaction;
4973
4974 // Retain the values of some fields for logging in the next GC cycle, in case
4975 // memory corruption is detected.
4976 prev_black_allocations_begin_ = static_cast<void*>(black_allocations_begin_);
4977 prev_black_dense_end_ = static_cast<void*>(black_dense_end_);
4978 prev_post_compact_end_ = static_cast<void*>(post_compact_end_);
4979 prev_gc_young_ = young_gen_;
4980 prev_gc_performed_compaction_ = performed_compaction;
4981
4982 // Whether compaction is performed or not, we always set post_compact_end_
4983 // before reaching here.
4984 CHECK_NE(post_compact_end_, nullptr);
4985 if (use_generational_) {
4986 {
4987 ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
4988 // We need to retain and update the class-after-object map for old-gen as
4989 // that won't be created in the next young-gc.
4990 // Jump to the first class which is getting promoted to old-gen. Since
4991 // it is not compacted, references into old-gen don't need to be updated.
4992 // All pairs in mid-gen will be updated with post-compact addresses and
4993 // retained, as mid-gen is getting consumed into old-gen now. All pairs
4994 // after mid-gen will be erased as they are not required in the next GC cycle.
4995 auto iter = class_after_obj_map_.lower_bound(
4996 ObjReference::FromMirrorPtr(reinterpret_cast<mirror::Object*>(old_gen_end_)));
4997 while (iter != class_after_obj_map_.end()) {
4998 mirror::Object* klass = iter->first.AsMirrorPtr();
4999 mirror::Object* obj = iter->second.AsMirrorPtr();
5000 DCHECK_GT(klass, obj);
5001 // Black allocations begin after the marking-pause. Therefore, we cannot
5002 // have a situation wherein a class is allocated after the pause while its
5003 // object is allocated before it.
5004 if (reinterpret_cast<uint8_t*>(klass) >= black_allocations_begin_) {
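// In debug builds, confirm that the object of every remaining pair also lies
// in the black-allocation region before erasing the tail of the map.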
5005 for (auto it = iter; it != class_after_obj_map_.end(); it++) {
5006 DCHECK_GE(reinterpret_cast<uint8_t*>(it->second.AsMirrorPtr()),
5007 black_allocations_begin_);
5008 }
5009 class_after_obj_map_.erase(iter, class_after_obj_map_.end());
5010 break;
5011 }
5012
5013 DCHECK(moving_space_bitmap_->Test(klass));
5014 DCHECK(moving_space_bitmap_->Test(obj));
5015 // As 'mid_gen_end_' is where our old-gen will end now, compute compacted
5016 // addresses of <class, object> for comparisons and updating in the map.
5017 mirror::Object* compacted_klass = klass;
5018 mirror::Object* compacted_obj = obj;
5019 if (performed_compaction) {
5020 compacted_klass = PostCompactAddress(klass, old_gen_end_, moving_space_end_);
5021 compacted_obj = PostCompactAddress(obj, old_gen_end_, moving_space_end_);
5022 DCHECK_GT(compacted_klass, compacted_obj);
5023 }
5024 if (reinterpret_cast<uint8_t*>(compacted_obj) >= mid_gen_end_) {
5025 iter = class_after_obj_map_.erase(iter);
5026 continue;
5027 } else if (mid_to_old_promo_bit_vec_.get() != nullptr) {
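// The promotion bit-vector covers [old_gen_end_, mid_gen_end_); the DCHECKs
// below verify that pairs landing in that range have their bits set.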
5028 if (reinterpret_cast<uint8_t*>(compacted_klass) >= old_gen_end_) {
5029 DCHECK(mid_to_old_promo_bit_vec_->IsBitSet(
5030 (reinterpret_cast<uint8_t*>(compacted_obj) - old_gen_end_) / kAlignment));
5031 }
5032 if (reinterpret_cast<uint8_t*>(compacted_klass) < mid_gen_end_) {
5033 DCHECK(mid_to_old_promo_bit_vec_->IsBitSet(
5034 (reinterpret_cast<uint8_t*>(compacted_klass) - old_gen_end_) / kAlignment));
5035 }
5036 }
5037 if (performed_compaction) {
5038 auto nh = class_after_obj_map_.extract(iter++);
5039 nh.key() = ObjReference::FromMirrorPtr(compacted_klass);
5040 nh.mapped() = ObjReference::FromMirrorPtr(compacted_obj);
5041 auto success = class_after_obj_map_.insert(iter, std::move(nh));
5042 CHECK_EQ(success->first.AsMirrorPtr(), compacted_klass);
5043 } else {
5044 iter++;
5045 }
5046 }
5047
5048 // Dirty the cards for objects captured from native-roots during marking-phase.
5049 accounting::CardTable* card_table = heap_->GetCardTable();
5050 for (auto obj : dirty_cards_later_vec_) {
5051 // Only the moving and non-moving spaces are relevant as the remaining
5052 // spaces are all immune-spaces, which use the card-table anyway.
5053 if (HasAddress(obj)) {
5054 // Objects in young-gen that refer to other young-gen objects don't
5055 // need to be tracked.
5056 // The vector contains pre-compact object references whereas
5057 // 'mid_gen_end_' is a post-compact boundary. So compare against the
5058 // post-compact object reference.
5059 mirror::Object* compacted_obj =
5060 performed_compaction ? PostCompactAddress(obj, black_dense_end_, moving_space_end_)
5061 : obj;
5062 if (reinterpret_cast<uint8_t*>(compacted_obj) < mid_gen_end_) {
5063 card_table->MarkCard(compacted_obj);
5064 }
5065 } else if (non_moving_space_->HasAddress(obj)) {
5066 card_table->MarkCard(obj);
5067 }
5068 }
5069 }
5070 dirty_cards_later_vec_.clear();
5071
5072 // Copy the mid-gen promotion bit-vector into the moving-space's mark-bitmap.
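// The bit-vector records the objects just promoted from mid-gen to old-gen;
// merging it here makes them appear as marked (live) old-gen objects in the
// retained portion of the bitmap for the next cycle.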
5073 if (mid_to_old_promo_bit_vec_.get() != nullptr) {
5074 DCHECK_EQ(mid_to_old_promo_bit_vec_->GetBitSizeOf(),
5075 (mid_gen_end_ - old_gen_end_) / kObjectAlignment);
5076 uint32_t* bitmap_begin = reinterpret_cast<uint32_t*>(moving_space_bitmap_->Begin());
5077 DCHECK(IsAligned<kObjectAlignment * BitVector::kWordBits>(gPageSize));
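// 'index' below is a 32-bit word index: one bitmap bit per kObjectAlignment
// bytes and BitVector::kWordBits bits per word (e.g. 8-byte alignment and
// 32-bit words give 256 bytes of heap per word).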
5078 size_t index = (old_gen_end_ - moving_space_begin_) / kObjectAlignment / BitVector::kWordBits;
5079 mid_to_old_promo_bit_vec_->CopyTo(&bitmap_begin[index],
5080 mid_to_old_promo_bit_vec_->GetSizeOf());
5081 mid_to_old_promo_bit_vec_.reset(nullptr);
5082 } else if (!performed_compaction) {
5083 // We typically only retain the mark-bitmap for the old-generation as the
5084 // objects following it are expected to be contiguous. However, when
5085 // compaction is not performed, we may have decided to tolerate a few holes
5086 // here and there. So we have to retain the bitmap for the entire
5087 // 'compacted' portion of the heap, which is up to mid-gen-end.
5088 DCHECK_LE(old_gen_end_, post_compact_end_);
5089 mark_bitmap_clear_end = post_compact_end_;
5090 }
5091 // Promote all mid-gen objects to old-gen and young-gen objects to mid-gen
5092 // for the next GC cycle.
5093 old_gen_end_ = mid_gen_end_;
5094 mid_gen_end_ = post_compact_end_;
5095 post_compact_end_ = nullptr;
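// From here on, [moving_space_begin_, old_gen_end_) is old-gen and
// [old_gen_end_, mid_gen_end_) is mid-gen; the space above mid_gen_end_ will
// hold new (young) allocations until the next cycle.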
5096
5097 // Verify (in debug builds), after updating the mark-bitmap, that the
5098 // class-after-object map is correct.
5099 for (auto iter : class_after_obj_map_) {
5100 DCHECK(moving_space_bitmap_->Test(iter.second.AsMirrorPtr()));
5101 mirror::Object* klass = iter.first.AsMirrorPtr();
5102 DCHECK_IMPLIES(!moving_space_bitmap_->Test(klass),
5103 reinterpret_cast<uint8_t*>(klass) >= old_gen_end_);
5104 }
5105 } else {
5106 class_after_obj_map_.clear();
5107 if (!performed_compaction) {
5108 DCHECK_LE(old_gen_end_, post_compact_end_);
5109 mark_bitmap_clear_end = post_compact_end_;
5110 }
5111 }
5112 // The black-dense region, which requires the bitmap for object-walks, could be
5113 // larger than old-gen. Therefore, retain the bitmap for the entire black-dense
5114 // region until the next GC. At the beginning of the next cycle, we clear
5115 // [old_gen_end_, moving_space_end_).
5116 mark_bitmap_clear_end = std::max(black_dense_end_, mark_bitmap_clear_end);
5117 DCHECK_ALIGNED_PARAM(mark_bitmap_clear_end, gPageSize);
5118 if (moving_space_begin_ == mark_bitmap_clear_end) {
5119 moving_space_bitmap_->Clear();
5120 } else {
5121 DCHECK_LT(moving_space_begin_, mark_bitmap_clear_end);
5122 DCHECK_LE(mark_bitmap_clear_end, moving_space_end_);
5123 moving_space_bitmap_->ClearRange(reinterpret_cast<mirror::Object*>(mark_bitmap_clear_end),
5124 reinterpret_cast<mirror::Object*>(moving_space_end_));
5125 }
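// Tell the bump-pointer space how far the mark-bitmap is retained so that
// object walks over that prefix can use the bitmap rather than assuming
// tightly packed objects.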
5126 bump_pointer_space_->SetBlackDenseRegionSize(mark_bitmap_clear_end - moving_space_begin_);
5127
5128 if (UNLIKELY(is_zygote && IsValidFd(uffd_))) {
5129 // This unregisters all ranges as a side-effect.
5130 close(uffd_);
5131 uffd_ = kFdUnused;
5132 uffd_initialized_ = false;
5133 }
5134 CHECK(mark_stack_->IsEmpty()); // Ensure that the mark stack is empty.
5135 mark_stack_->Reset();
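// Release/clear the scratch state used during this cycle: compaction buffers,
// the info map, and the live-words bitmap.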
5136 ZeroAndReleaseMemory(compaction_buffers_map_.Begin(), compaction_buffers_map_.Size());
5137 info_map_.MadviseDontNeedAndZero();
5138 live_words_bitmap_->ClearBitmap();
5139 DCHECK_EQ(thread_running_gc_, Thread::Current());
5140 if (kIsDebugBuild) {
5141 MutexLock mu(thread_running_gc_, lock_);
5142 if (updated_roots_.get() != nullptr) {
5143 updated_roots_->clear();
5144 }
5145 }
5146 linear_alloc_arenas_.clear();
5147 {
5148 ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
5149 WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_);
5150 heap_->ClearMarkedObjects();
5151 if (use_generational_) {
5152 if (performed_compaction) {
5153 // Clear the bits set temporarily for black allocations in the non-moving
5154 // space by UpdateNonMovingSpaceBlackAllocations() (called when we perform
5155 // compaction), so that those objects are considered for GC in the next cycle.
5156 accounting::ObjectStack* stack = heap_->GetAllocationStack();
5157 const StackReference<mirror::Object>* limit = stack->End();
5158 for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) {
5159 mirror::Object* obj = it->AsMirrorPtr();
5160 if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) {
5161 non_moving_space_bitmap_->Clear(obj);
5162 }
5163 }
5164 } else {
5165 // Since we didn't perform compaction, we need to identify old objects
5166 // referring to the mid-gen.
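// Visit every marked old-gen and non-moving-space object whose card is not
// already dirty, and dirty its card if it references the new mid-gen so that
// the next young GC re-scans it.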
5167 auto obj_visitor = [this, card_table = heap_->GetCardTable()](mirror::Object* obj) {
5168 bool found = false;
5169 VisitReferencesVisitor visitor(
5170 [begin = old_gen_end_, end = mid_gen_end_, &found](mirror::Object* ref) {
5171 found |= ref >= reinterpret_cast<mirror::Object*>(begin) &&
5172 ref < reinterpret_cast<mirror::Object*>(end);
5173 });
5174 uint8_t* card = card_table->CardFromAddr(obj);
5175 if (*card == accounting::CardTable::kCardDirty) {
5176 return;
5177 }
5178 // Native-roots are captured during marking and the corresponding cards are already
5179 // dirtied above.
5180 obj->VisitReferences</*kVisitNativeRoots=*/false>(visitor, visitor);
5181 if (found) {
5182 *card = accounting::CardTable::kCardDirty;
5183 }
5184 };
5185 moving_space_bitmap_->VisitMarkedRange(reinterpret_cast<uintptr_t>(moving_space_begin_),
5186 reinterpret_cast<uintptr_t>(old_gen_end_),
5187 obj_visitor);
5188 non_moving_space_bitmap_->VisitAllMarked(obj_visitor);
5189 }
5190 }
5191 }
5192 GcVisitedArenaPool* arena_pool =
5193 static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
5194 arena_pool->DeleteUnusedArenas();
5195
5196 if (kVerifyNoMissingCardMarks && use_generational_) {
5197 // This must be done in a pause, as otherwise verification performed between a
5198 // mutator's mutation and its card-dirtying will spuriously fail.
5199 ScopedPause pause(this);
5200 WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
5201 VerifyNoMissingCardMarks();
5202 }
5203 if (kVerifyPostGCObjects && use_generational_) {
5204 ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
5205 WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_);
5206 VerifyPostGCObjects(performed_compaction, mark_bitmap_clear_end);
5207 }
5208 }
5209
5210 } // namespace collector
5211 } // namespace gc
5212 } // namespace art
5213