1 /*
2 * Copyright 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <fcntl.h>
// Glibc v2.19 doesn't include these in fcntl.h, so host builds will fail without this include.
19 #if !defined(FALLOC_FL_PUNCH_HOLE) || !defined(FALLOC_FL_KEEP_SIZE)
20 #include <linux/falloc.h>
21 #endif
22 #include <linux/userfaultfd.h>
23 #include <poll.h>
24 #include <sys/ioctl.h>
25 #include <sys/mman.h>
26 #include <sys/resource.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29
30 #include <fstream>
31 #include <numeric>
32 #include <string>
33 #include <string_view>
34 #include <vector>
35
36 #include "android-base/file.h"
37 #include "android-base/parsebool.h"
38 #include "android-base/parseint.h"
39 #include "android-base/properties.h"
40 #include "android-base/strings.h"
41 #include "base/file_utils.h"
42 #include "base/memfd.h"
43 #include "base/quasi_atomic.h"
44 #include "base/systrace.h"
45 #include "base/utils.h"
46 #include "gc/accounting/mod_union_table-inl.h"
47 #include "gc/collector_type.h"
48 #include "gc/reference_processor.h"
49 #include "gc/space/bump_pointer_space.h"
50 #include "gc/task_processor.h"
51 #include "gc/verification-inl.h"
52 #include "jit/jit_code_cache.h"
53 #include "mark_compact-inl.h"
54 #include "mirror/object-refvisitor-inl.h"
55 #include "read_barrier_config.h"
56 #include "scoped_thread_state_change-inl.h"
57 #include "sigchain.h"
58 #include "thread_list.h"
59
60 #ifdef ART_TARGET_ANDROID
61 #include "com_android_art.h"
62 #endif
63
64 #ifndef __BIONIC__
65 #ifndef MREMAP_DONTUNMAP
66 #define MREMAP_DONTUNMAP 4
67 #endif
68 #ifndef MAP_FIXED_NOREPLACE
69 #define MAP_FIXED_NOREPLACE 0x100000
70 #endif
71 #ifndef __NR_userfaultfd
72 #if defined(__x86_64__)
73 #define __NR_userfaultfd 323
74 #elif defined(__i386__)
75 #define __NR_userfaultfd 374
76 #elif defined(__aarch64__)
77 #define __NR_userfaultfd 282
78 #elif defined(__arm__)
79 #define __NR_userfaultfd 388
80 #else
81 #error "__NR_userfaultfd undefined"
82 #endif
83 #endif // __NR_userfaultfd
84 #endif // __BIONIC__
85
86 namespace {
87
88 using ::android::base::GetBoolProperty;
89 using ::android::base::ParseBool;
90 using ::android::base::ParseBoolResult;
91
92 } // namespace
93
94 namespace art {
95
static bool HaveMremapDontunmap() {
97 void* old = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
98 CHECK_NE(old, MAP_FAILED);
99 void* addr = mremap(old, kPageSize, kPageSize, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, nullptr);
100 CHECK_EQ(munmap(old, kPageSize), 0);
101 if (addr != MAP_FAILED) {
102 CHECK_EQ(munmap(addr, kPageSize), 0);
103 return true;
104 } else {
105 return false;
106 }
107 }
// We require the MREMAP_DONTUNMAP functionality of the mremap syscall, which
// was introduced in kernel version 5.13 but has also been backported to GKI kernels.
110 static bool gHaveMremapDontunmap = IsKernelVersionAtLeast(5, 13) || HaveMremapDontunmap();
111 // Bitmap of features supported by userfaultfd. This is obtained via uffd API ioctl.
112 static uint64_t gUffdFeatures = 0;
// Both missing and minor faults on shmem are needed, but only for the minor-fault mode.
114 static constexpr uint64_t kUffdFeaturesForMinorFault =
115 UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM;
116 static constexpr uint64_t kUffdFeaturesForSigbus = UFFD_FEATURE_SIGBUS;
// We consider the SIGBUS feature necessary to enable this GC, as it is
// superior to the threading-based implementation with respect to janks.
// However, since the latter is already implemented, for testing purposes we
// allow choosing either of the two at boot time in the constructor below.
// Note that having the minor-fault feature implies having the SIGBUS feature,
// as the latter was introduced in the kernel earlier than the former. We still
// want minor-fault to be available for making jit-code-cache updates
// concurrent, which uses shmem.
125 static constexpr uint64_t kUffdFeaturesRequired =
126 kUffdFeaturesForMinorFault | kUffdFeaturesForSigbus;
127
bool KernelSupportsUffd() {
129 #ifdef __linux__
130 if (gHaveMremapDontunmap) {
131 int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
132 // On non-android devices we may not have the kernel patches that restrict
133 // userfaultfd to user mode. But that is not a security concern as we are
134 // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
135 if (!kIsTargetAndroid && fd == -1 && errno == EINVAL) {
136 fd = syscall(__NR_userfaultfd, O_CLOEXEC);
137 }
138 if (fd >= 0) {
// We are only fetching the available features, which are returned by the
// ioctl.
141 struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
142 CHECK_EQ(ioctl(fd, UFFDIO_API, &api), 0) << "ioctl_userfaultfd : API:" << strerror(errno);
143 gUffdFeatures = api.features;
144 close(fd);
// Allow this GC to be used only if the minor-fault and SIGBUS features are available.
146 return (api.features & kUffdFeaturesRequired) == kUffdFeaturesRequired;
147 }
148 }
149 #endif
150 return false;
151 }
152
153 // The other cases are defined as constexpr in runtime/read_barrier_config.h
154 #if !defined(ART_FORCE_USE_READ_BARRIER) && defined(ART_USE_READ_BARRIER)
// Returns the collector type requested on the cmdline.
static gc::CollectorType FetchCmdlineGcType() {
157 std::string argv;
158 gc::CollectorType gc_type = gc::CollectorType::kCollectorTypeNone;
159 if (android::base::ReadFileToString("/proc/self/cmdline", &argv)) {
160 if (argv.find("-Xgc:CMC") != std::string::npos) {
161 gc_type = gc::CollectorType::kCollectorTypeCMC;
162 } else if (argv.find("-Xgc:CC") != std::string::npos) {
163 gc_type = gc::CollectorType::kCollectorTypeCC;
164 }
165 }
166 return gc_type;
167 }
168
169 #ifdef ART_TARGET_ANDROID
static int GetOverrideCacheInfoFd() {
171 std::string args_str;
172 if (!android::base::ReadFileToString("/proc/self/cmdline", &args_str)) {
173 LOG(WARNING) << "Failed to load /proc/self/cmdline";
174 return -1;
175 }
176 std::vector<std::string_view> args;
177 Split(std::string_view(args_str), /*separator=*/'\0', &args);
178 for (std::string_view arg : args) {
179 if (android::base::ConsumePrefix(&arg, "--cache-info-fd=")) { // This is a dex2oat flag.
180 int fd;
181 if (!android::base::ParseInt(std::string(arg), &fd)) {
182 LOG(ERROR) << "Failed to parse --cache-info-fd (value: '" << arg << "')";
183 return -1;
184 }
185 return fd;
186 }
187 }
188 return -1;
189 }
190
static bool GetCachedBoolProperty(const std::string& key, bool default_value) {
192 // For simplicity, we don't handle multiple calls because otherwise we would have to reset the fd.
193 static bool called = false;
194 CHECK(!called) << "GetCachedBoolProperty can be called only once";
195 called = true;
196
197 std::string cache_info_contents;
198 int fd = GetOverrideCacheInfoFd();
199 if (fd >= 0) {
200 if (!android::base::ReadFdToString(fd, &cache_info_contents)) {
201 PLOG(ERROR) << "Failed to read cache-info from fd " << fd;
202 return default_value;
203 }
204 } else {
205 std::string path = GetApexDataDalvikCacheDirectory(InstructionSet::kNone) + "/cache-info.xml";
206 if (!android::base::ReadFileToString(path, &cache_info_contents)) {
207 // If the file is not found, then we are in chroot or in a standalone runtime process (e.g.,
208 // IncidentHelper), or odsign/odrefresh failed to generate and sign the cache info. There's
209 // nothing we can do.
210 if (errno != ENOENT) {
211 PLOG(ERROR) << "Failed to read cache-info from the default path";
212 }
213 return default_value;
214 }
215 }
216
217 std::optional<com::android::art::CacheInfo> cache_info =
218 com::android::art::parse(cache_info_contents.c_str());
219 if (!cache_info.has_value()) {
220 // This should never happen.
221 LOG(ERROR) << "Failed to parse cache-info";
222 return default_value;
223 }
224 const com::android::art::KeyValuePairList* list = cache_info->getFirstSystemProperties();
225 if (list == nullptr) {
226 // This should never happen.
227 LOG(ERROR) << "Missing system properties from cache-info";
228 return default_value;
229 }
230 const std::vector<com::android::art::KeyValuePair>& properties = list->getItem();
231 for (const com::android::art::KeyValuePair& pair : properties) {
232 if (pair.getK() == key) {
233 ParseBoolResult result = ParseBool(pair.getV());
234 switch (result) {
235 case ParseBoolResult::kTrue:
236 return true;
237 case ParseBoolResult::kFalse:
238 return false;
239 case ParseBoolResult::kError:
240 return default_value;
241 }
242 }
243 }
244 return default_value;
245 }
246
static bool SysPropSaysUffdGc() {
// The phenotype flag can change at any time after boot, but it shouldn't take effect until a
249 // reboot. Therefore, we read the phenotype flag from the cache info, which is generated on boot.
250 return GetCachedBoolProperty("persist.device_config.runtime_native_boot.enable_uffd_gc",
251 GetBoolProperty("ro.dalvik.vm.enable_uffd_gc", false));
252 }
253 #else
254 // Never called.
static bool SysPropSaysUffdGc() { return false; }
256 #endif
257
static bool ShouldUseUserfaultfd() {
259 static_assert(kUseBakerReadBarrier || kUseTableLookupReadBarrier);
260 #ifdef __linux__
// Use CMC/CC if it is explicitly requested on the cmdline. Otherwise, always
// use CC on host. On target, use CMC only if the system property says so and
// the kernel supports it.
264 gc::CollectorType gc_type = FetchCmdlineGcType();
265 return gc_type == gc::CollectorType::kCollectorTypeCMC ||
266 (gc_type == gc::CollectorType::kCollectorTypeNone &&
267 kIsTargetAndroid &&
268 SysPropSaysUffdGc() &&
269 KernelSupportsUffd());
270 #else
271 return false;
272 #endif
273 }
274
275 const bool gUseUserfaultfd = ShouldUseUserfaultfd();
276 const bool gUseReadBarrier = !gUseUserfaultfd;
277 #endif
278
279 namespace gc {
280 namespace collector {
281
282 // Turn off kCheckLocks when profiling the GC as it slows down the GC
283 // significantly.
284 static constexpr bool kCheckLocks = kDebugLocking;
285 static constexpr bool kVerifyRootsMarked = kIsDebugBuild;
286 // Two threads should suffice on devices.
287 static constexpr size_t kMaxNumUffdWorkers = 2;
// Number of compaction buffers reserved for mutator threads in the SIGBUS-feature
// case. It's extremely unlikely that we will ever have more than this number
// of mutator threads trying to access the moving-space during one compaction
// phase. A lower number is used in debug builds to hopefully catch the issue
// before it becomes a problem on user builds.
293 static constexpr size_t kMutatorCompactionBufferCount = kIsDebugBuild ? 256 : 512;
294 // Minimum from-space chunk to be madvised (during concurrent compaction) in one go.
295 static constexpr ssize_t kMinFromSpaceMadviseSize = 1 * MB;
296 // Concurrent compaction termination logic is different (and slightly more efficient) if the
297 // kernel has the fault-retry feature (allowing repeated faults on the same page), which was
298 // introduced in 5.7 (https://android-review.git.corp.google.com/c/kernel/common/+/1540088).
299 // This allows a single page fault to be handled, in turn, by each worker thread, only waking
300 // up the GC thread at the end.
301 static const bool gKernelHasFaultRetry = IsKernelVersionAtLeast(5, 7);
302
std::pair<bool, bool> MarkCompact::GetUffdAndMinorFault() {
304 bool uffd_available;
// In most cases gUffdFeatures will already be initialized at boot time
// when libart is loaded. On very old kernels we may get '0' from the kernel,
// in which case we would be doing the syscalls each time this function is
// called. But that's a very unlikely case. There are no correctness issues,
// as the kernel's response never changes after boot.
310 if (UNLIKELY(gUffdFeatures == 0)) {
311 uffd_available = KernelSupportsUffd();
312 } else {
313 // We can have any uffd features only if uffd exists.
314 uffd_available = true;
315 }
316 bool minor_fault_available =
317 (gUffdFeatures & kUffdFeaturesForMinorFault) == kUffdFeaturesForMinorFault;
318 return std::pair<bool, bool>(uffd_available, minor_fault_available);
319 }
320
bool MarkCompact::CreateUserfaultfd(bool post_fork) {
322 if (post_fork || uffd_ == kFdUnused) {
// Check if we have MREMAP_DONTUNMAP here for cases where
// 'ART_USE_READ_BARRIER=false' is used. Additionally, this check ensures
// that userfaultfd isn't used on old kernels, which can cause random ioctl
// failures.
327 if (gHaveMremapDontunmap) {
328 // Don't use O_NONBLOCK as we rely on read waiting on uffd_ if there isn't
329 // any read event available. We don't use poll.
330 uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
331 // On non-android devices we may not have the kernel patches that restrict
332 // userfaultfd to user mode. But that is not a security concern as we are
333 // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
334 if (!kIsTargetAndroid && UNLIKELY(uffd_ == -1 && errno == EINVAL)) {
335 uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC);
336 }
337 if (UNLIKELY(uffd_ == -1)) {
338 uffd_ = kFallbackMode;
339 LOG(WARNING) << "Userfaultfd isn't supported (reason: " << strerror(errno)
340 << ") and therefore falling back to stop-the-world compaction.";
341 } else {
342 DCHECK(IsValidFd(uffd_));
343 // Initialize uffd with the features which are required and available.
344 struct uffdio_api api = {.api = UFFD_API, .features = gUffdFeatures, .ioctls = 0};
345 api.features &= use_uffd_sigbus_ ? kUffdFeaturesRequired : kUffdFeaturesForMinorFault;
346 CHECK_EQ(ioctl(uffd_, UFFDIO_API, &api), 0)
347 << "ioctl_userfaultfd: API: " << strerror(errno);
348 }
349 } else {
350 uffd_ = kFallbackMode;
351 }
352 }
353 uffd_initialized_ = !post_fork || uffd_ == kFallbackMode;
354 return IsValidFd(uffd_);
355 }
356
357 template <size_t kAlignment>
MarkCompact::LiveWordsBitmap<kAlignment>* MarkCompact::LiveWordsBitmap<kAlignment>::Create(
359 uintptr_t begin, uintptr_t end) {
360 return static_cast<LiveWordsBitmap<kAlignment>*>(
361 MemRangeBitmap::Create("Concurrent Mark Compact live words bitmap", begin, end));
362 }
363
static bool IsSigbusFeatureAvailable() {
365 MarkCompact::GetUffdAndMinorFault();
366 return gUffdFeatures & UFFD_FEATURE_SIGBUS;
367 }
368
MarkCompact::MarkCompact(Heap* heap)
370 : GarbageCollector(heap, "concurrent mark compact"),
371 gc_barrier_(0),
372 lock_("mark compact lock", kGenericBottomLock),
373 bump_pointer_space_(heap->GetBumpPointerSpace()),
374 moving_space_bitmap_(bump_pointer_space_->GetMarkBitmap()),
375 moving_to_space_fd_(kFdUnused),
376 moving_from_space_fd_(kFdUnused),
377 uffd_(kFdUnused),
378 sigbus_in_progress_count_(kSigbusCounterCompactionDoneMask),
379 compaction_in_progress_count_(0),
380 thread_pool_counter_(0),
381 compacting_(false),
382 uffd_initialized_(false),
383 uffd_minor_fault_supported_(false),
384 use_uffd_sigbus_(IsSigbusFeatureAvailable()),
385 minor_fault_initialized_(false),
386 map_linear_alloc_shared_(false) {
387 if (kIsDebugBuild) {
388 updated_roots_.reset(new std::unordered_set<void*>());
389 }
390 // TODO: When using minor-fault feature, the first GC after zygote-fork
391 // requires mapping the linear-alloc again with MAP_SHARED. This leaves a
392 // gap for suspended threads to access linear-alloc when it's empty (after
393 // mremap) and not yet userfaultfd registered. This cannot be fixed by merely
394 // doing uffd registration first. For now, just assert that we are not using
395 // minor-fault. Eventually, a cleanup of linear-alloc update logic to only
396 // use private anonymous would be ideal.
397 CHECK(!uffd_minor_fault_supported_);
398
// TODO: Depending on how the bump-pointer space move is implemented, if we
// switch between two virtual memories each time, then we will have to
// initialize live_words_bitmap_ accordingly.
402 live_words_bitmap_.reset(LiveWordsBitmap<kAlignment>::Create(
403 reinterpret_cast<uintptr_t>(bump_pointer_space_->Begin()),
404 reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit())));
405
406 // Create one MemMap for all the data structures
407 size_t moving_space_size = bump_pointer_space_->Capacity();
408 size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize;
409 size_t nr_moving_pages = moving_space_size / kPageSize;
410 size_t nr_non_moving_pages = heap->GetNonMovingSpace()->Capacity() / kPageSize;
411
412 std::string err_msg;
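// Layout of info_map_ (in order): chunk-info vector, first-objs for
// non-moving-space pages, first-objs for moving-space pages, and pre-compact
// offsets for moving-space pages (see the pointer arithmetic below).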
413 info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector",
414 chunk_info_vec_size * sizeof(uint32_t)
415 + nr_non_moving_pages * sizeof(ObjReference)
416 + nr_moving_pages * sizeof(ObjReference)
417 + nr_moving_pages * sizeof(uint32_t),
418 PROT_READ | PROT_WRITE,
419 /*low_4gb=*/ false,
420 &err_msg);
421 if (UNLIKELY(!info_map_.IsValid())) {
422 LOG(FATAL) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg;
423 } else {
424 uint8_t* p = info_map_.Begin();
425 chunk_info_vec_ = reinterpret_cast<uint32_t*>(p);
426 vector_length_ = chunk_info_vec_size;
427
428 p += chunk_info_vec_size * sizeof(uint32_t);
429 first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p);
430
431 p += nr_non_moving_pages * sizeof(ObjReference);
432 first_objs_moving_space_ = reinterpret_cast<ObjReference*>(p);
433
434 p += nr_moving_pages * sizeof(ObjReference);
435 pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p);
436 }
437
438 size_t moving_space_alignment = BestPageTableAlignment(moving_space_size);
439 // The moving space is created at a fixed address, which is expected to be
440 // PMD-size aligned.
441 if (!IsAlignedParam(bump_pointer_space_->Begin(), moving_space_alignment)) {
442 LOG(WARNING) << "Bump pointer space is not aligned to " << PrettySize(moving_space_alignment)
443 << ". This can lead to longer stop-the-world pauses for compaction";
444 }
445 // NOTE: PROT_NONE is used here as these mappings are for address space reservation
446 // only and will be used only after appropriately remapping them.
447 from_space_map_ = MemMap::MapAnonymousAligned("Concurrent mark-compact from-space",
448 moving_space_size,
449 PROT_NONE,
450 /*low_4gb=*/kObjPtrPoisoning,
451 moving_space_alignment,
452 &err_msg);
453 if (UNLIKELY(!from_space_map_.IsValid())) {
454 LOG(FATAL) << "Failed to allocate concurrent mark-compact from-space" << err_msg;
455 } else {
456 from_space_begin_ = from_space_map_.Begin();
457 }
458
// In some cases (32-bit or kObjPtrPoisoning) it's too much to ask for 3
// heap-sized mappings in low-4GB. So tolerate failure here by attempting to
// mmap again right before the compaction pause. And if even that fails, then
// run the GC cycle in copy-mode rather than minor-fault mode.
//
// This map doesn't have to be aligned to 2MB as we don't mremap on it.
465 if (!kObjPtrPoisoning && uffd_minor_fault_supported_) {
// We need this map only if the minor-fault feature is supported. But in that case
// don't create the mapping if obj-ptr poisoning is enabled, as then the mapping
// has to be created in low_4gb. Doing this here rather than later causes the
// Dex2oatImageTest.TestExtension gtest to fail on 64-bit platforms.
470 shadow_to_space_map_ = MemMap::MapAnonymous("Concurrent mark-compact moving-space shadow",
471 moving_space_size,
472 PROT_NONE,
473 /*low_4gb=*/false,
474 &err_msg);
475 if (!shadow_to_space_map_.IsValid()) {
476 LOG(WARNING) << "Failed to allocate concurrent mark-compact moving-space shadow: " << err_msg;
477 }
478 }
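// One extra page is for the GC thread's own buffer, which also serves as the
// concurrent-compaction termination page below; the remaining pages are for
// uffd workers, or for mutator threads when the SIGBUS feature is in use.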
479 const size_t num_pages =
480 1 + (use_uffd_sigbus_ ? kMutatorCompactionBufferCount :
481 std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers));
482 compaction_buffers_map_ = MemMap::MapAnonymous("Concurrent mark-compact compaction buffers",
483 kPageSize * num_pages,
484 PROT_READ | PROT_WRITE,
485 /*low_4gb=*/kObjPtrPoisoning,
486 &err_msg);
487 if (UNLIKELY(!compaction_buffers_map_.IsValid())) {
488 LOG(FATAL) << "Failed to allocate concurrent mark-compact compaction buffers" << err_msg;
489 }
490 // We also use the first page-sized buffer for the purpose of terminating concurrent compaction.
491 conc_compaction_termination_page_ = compaction_buffers_map_.Begin();
492 // Touch the page deliberately to avoid userfaults on it. We madvise it in
493 // CompactionPhase() before using it to terminate concurrent compaction.
494 ForceRead(conc_compaction_termination_page_);
495
496 // In most of the cases, we don't expect more than one LinearAlloc space.
497 linear_alloc_spaces_data_.reserve(1);
498
499 // Initialize GC metrics.
500 metrics::ArtMetrics* metrics = GetMetrics();
501 // The mark-compact collector supports only full-heap collections at the moment.
502 gc_time_histogram_ = metrics->FullGcCollectionTime();
503 metrics_gc_count_ = metrics->FullGcCount();
504 metrics_gc_count_delta_ = metrics->FullGcCountDelta();
505 gc_throughput_histogram_ = metrics->FullGcThroughput();
506 gc_tracing_throughput_hist_ = metrics->FullGcTracingThroughput();
507 gc_throughput_avg_ = metrics->FullGcThroughputAvg();
508 gc_tracing_throughput_avg_ = metrics->FullGcTracingThroughputAvg();
509 gc_scanned_bytes_ = metrics->FullGcScannedBytes();
510 gc_scanned_bytes_delta_ = metrics->FullGcScannedBytesDelta();
511 gc_freed_bytes_ = metrics->FullGcFreedBytes();
512 gc_freed_bytes_delta_ = metrics->FullGcFreedBytesDelta();
513 gc_duration_ = metrics->FullGcDuration();
514 gc_duration_delta_ = metrics->FullGcDurationDelta();
515 are_metrics_initialized_ = true;
516 }
517
void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) {
519 DCHECK_ALIGNED(begin, kPageSize);
520 DCHECK_ALIGNED(len, kPageSize);
521 DCHECK_GE(len, kPMDSize);
522 size_t alignment = BestPageTableAlignment(len);
523 bool is_shared = false;
524 // We use MAP_SHARED on non-zygote processes for leveraging userfaultfd's minor-fault feature.
525 if (map_linear_alloc_shared_) {
526 void* ret = mmap(begin,
527 len,
528 PROT_READ | PROT_WRITE,
529 MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
530 /*fd=*/-1,
531 /*offset=*/0);
532 CHECK_EQ(ret, begin) << "mmap failed: " << strerror(errno);
533 is_shared = true;
534 }
535 std::string err_msg;
536 MemMap shadow(MemMap::MapAnonymousAligned("linear-alloc shadow map",
537 len,
538 PROT_NONE,
539 /*low_4gb=*/false,
540 alignment,
541 &err_msg));
542 if (!shadow.IsValid()) {
543 LOG(FATAL) << "Failed to allocate linear-alloc shadow map: " << err_msg;
544 UNREACHABLE();
545 }
546
547 MemMap page_status_map(MemMap::MapAnonymous("linear-alloc page-status map",
548 len / kPageSize,
549 PROT_READ | PROT_WRITE,
550 /*low_4gb=*/false,
551 &err_msg));
552 if (!page_status_map.IsValid()) {
553 LOG(FATAL) << "Failed to allocate linear-alloc page-status shadow map: " << err_msg;
554 UNREACHABLE();
555 }
556 linear_alloc_spaces_data_.emplace_back(std::forward<MemMap>(shadow),
557 std::forward<MemMap>(page_status_map),
558 begin,
559 begin + len,
560 is_shared);
561 }
562
void MarkCompact::BindAndResetBitmaps() {
564 // TODO: We need to hold heap_bitmap_lock_ only for populating immune_spaces.
565 // The card-table and mod-union-table processing can be done without it. So
566 // change the logic below. Note that the bitmap clearing would require the
567 // lock.
568 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
569 accounting::CardTable* const card_table = heap_->GetCardTable();
570 // Mark all of the spaces we never collect as immune.
571 for (const auto& space : GetHeap()->GetContinuousSpaces()) {
572 if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
573 space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
574 CHECK(space->IsZygoteSpace() || space->IsImageSpace());
575 immune_spaces_.AddSpace(space);
576 accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
577 if (table != nullptr) {
578 table->ProcessCards();
579 } else {
580 // Keep cards aged if we don't have a mod-union table since we may need
581 // to scan them in future GCs. This case is for app images.
582 // TODO: We could probably scan the objects right here to avoid doing
583 // another scan through the card-table.
584 card_table->ModifyCardsAtomic(
585 space->Begin(),
586 space->End(),
587 [](uint8_t card) {
588 return (card == gc::accounting::CardTable::kCardClean)
589 ? card
590 : gc::accounting::CardTable::kCardAged;
591 },
592 /* card modified visitor */ VoidFunctor());
593 }
594 } else {
595 CHECK(!space->IsZygoteSpace());
596 CHECK(!space->IsImageSpace());
597 // The card-table corresponding to bump-pointer and non-moving space can
598 // be cleared, because we are going to traverse all the reachable objects
599 // in these spaces. This card-table will eventually be used to track
600 // mutations while concurrent marking is going on.
601 card_table->ClearCardRange(space->Begin(), space->Limit());
602 if (space != bump_pointer_space_) {
603 CHECK_EQ(space, heap_->GetNonMovingSpace());
604 non_moving_space_ = space;
605 non_moving_space_bitmap_ = space->GetMarkBitmap();
606 }
607 }
608 }
609 }
610
void MarkCompact::MarkZygoteLargeObjects() {
612 Thread* self = thread_running_gc_;
613 DCHECK_EQ(self, Thread::Current());
614 space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace();
615 if (los != nullptr) {
616 // Pick the current live bitmap (mark bitmap if swapped).
617 accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
618 accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
619 // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
620 std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic();
621 live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first),
622 reinterpret_cast<uintptr_t>(range.second),
623 [mark_bitmap, los, self](mirror::Object* obj)
624 REQUIRES(Locks::heap_bitmap_lock_)
625 REQUIRES_SHARED(Locks::mutator_lock_) {
626 if (los->IsZygoteLargeObject(self, obj)) {
627 mark_bitmap->Set(obj);
628 }
629 });
630 }
631 }
632
void MarkCompact::InitializePhase() {
634 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
635 mark_stack_ = heap_->GetMarkStack();
636 CHECK(mark_stack_->IsEmpty());
637 immune_spaces_.Reset();
638 moving_first_objs_count_ = 0;
639 non_moving_first_objs_count_ = 0;
640 black_page_count_ = 0;
641 bytes_scanned_ = 0;
642 freed_objects_ = 0;
643 // The first buffer is used by gc-thread.
644 compaction_buffer_counter_.store(1, std::memory_order_relaxed);
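// Adding this difference to a pre-compact moving-space address gives the
// corresponding address in the from-space mapping, where the moving-space
// pages are moved during the compaction pause (see scenario A.1.b below).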
645 from_space_slide_diff_ = from_space_begin_ - bump_pointer_space_->Begin();
646 black_allocations_begin_ = bump_pointer_space_->Limit();
647 walk_super_class_cache_ = nullptr;
// TODO: Would it suffice to read it once in the constructor, which is called
// in the zygote process?
650 pointer_size_ = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
651 }
652
653 class MarkCompact::ThreadFlipVisitor : public Closure {
654 public:
explicit ThreadFlipVisitor(MarkCompact* collector) : collector_(collector) {}
656
void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) {
658 // Note: self is not necessarily equal to thread since thread may be suspended.
659 Thread* self = Thread::Current();
660 CHECK(thread == self || thread->GetState() != ThreadState::kRunnable)
661 << thread->GetState() << " thread " << thread << " self " << self;
662 thread->VisitRoots(collector_, kVisitRootFlagAllRoots);
663 // Interpreter cache is thread-local so it needs to be swept either in a
664 // flip, or a stop-the-world pause.
665 CHECK(collector_->compacting_);
666 thread->SweepInterpreterCache(collector_);
667 thread->AdjustTlab(collector_->black_objs_slide_diff_);
668 collector_->GetBarrier().Pass(self);
669 }
670
671 private:
672 MarkCompact* const collector_;
673 };
674
675 class MarkCompact::FlipCallback : public Closure {
676 public:
explicit FlipCallback(MarkCompact* collector) : collector_(collector) {}
678
void Run(Thread* thread ATTRIBUTE_UNUSED) override REQUIRES(Locks::mutator_lock_) {
680 collector_->CompactionPause();
681 }
682
683 private:
684 MarkCompact* const collector_;
685 };
686
void MarkCompact::RunPhases() {
688 Thread* self = Thread::Current();
689 thread_running_gc_ = self;
690 Runtime* runtime = Runtime::Current();
691 InitializePhase();
692 GetHeap()->PreGcVerification(this);
693 {
694 ReaderMutexLock mu(self, *Locks::mutator_lock_);
695 MarkingPhase();
696 }
697 {
698 // Marking pause
699 ScopedPause pause(this);
700 MarkingPause();
701 if (kIsDebugBuild) {
702 bump_pointer_space_->AssertAllThreadLocalBuffersAreRevoked();
703 }
704 }
// To increase the likelihood of black allocations. For testing purposes only.
706 if (kIsDebugBuild && heap_->GetTaskProcessor()->GetRunningThread() == thread_running_gc_) {
707 usleep(500'000);
708 }
709 {
710 ReaderMutexLock mu(self, *Locks::mutator_lock_);
711 ReclaimPhase();
712 PrepareForCompaction();
713 }
714 if (uffd_ != kFallbackMode && !use_uffd_sigbus_) {
715 heap_->GetThreadPool()->WaitForWorkersToBeCreated();
716 }
717
718 {
719 // Compaction pause
720 gc_barrier_.Init(self, 0);
721 ThreadFlipVisitor visitor(this);
722 FlipCallback callback(this);
723 size_t barrier_count = runtime->GetThreadList()->FlipThreadRoots(
724 &visitor, &callback, this, GetHeap()->GetGcPauseListener());
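// Each thread on which the ThreadFlipVisitor runs passes the barrier in
// Run() above; the Increment() below waits until all barrier_count of them
// have done so.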
725 {
726 ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
727 gc_barrier_.Increment(self, barrier_count);
728 }
729 }
730
731 if (IsValidFd(uffd_)) {
732 ReaderMutexLock mu(self, *Locks::mutator_lock_);
733 CompactionPhase();
734 }
735
736 FinishPhase();
737 thread_running_gc_ = nullptr;
738 GetHeap()->PostGcVerification(this);
739 }
740
void MarkCompact::InitMovingSpaceFirstObjects(const size_t vec_len) {
742 // Find the first live word first.
743 size_t to_space_page_idx = 0;
744 uint32_t offset_in_chunk_word;
745 uint32_t offset;
746 mirror::Object* obj;
747 const uintptr_t heap_begin = moving_space_bitmap_->HeapBegin();
748
749 size_t chunk_idx;
750 // Find the first live word in the space
751 for (chunk_idx = 0; chunk_info_vec_[chunk_idx] == 0; chunk_idx++) {
752 if (chunk_idx > vec_len) {
753 // We don't have any live data on the moving-space.
754 return;
755 }
756 }
757 // Use live-words bitmap to find the first word
758 offset_in_chunk_word = live_words_bitmap_->FindNthLiveWordOffset(chunk_idx, /*n*/ 0);
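// Combine the chunk index with the intra-chunk bit offset to get the global
// index into the live-words bitmap; each bit represents kAlignment bytes of
// the moving space.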
759 offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
760 DCHECK(live_words_bitmap_->Test(offset)) << "offset=" << offset
761 << " chunk_idx=" << chunk_idx
762 << " N=0"
763 << " offset_in_word=" << offset_in_chunk_word
764 << " word=" << std::hex
765 << live_words_bitmap_->GetWord(chunk_idx);
766 // The first object doesn't require using FindPrecedingObject().
767 obj = reinterpret_cast<mirror::Object*>(heap_begin + offset * kAlignment);
768 // TODO: add a check to validate the object.
769
770 pre_compact_offset_moving_space_[to_space_page_idx] = offset;
771 first_objs_moving_space_[to_space_page_idx].Assign(obj);
772 to_space_page_idx++;
773
774 uint32_t page_live_bytes = 0;
775 while (true) {
776 for (; page_live_bytes <= kPageSize; chunk_idx++) {
777 if (chunk_idx > vec_len) {
778 moving_first_objs_count_ = to_space_page_idx;
779 return;
780 }
781 page_live_bytes += chunk_info_vec_[chunk_idx];
782 }
783 chunk_idx--;
784 page_live_bytes -= kPageSize;
785 DCHECK_LE(page_live_bytes, kOffsetChunkSize);
786 DCHECK_LE(page_live_bytes, chunk_info_vec_[chunk_idx])
787 << " chunk_idx=" << chunk_idx
788 << " to_space_page_idx=" << to_space_page_idx
789 << " vec_len=" << vec_len;
790 DCHECK(IsAligned<kAlignment>(chunk_info_vec_[chunk_idx] - page_live_bytes));
791 offset_in_chunk_word =
792 live_words_bitmap_->FindNthLiveWordOffset(
793 chunk_idx, (chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment);
794 offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
795 DCHECK(live_words_bitmap_->Test(offset))
796 << "offset=" << offset
797 << " chunk_idx=" << chunk_idx
798 << " N=" << ((chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment)
799 << " offset_in_word=" << offset_in_chunk_word
800 << " word=" << std::hex << live_words_bitmap_->GetWord(chunk_idx);
801 // TODO: Can we optimize this for large objects? If we are continuing a
802 // large object that spans multiple pages, then we may be able to do without
803 // calling FindPrecedingObject().
804 //
805 // Find the object which encapsulates offset in it, which could be
806 // starting at offset itself.
807 obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
808 // TODO: add a check to validate the object.
809 pre_compact_offset_moving_space_[to_space_page_idx] = offset;
810 first_objs_moving_space_[to_space_page_idx].Assign(obj);
811 to_space_page_idx++;
812 chunk_idx++;
813 }
814 }
815
void MarkCompact::InitNonMovingSpaceFirstObjects() {
817 accounting::ContinuousSpaceBitmap* bitmap = non_moving_space_->GetLiveBitmap();
818 uintptr_t begin = reinterpret_cast<uintptr_t>(non_moving_space_->Begin());
819 const uintptr_t end = reinterpret_cast<uintptr_t>(non_moving_space_->End());
820 mirror::Object* prev_obj;
821 size_t page_idx;
822 {
823 // Find first live object
824 mirror::Object* obj = nullptr;
825 bitmap->VisitMarkedRange</*kVisitOnce*/ true>(begin,
826 end,
827 [&obj] (mirror::Object* o) {
828 obj = o;
829 });
830 if (obj == nullptr) {
831 // There are no live objects in the non-moving space
832 return;
833 }
834 page_idx = (reinterpret_cast<uintptr_t>(obj) - begin) / kPageSize;
835 first_objs_non_moving_space_[page_idx++].Assign(obj);
836 prev_obj = obj;
837 }
838 // TODO: check obj is valid
839 uintptr_t prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
840 + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
841 // For every page find the object starting from which we need to call
842 // VisitReferences. It could either be an object that started on some
843 // preceding page, or some object starting within this page.
844 begin = RoundDown(reinterpret_cast<uintptr_t>(prev_obj) + kPageSize, kPageSize);
845 while (begin < end) {
// Utilize, if present, a large object that started on some preceding page but
// overlaps with this page as well.
848 if (prev_obj != nullptr && prev_obj_end > begin) {
849 DCHECK_LT(prev_obj, reinterpret_cast<mirror::Object*>(begin));
850 first_objs_non_moving_space_[page_idx].Assign(prev_obj);
851 mirror::Class* klass = prev_obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
852 if (bump_pointer_space_->HasAddress(klass)) {
853 LOG(WARNING) << "found inter-page object " << prev_obj
854 << " in non-moving space with klass " << klass
855 << " in moving space";
856 }
857 } else {
858 prev_obj_end = 0;
// It's sufficient to only search for the previous object in the preceding page.
// If no live object started in that page and some object had started in
// the page preceding that one, which was big enough to overlap with
// the current page, then we wouldn't be in the else part.
863 prev_obj = bitmap->FindPrecedingObject(begin, begin - kPageSize);
864 if (prev_obj != nullptr) {
865 prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
866 + RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
867 }
868 if (prev_obj_end > begin) {
869 mirror::Class* klass = prev_obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
870 if (bump_pointer_space_->HasAddress(klass)) {
871 LOG(WARNING) << "found inter-page object " << prev_obj
872 << " in non-moving space with klass " << klass
873 << " in moving space";
874 }
875 first_objs_non_moving_space_[page_idx].Assign(prev_obj);
876 } else {
877 // Find the first live object in this page
878 bitmap->VisitMarkedRange</*kVisitOnce*/ true>(
879 begin,
880 begin + kPageSize,
881 [this, page_idx] (mirror::Object* obj) {
882 first_objs_non_moving_space_[page_idx].Assign(obj);
883 });
884 }
885 // An empty entry indicates that the page has no live objects and hence
886 // can be skipped.
887 }
888 begin += kPageSize;
889 page_idx++;
890 }
891 non_moving_first_objs_count_ = page_idx;
892 }
893
bool MarkCompact::CanCompactMovingSpaceWithMinorFault() {
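// The shadow map must be large enough to hold every post-compact page:
// pages for objects allocated before the marking pause plus the
// black-allocation pages.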
895 size_t min_size = (moving_first_objs_count_ + black_page_count_) * kPageSize;
896 return minor_fault_initialized_ && shadow_to_space_map_.IsValid() &&
897 shadow_to_space_map_.Size() >= min_size;
898 }
899
900 class MarkCompact::ConcurrentCompactionGcTask : public SelfDeletingTask {
901 public:
explicit ConcurrentCompactionGcTask(MarkCompact* collector, size_t idx)
903 : collector_(collector), index_(idx) {}
904
void Run(Thread* self ATTRIBUTE_UNUSED) override REQUIRES_SHARED(Locks::mutator_lock_) {
906 if (collector_->CanCompactMovingSpaceWithMinorFault()) {
907 collector_->ConcurrentCompaction<MarkCompact::kMinorFaultMode>(/*buf=*/nullptr);
908 } else {
// The page/buf passed to ConcurrentCompaction is used by the thread as a
// kPageSize buffer for compacting and updating objects into, and is then
// passed to the uffd ioctls.
912 uint8_t* buf = collector_->compaction_buffers_map_.Begin() + index_ * kPageSize;
913 collector_->ConcurrentCompaction<MarkCompact::kCopyMode>(buf);
914 }
915 }
916
917 private:
918 MarkCompact* const collector_;
919 size_t index_;
920 };
921
void MarkCompact::PrepareForCompaction() {
923 uint8_t* space_begin = bump_pointer_space_->Begin();
924 size_t vector_len = (black_allocations_begin_ - space_begin) / kOffsetChunkSize;
925 DCHECK_LE(vector_len, vector_length_);
926 for (size_t i = 0; i < vector_len; i++) {
927 DCHECK_LE(chunk_info_vec_[i], kOffsetChunkSize);
928 DCHECK_EQ(chunk_info_vec_[i], live_words_bitmap_->LiveBytesInBitmapWord(i));
929 }
930 InitMovingSpaceFirstObjects(vector_len);
931 InitNonMovingSpaceFirstObjects();
932
933 // TODO: We can do a lot of neat tricks with this offset vector to tune the
934 // compaction as we wish. Originally, the compaction algorithm slides all
935 // live objects towards the beginning of the heap. This is nice because it
936 // keeps the spatial locality of objects intact.
937 // However, sometimes it's desired to compact objects in certain portions
938 // of the heap. For instance, it is expected that, over time,
939 // objects towards the beginning of the heap are long lived and are always
940 // densely packed. In this case, it makes sense to only update references in
941 // there and not try to compact it.
942 // Furthermore, we might have some large objects and may not want to move such
943 // objects.
944 // We can adjust, without too much effort, the values in the chunk_info_vec_ such
945 // that the objects in the dense beginning area aren't moved. OTOH, large
946 // objects, which could be anywhere in the heap, could also be kept from
947 // moving by using a similar trick. The only issue is that by doing this we will
948 // leave an unused hole in the middle of the heap which can't be used for
949 // allocations until we do a *full* compaction.
950 //
// At this point every element in the chunk_info_vec_ contains the live-bytes
// of the corresponding chunk. For old-to-new address computation we need
// every element to reflect the total live-bytes of all preceding chunks.
954
955 // Live-bytes count is required to compute post_compact_end_ below.
956 uint32_t total;
957 // Update the vector one past the heap usage as it is required for black
958 // allocated objects' post-compact address computation.
959 if (vector_len < vector_length_) {
960 vector_len++;
961 total = 0;
962 } else {
963 // Fetch the value stored in the last element before it gets overwritten by
964 // std::exclusive_scan().
965 total = chunk_info_vec_[vector_len - 1];
966 }
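// std::exclusive_scan converts the per-chunk live-bytes, e.g. [a, b, c], into
// exclusive prefix sums [0, a, a+b]. Adding the post-scan last element to
// 'total' (which holds either the saved pre-scan last element or 0) leaves
// 'total' equal to the overall live-bytes count.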
967 std::exclusive_scan(chunk_info_vec_, chunk_info_vec_ + vector_len, chunk_info_vec_, 0);
968 total += chunk_info_vec_[vector_len - 1];
969
970 for (size_t i = vector_len; i < vector_length_; i++) {
971 DCHECK_EQ(chunk_info_vec_[i], 0u);
972 }
973 post_compact_end_ = AlignUp(space_begin + total, kPageSize);
974 CHECK_EQ(post_compact_end_, space_begin + moving_first_objs_count_ * kPageSize);
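// Distance by which post-marking-pause (black) allocations slide during
// compaction; ThreadFlipVisitor uses it above to adjust TLABs.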
975 black_objs_slide_diff_ = black_allocations_begin_ - post_compact_end_;
// How do we handle compaction of the heap portion used for allocations after the
// marking-pause?
// All allocations after the marking-pause are considered black (reachable)
// for this GC cycle. However, they need not be allocated contiguously as
// different mutators use TLABs. So we will compact the heap till the point
// where allocations took place before the marking-pause. And everything after
// that will be slid with TLAB holes, and then the TLAB info in TLS will be
// appropriately updated in the pre-compaction pause.
// The chunk-info vector entries for the post marking-pause allocations will
// also be updated in the pre-compaction pause.
986
987 bool is_zygote = Runtime::Current()->IsZygote();
988 if (!uffd_initialized_ && CreateUserfaultfd(/*post_fork*/false)) {
989 if (!use_uffd_sigbus_) {
990 // Register the buffer that we use for terminating concurrent compaction
991 struct uffdio_register uffd_register;
992 uffd_register.range.start = reinterpret_cast<uintptr_t>(conc_compaction_termination_page_);
993 uffd_register.range.len = kPageSize;
994 uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
995 CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
996 << "ioctl_userfaultfd: register compaction termination page: " << strerror(errno);
997 }
998 if (!uffd_minor_fault_supported_ && shadow_to_space_map_.IsValid()) {
999 // A valid shadow-map for moving space is only possible if we
1000 // were able to map it in the constructor. That also means that its size
1001 // matches the moving-space.
1002 CHECK_EQ(shadow_to_space_map_.Size(), bump_pointer_space_->Capacity());
1003 // Release the shadow map for moving-space if we don't support minor-fault
1004 // as it's not required.
1005 shadow_to_space_map_.Reset();
1006 }
1007 }
1008 // For zygote we create the thread pool each time before starting compaction,
1009 // and get rid of it when finished. This is expected to happen rarely as
1010 // zygote spends most of the time in native fork loop.
1011 if (uffd_ != kFallbackMode) {
1012 if (!use_uffd_sigbus_) {
1013 ThreadPool* pool = heap_->GetThreadPool();
1014 if (UNLIKELY(pool == nullptr)) {
// On devices with 2 cores, GetParallelGCThreadCount() will return 1,
// which is the desired number of workers on such devices.
1017 heap_->CreateThreadPool(std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers));
1018 pool = heap_->GetThreadPool();
1019 }
1020 size_t num_threads = pool->GetThreadCount();
1021 thread_pool_counter_ = num_threads;
1022 for (size_t i = 0; i < num_threads; i++) {
1023 pool->AddTask(thread_running_gc_, new ConcurrentCompactionGcTask(this, i + 1));
1024 }
1025 CHECK_EQ(pool->GetTaskCount(thread_running_gc_), num_threads);
1026 }
1027 /*
1028 * Possible scenarios for mappings:
1029 * A) All zygote GCs (or if minor-fault feature isn't available): uses
1030 * uffd's copy mode
1031 * 1) For moving-space ('to' space is same as the moving-space):
1032 * a) Private-anonymous mappings for 'to' and 'from' space are created in
1033 * the constructor.
1034 * b) In the compaction pause, we mremap(dontunmap) from 'to' space to
1035 * 'from' space. This results in moving all pages to 'from' space and
1036 * emptying the 'to' space, thereby preparing it for userfaultfd
1037 * registration.
1038 *
1039 * 2) For linear-alloc space:
1040 * a) Private-anonymous mappings for the linear-alloc and its 'shadow'
1041 * are created by the arena-pool.
* b) In the compaction pause, we mremap(dontunmap) with a similar effect as
1043 * (A.1.b) above.
1044 *
1045 * B) First GC after zygote: uses uffd's copy-mode
1046 * 1) For moving-space:
1047 * a) If the mmap for shadow-map has been successful in the constructor,
1048 * then we remap it (mmap with MAP_FIXED) to get a shared-anonymous
1049 * mapping.
1050 * b) Else, we create two memfd and ftruncate them to the moving-space
1051 * size.
1052 * c) Same as (A.1.b)
1053 * d) If (B.1.a), then mremap(dontunmap) from shadow-map to
1054 * 'to' space. This will make both of them map to the same pages
1055 * e) If (B.1.b), then mmap with the first memfd in shared mode on the
1056 * 'to' space.
1057 * f) At the end of compaction, we will have moved the moving-space
1058 * objects to a MAP_SHARED mapping, readying it for minor-fault from next
1059 * GC cycle.
1060 *
1061 * 2) For linear-alloc space:
1062 * a) Same as (A.2.b)
1063 * b) mmap a shared-anonymous mapping onto the linear-alloc space.
1064 * c) Same as (B.1.f)
1065 *
* C) All subsequent GCs: preferably minor-fault mode, but may also require
1067 * using copy-mode.
1068 * 1) For moving-space:
1069 * a) If the shadow-map is created and no memfd was used, then that means
1070 * we are using shared-anonymous. Therefore, mmap a shared-anonymous on
1071 * the shadow-space.
1072 * b) If the shadow-map is not mapped yet, then mmap one with a size
1073 * big enough to hold the compacted moving space. This may fail, in which
1074 * case we will use uffd's copy-mode.
1075 * c) If (b) is successful, then mmap the free memfd onto shadow-map.
1076 * d) Same as (A.1.b)
1077 * e) In compaction pause, if the shadow-map was not created, then use
1078 * copy-mode.
1079 * f) Else, if the created map is smaller than the required-size, then
1080 * use mremap (without dontunmap) to expand the size. If failed, then use
1081 * copy-mode.
1082 * g) Otherwise, same as (B.1.d) and use minor-fault mode.
1083 *
1084 * 2) For linear-alloc space:
1085 * a) Same as (A.2.b)
1086 * b) Use minor-fault mode
1087 */
1088 auto mmap_shadow_map = [this](int flags, int fd) {
1089 void* ret = mmap(shadow_to_space_map_.Begin(),
1090 shadow_to_space_map_.Size(),
1091 PROT_READ | PROT_WRITE,
1092 flags,
1093 fd,
1094 /*offset=*/0);
1095 DCHECK_NE(ret, MAP_FAILED) << "mmap for moving-space shadow failed:" << strerror(errno);
1096 };
1097 // Setup all the virtual memory ranges required for concurrent compaction.
1098 if (minor_fault_initialized_) {
1099 DCHECK(!is_zygote);
1100 if (UNLIKELY(!shadow_to_space_map_.IsValid())) {
1101 // This case happens only once on the first GC in minor-fault mode, if
1102 // we were unable to reserve shadow-map for moving-space in the
1103 // beginning.
1104 DCHECK_GE(moving_to_space_fd_, 0);
1105 // Take extra 4MB to reduce the likelihood of requiring resizing this
1106 // map in the pause due to black allocations.
1107 size_t reqd_size = std::min(moving_first_objs_count_ * kPageSize + 4 * MB,
1108 bump_pointer_space_->Capacity());
1109 // We cannot support memory-tool with shadow-map (as it requires
1110 // appending a redzone) in this case because the mapping may have to be expanded
1111 // using mremap (in KernelPreparation()), which would ignore the redzone.
1112 // MemMap::MapFile() appends a redzone, but MemMap::MapAnonymous() doesn't.
1113 std::string err_msg;
1114 shadow_to_space_map_ = MemMap::MapAnonymous("moving-space-shadow",
1115 reqd_size,
1116 PROT_NONE,
1117 /*low_4gb=*/kObjPtrPoisoning,
1118 &err_msg);
1119
1120 if (shadow_to_space_map_.IsValid()) {
1121 CHECK(!kMemoryToolAddsRedzones || shadow_to_space_map_.GetRedzoneSize() == 0u);
1122 // We want to use MemMap to get low-4GB mapping, if required, but then also
1123 // want to have its ownership as we may grow it (in
1124 // KernelPreparation()). If the ownership is not taken and we try to
1125 // resize MemMap, then it unmaps the virtual range.
1126 MemMap temp = shadow_to_space_map_.TakeReservedMemory(shadow_to_space_map_.Size(),
1127 /*reuse*/ true);
1128 std::swap(temp, shadow_to_space_map_);
1129 DCHECK(!temp.IsValid());
1130 } else {
1131 LOG(WARNING) << "Failed to create moving space's shadow map of " << PrettySize(reqd_size)
1132 << " size. " << err_msg;
1133 }
1134 }
1135
1136 if (LIKELY(shadow_to_space_map_.IsValid())) {
1137 int fd = moving_to_space_fd_;
1138 int mmap_flags = MAP_SHARED | MAP_FIXED;
1139 if (fd == kFdUnused) {
1140 // Unused moving-to-space fd means we are using anonymous shared
1141 // mapping.
1142 DCHECK_EQ(shadow_to_space_map_.Size(), bump_pointer_space_->Capacity());
1143 mmap_flags |= MAP_ANONYMOUS;
1144 fd = -1;
1145 }
1146 // If the map is smaller than required, then we'll do mremap in the
1147 // compaction pause to increase the size.
1148 mmap_shadow_map(mmap_flags, fd);
1149 }
1150
1151 for (auto& data : linear_alloc_spaces_data_) {
1152 DCHECK_EQ(mprotect(data.shadow_.Begin(), data.shadow_.Size(), PROT_READ | PROT_WRITE), 0)
1153 << "mprotect failed: " << strerror(errno);
1154 }
1155 } else if (!is_zygote && uffd_minor_fault_supported_) {
1156 // First GC after zygote-fork. We will still use uffd's copy mode but will
1157 // use it to move objects to MAP_SHARED (to prepare for subsequent GCs, which
1158 // will use uffd's minor-fault feature).
1159 if (shadow_to_space_map_.IsValid() &&
1160 shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) {
1161 mmap_shadow_map(MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS, /*fd=*/-1);
1162 } else {
1163 size_t size = bump_pointer_space_->Capacity();
1164 DCHECK_EQ(moving_to_space_fd_, kFdUnused);
1165 DCHECK_EQ(moving_from_space_fd_, kFdUnused);
1166 const char* name = bump_pointer_space_->GetName();
1167 moving_to_space_fd_ = memfd_create(name, MFD_CLOEXEC);
1168 CHECK_NE(moving_to_space_fd_, -1)
1169 << "memfd_create: failed for " << name << ": " << strerror(errno);
1170 moving_from_space_fd_ = memfd_create(name, MFD_CLOEXEC);
1171 CHECK_NE(moving_from_space_fd_, -1)
1172 << "memfd_create: failed for " << name << ": " << strerror(errno);
1173
// memfds are considered files from the resource-limits point of view.
// And the moving space could be several hundred MBs. So increase the
// limit if it's lower than the moving-space size.
1177 bool rlimit_changed = false;
1178 rlimit rlim_read;
1179 CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlim_read), 0) << "getrlimit failed: " << strerror(errno);
1180 if (rlim_read.rlim_cur < size) {
1181 rlimit_changed = true;
1182 rlimit rlim = rlim_read;
1183 rlim.rlim_cur = size;
1184 CHECK_EQ(setrlimit(RLIMIT_FSIZE, &rlim), 0) << "setrlimit failed: " << strerror(errno);
1185 }
1186
1187 // moving-space will map this fd so that we compact objects into it.
1188 int ret = ftruncate(moving_to_space_fd_, size);
1189 CHECK_EQ(ret, 0) << "ftruncate failed for moving-space:" << strerror(errno);
1190 ret = ftruncate(moving_from_space_fd_, size);
1191 CHECK_EQ(ret, 0) << "ftruncate failed for moving-space:" << strerror(errno);
1192
1193 if (rlimit_changed) {
1194 // reset the rlimit to the original limits.
1195 CHECK_EQ(setrlimit(RLIMIT_FSIZE, &rlim_read), 0)
1196 << "setrlimit failed: " << strerror(errno);
1197 }
1198 }
1199 }
1200 }
1201 }
1202
1203 class MarkCompact::VerifyRootMarkedVisitor : public SingleRootVisitor {
1204 public:
explicit VerifyRootMarkedVisitor(MarkCompact* collector) : collector_(collector) { }
1206
void VisitRoot(mirror::Object* root, const RootInfo& info) override
1208 REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
1209 CHECK(collector_->IsMarked(root) != nullptr) << info.ToString();
1210 }
1211
1212 private:
1213 MarkCompact* const collector_;
1214 };
1215
void MarkCompact::ReMarkRoots(Runtime* runtime) {
1217 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1218 DCHECK_EQ(thread_running_gc_, Thread::Current());
1219 Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1220 MarkNonThreadRoots(runtime);
1221 MarkConcurrentRoots(static_cast<VisitRootFlags>(kVisitRootFlagNewRoots
1222 | kVisitRootFlagStopLoggingNewRoots
1223 | kVisitRootFlagClearRootLog),
1224 runtime);
1225
1226 if (kVerifyRootsMarked) {
1227 TimingLogger::ScopedTiming t2("(Paused)VerifyRoots", GetTimings());
1228 VerifyRootMarkedVisitor visitor(this);
1229 runtime->VisitRoots(&visitor);
1230 }
1231 }
1232
void MarkCompact::MarkingPause() {
1234 TimingLogger::ScopedTiming t("(Paused)MarkingPause", GetTimings());
1235 Runtime* runtime = Runtime::Current();
1236 Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
1237 {
1238 // Handle the dirty objects as we are a concurrent GC
1239 WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1240 {
1241 MutexLock mu2(thread_running_gc_, *Locks::runtime_shutdown_lock_);
1242 MutexLock mu3(thread_running_gc_, *Locks::thread_list_lock_);
1243 std::list<Thread*> thread_list = runtime->GetThreadList()->GetList();
1244 for (Thread* thread : thread_list) {
1245 thread->VisitRoots(this, static_cast<VisitRootFlags>(0));
1246 DCHECK_EQ(thread->GetThreadLocalGcBuffer(), nullptr);
1247 // Need to revoke all the thread-local allocation stacks since we will
1248 // swap the allocation stacks (below) and don't want anybody to allocate
1249 // into the live stack.
1250 thread->RevokeThreadLocalAllocationStack();
1251 bump_pointer_space_->RevokeThreadLocalBuffers(thread);
1252 }
1253 }
1254 // Fetch only the accumulated objects-allocated count as it is guaranteed to
1255 // be up-to-date after the TLAB revocation above.
1256 freed_objects_ += bump_pointer_space_->GetAccumulatedObjectsAllocated();
1257 // Capture 'end' of moving-space at this point. Every allocation beyond this
1258 // point will be considered as black.
1259 // Align-up to page boundary so that black allocations happen from next page
1260 // onwards. Also, it ensures that 'end' is aligned for card-table's
1261 // ClearCardRange().
1262 black_allocations_begin_ = bump_pointer_space_->AlignEnd(thread_running_gc_, kPageSize);
1263 DCHECK(IsAligned<kAlignment>(black_allocations_begin_));
1264 black_allocations_begin_ = AlignUp(black_allocations_begin_, kPageSize);
1265
1266 // Re-mark root set. Doesn't include thread-roots as they are already marked
1267 // above.
1268 ReMarkRoots(runtime);
1269 // Scan dirty objects.
1270 RecursiveMarkDirtyObjects(/*paused*/ true, accounting::CardTable::kCardDirty);
1271 {
1272 TimingLogger::ScopedTiming t2("SwapStacks", GetTimings());
1273 heap_->SwapStacks();
1274 live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
1275 }
1276 }
1277 // TODO: For PreSweepingGcVerification(), find correct strategy to visit/walk
1278 // objects in bump-pointer space when we have a mark-bitmap to indicate live
1279 // objects. At the same time we also need to be able to visit black allocations,
1280 // even though they are not marked in the bitmap. Without both of these we fail
// pre-sweeping verification. We also leave windows open wherein a
// VisitObjects/Walk on the space would either miss some objects or visit
// unreachable ones. These windows occur when we are switching from the shared
// mutator-lock to exclusive and vice versa, starting from here till the compaction pause.
1285 // heap_->PreSweepingGcVerification(this);
1286
1287 // Disallow new system weaks to prevent a race which occurs when someone adds
1288 // a new system weak before we sweep them. Since this new system weak may not
1289 // be marked, the GC may incorrectly sweep it. This also fixes a race where
1290 // interning may attempt to return a strong reference to a string that is
1291 // about to be swept.
1292 runtime->DisallowNewSystemWeaks();
1293 // Enable the reference-processing slow path. This needs to be done with mutators
1294 // paused since there is no lock in the GetReferent fast path.
1295 heap_->GetReferenceProcessor()->EnableSlowPath();
1296 }
1297
1298 void MarkCompact::SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) {
1299 TimingLogger::ScopedTiming t(paused ? "(Paused)SweepSystemWeaks" : "SweepSystemWeaks",
1300 GetTimings());
1301 ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
1302 runtime->SweepSystemWeaks(this);
1303 }
1304
1305 void MarkCompact::ProcessReferences(Thread* self) {
1306 WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
1307 GetHeap()->GetReferenceProcessor()->ProcessReferences(self, GetTimings());
1308 }
1309
1310 void MarkCompact::Sweep(bool swap_bitmaps) {
1311 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1312 // Ensure that nobody inserted objects in the live stack after we swapped the
1313 // stacks.
1314 CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size());
1315 {
1316 TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings());
1317 // Mark everything allocated since the last GC as live so that we can sweep
1318 // concurrently, knowing that new allocations won't be marked as live.
1319 accounting::ObjectStack* live_stack = heap_->GetLiveStack();
1320 heap_->MarkAllocStackAsLive(live_stack);
1321 live_stack->Reset();
1322 DCHECK(mark_stack_->IsEmpty());
1323 }
1324 for (const auto& space : GetHeap()->GetContinuousSpaces()) {
1325 if (space->IsContinuousMemMapAllocSpace() && space != bump_pointer_space_) {
1326 space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
1327 TimingLogger::ScopedTiming split(
1328 alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace",
1329 GetTimings());
1330 RecordFree(alloc_space->Sweep(swap_bitmaps));
1331 }
1332 }
1333 SweepLargeObjects(swap_bitmaps);
1334 }
1335
1336 void MarkCompact::SweepLargeObjects(bool swap_bitmaps) {
1337 space::LargeObjectSpace* los = heap_->GetLargeObjectsSpace();
1338 if (los != nullptr) {
1339 TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
1340 RecordFreeLOS(los->Sweep(swap_bitmaps));
1341 }
1342 }
1343
1344 void MarkCompact::ReclaimPhase() {
1345 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
1346 DCHECK(thread_running_gc_ == Thread::Current());
1347 Runtime* const runtime = Runtime::Current();
1348 // Process the references concurrently.
1349 ProcessReferences(thread_running_gc_);
1350 // TODO: Try to merge this system-weak sweeping with the one while updating
1351 // references during the compaction pause.
1352 SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ false);
1353 runtime->AllowNewSystemWeaks();
1354 // Clean up class loaders after system weaks are swept since that is how we know if class
1355 // unloading occurred.
1356 runtime->GetClassLinker()->CleanupClassLoaders();
1357 {
1358 WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
1359 // Reclaim unmarked objects.
1360 Sweep(false);
1361 // Swap the live and mark bitmaps for each space that we modified. This is an
1362 // optimization that enables us to not clear live bits inside of the sweep. Only swaps unbound
1363 // bitmaps.
1364 SwapBitmaps();
1365 // Unbind the live and mark bitmaps.
1366 GetHeap()->UnBindBitmaps();
1367 }
1368 }
1369
1370 // We want to avoid checking for every reference whether it lies within the page or
1371 // not. This can be done if we know where in the page the holder object lies.
1372 // If it doesn't overlap either boundary then we can skip the checks.
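// For illustration, assuming 4 KiB pages: if the holder object starts 100 bytes
// before the page being compacted, references at offsets < 100 lie on the previous
// page and are skipped by kCheckBegin; kCheckEnd plays the symmetric role for an
// object that spills past the page end.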
1373 template <bool kCheckBegin, bool kCheckEnd>
1374 class MarkCompact::RefsUpdateVisitor {
1375 public:
1376 explicit RefsUpdateVisitor(MarkCompact* collector,
1377 mirror::Object* obj,
1378 uint8_t* begin,
1379 uint8_t* end)
1380 : collector_(collector), obj_(obj), begin_(begin), end_(end) {
1381 DCHECK(!kCheckBegin || begin != nullptr);
1382 DCHECK(!kCheckEnd || end != nullptr);
1383 }
1384
1385 void operator()(mirror::Object* old ATTRIBUTE_UNUSED, MemberOffset offset, bool /* is_static */)
1386 const ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_)
1387 REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1388 bool update = true;
1389 if (kCheckBegin || kCheckEnd) {
1390 uint8_t* ref = reinterpret_cast<uint8_t*>(obj_) + offset.Int32Value();
1391 update = (!kCheckBegin || ref >= begin_) && (!kCheckEnd || ref < end_);
1392 }
1393 if (update) {
1394 collector_->UpdateRef(obj_, offset);
1395 }
1396 }
1397
1398 // For object arrays we don't need to check boundaries here as it's done in
1399 // VisitReferences().
1400 // TODO: Optimize reference updating using SIMD instructions. Object arrays
1401 // are perfect as all references are tightly packed.
1402 void operator()(mirror::Object* old ATTRIBUTE_UNUSED,
1403 MemberOffset offset,
1404 bool /*is_static*/,
1405 bool /*is_obj_array*/)
1406 const ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_)
1407 REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
1408 collector_->UpdateRef(obj_, offset);
1409 }
1410
1411 void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
1412 ALWAYS_INLINE
1413 REQUIRES_SHARED(Locks::mutator_lock_) {
1414 if (!root->IsNull()) {
1415 VisitRoot(root);
1416 }
1417 }
1418
1419 void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
1420 ALWAYS_INLINE
1421 REQUIRES_SHARED(Locks::mutator_lock_) {
1422 collector_->UpdateRoot(root);
1423 }
1424
1425 private:
1426 MarkCompact* const collector_;
1427 mirror::Object* const obj_;
1428 uint8_t* const begin_;
1429 uint8_t* const end_;
1430 };
1431
1432 bool MarkCompact::IsValidObject(mirror::Object* obj) const {
1433 mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
1434 if (!heap_->GetVerification()->IsValidHeapObjectAddress(klass)) {
1435 return false;
1436 }
1437 return heap_->GetVerification()->IsValidClassUnchecked<kWithFromSpaceBarrier>(
1438 obj->GetClass<kVerifyNone, kWithFromSpaceBarrier>());
1439 }
1440
1441 template <typename Callback>
1442 void MarkCompact::VerifyObject(mirror::Object* ref, Callback& callback) const {
1443 if (kIsDebugBuild) {
1444 mirror::Class* klass = ref->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1445 mirror::Class* pre_compact_klass = ref->GetClass<kVerifyNone, kWithoutReadBarrier>();
1446 mirror::Class* klass_klass = klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1447 mirror::Class* klass_klass_klass = klass_klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
1448 if (bump_pointer_space_->HasAddress(pre_compact_klass) &&
1449 reinterpret_cast<uint8_t*>(pre_compact_klass) < black_allocations_begin_) {
1450 CHECK(moving_space_bitmap_->Test(pre_compact_klass))
1451 << "ref=" << ref
1452 << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1453 << " pre_compact_klass=" << pre_compact_klass
1454 << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1455 CHECK(live_words_bitmap_->Test(pre_compact_klass));
1456 }
1457 if (!IsValidObject(ref)) {
1458 std::ostringstream oss;
1459 oss << "Invalid object: "
1460 << "ref=" << ref
1461 << " klass=" << klass
1462 << " klass_klass=" << klass_klass
1463 << " klass_klass_klass=" << klass_klass_klass
1464 << " pre_compact_klass=" << pre_compact_klass
1465 << " from_space_begin=" << static_cast<void*>(from_space_begin_)
1466 << " pre_compact_begin=" << static_cast<void*>(bump_pointer_space_->Begin())
1467 << " post_compact_end=" << static_cast<void*>(post_compact_end_)
1468 << " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
1469
1470 // Call callback before dumping larger data like RAM and space dumps.
1471 callback(oss);
1472
1473 oss << " \nobject="
1474 << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(ref), 128)
1475 << " \nklass(from)="
1476 << heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(klass), 128)
1477 << "spaces:\n";
1478 heap_->DumpSpaces(oss);
1479 LOG(FATAL) << oss.str();
1480 }
1481 }
1482 }
1483
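// Compacts one to-space page into 'addr'. Roughly: 'obj' is the first object whose
// live words fall on this page (it may have started on an earlier page), 'offset'
// is the live-words offset (in kAlignment units) from which copying starts, and
// 'needs_memset_zero' says whether an uncopied tail of the page must be zeroed
// explicitly (see the note at the end about page-granularity kernel copies).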
1484 void MarkCompact::CompactPage(mirror::Object* obj,
1485 uint32_t offset,
1486 uint8_t* addr,
1487 bool needs_memset_zero) {
1488 DCHECK(moving_space_bitmap_->Test(obj)
1489 && live_words_bitmap_->Test(obj));
1490 DCHECK(live_words_bitmap_->Test(offset)) << "obj=" << obj
1491 << " offset=" << offset
1492 << " addr=" << static_cast<void*>(addr)
1493 << " black_allocs_begin="
1494 << static_cast<void*>(black_allocations_begin_)
1495 << " post_compact_addr="
1496 << static_cast<void*>(post_compact_end_);
1497 uint8_t* const start_addr = addr;
1498 // Number of distinct live-strides seen so far.
1499 size_t stride_count = 0;
1500 uint8_t* last_stride = addr;
1501 uint32_t last_stride_begin = 0;
1502 auto verify_obj_callback = [&] (std::ostream& os) {
1503 os << " stride_count=" << stride_count
1504 << " last_stride=" << static_cast<void*>(last_stride)
1505 << " offset=" << offset
1506 << " start_addr=" << static_cast<void*>(start_addr);
1507 };
1508 obj = GetFromSpaceAddr(obj);
1509 live_words_bitmap_->VisitLiveStrides(offset,
1510 black_allocations_begin_,
1511 kPageSize,
1512 [&addr,
1513 &last_stride,
1514 &stride_count,
1515 &last_stride_begin,
1516 verify_obj_callback,
1517 this] (uint32_t stride_begin,
1518 size_t stride_size,
1519 bool /*is_last*/)
1520 REQUIRES_SHARED(Locks::mutator_lock_) {
1521 const size_t stride_in_bytes = stride_size * kAlignment;
1522 DCHECK_LE(stride_in_bytes, kPageSize);
1523 last_stride_begin = stride_begin;
1524 DCHECK(IsAligned<kAlignment>(addr));
1525 memcpy(addr,
1526 from_space_begin_ + stride_begin * kAlignment,
1527 stride_in_bytes);
1528 if (kIsDebugBuild) {
1529 uint8_t* space_begin = bump_pointer_space_->Begin();
1530 // We can interpret the first word of the stride as an
1531 // object only from the second stride onwards, as the first
1532 // stride's first object may have started on the previous
1533 // page. The only exception is the first page of the
1534 // moving space.
1535 if (stride_count > 0
1536 || stride_begin * kAlignment < kPageSize) {
1537 mirror::Object* o =
1538 reinterpret_cast<mirror::Object*>(space_begin
1539 + stride_begin
1540 * kAlignment);
1541 CHECK(live_words_bitmap_->Test(o)) << "ref=" << o;
1542 CHECK(moving_space_bitmap_->Test(o))
1543 << "ref=" << o
1544 << " bitmap: "
1545 << moving_space_bitmap_->DumpMemAround(o);
1546 VerifyObject(reinterpret_cast<mirror::Object*>(addr),
1547 verify_obj_callback);
1548 }
1549 }
1550 last_stride = addr;
1551 addr += stride_in_bytes;
1552 stride_count++;
1553 });
1554 DCHECK_LT(last_stride, start_addr + kPageSize);
1555 DCHECK_GT(stride_count, 0u);
1556 size_t obj_size = 0;
1557 uint32_t offset_within_obj = offset * kAlignment
1558 - (reinterpret_cast<uint8_t*>(obj) - from_space_begin_);
1559 // First object
1560 if (offset_within_obj > 0) {
1561 mirror::Object* to_ref = reinterpret_cast<mirror::Object*>(start_addr - offset_within_obj);
1562 if (stride_count > 1) {
1563 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1564 to_ref,
1565 start_addr,
1566 nullptr);
1567 obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(
1568 visitor, MemberOffset(offset_within_obj), MemberOffset(-1));
1569 } else {
1570 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1571 to_ref,
1572 start_addr,
1573 start_addr + kPageSize);
1574 obj_size = obj->VisitRefsForCompaction</*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(
1575 visitor, MemberOffset(offset_within_obj), MemberOffset(offset_within_obj
1576 + kPageSize));
1577 }
1578 obj_size = RoundUp(obj_size, kAlignment);
1579 DCHECK_GT(obj_size, offset_within_obj)
1580 << "obj:" << obj
1581 << " class:"
1582 << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1583 << " to_addr:" << to_ref
1584 << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1585 << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1586 << " offset:" << offset * kAlignment
1587 << " class-after-obj-iter:"
1588 << (class_after_obj_iter_ != class_after_obj_ordered_map_.rend() ?
1589 class_after_obj_iter_->first.AsMirrorPtr() : nullptr)
1590 << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1591 << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1592 << " offset-of-last-idx:"
1593 << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1594 << " first-obj-of-last-idx:"
1595 << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1596
1597 obj_size -= offset_within_obj;
1598 // If there is only one stride, then adjust last_stride_begin to the
1599 // end of the first object.
1600 if (stride_count == 1) {
1601 last_stride_begin += obj_size / kAlignment;
1602 }
1603 }
1604
1605 // Except for the last page being compacted, the pages will have addr ==
1606 // start_addr + kPageSize.
1607 uint8_t* const end_addr = addr;
1608 addr = start_addr;
1609 size_t bytes_done = obj_size;
1610 // All strides except the last one can be updated without any boundary
1611 // checks.
1612 DCHECK_LE(addr, last_stride);
1613 size_t bytes_to_visit = last_stride - addr;
1614 DCHECK_LE(bytes_to_visit, kPageSize);
1615 while (bytes_to_visit > bytes_done) {
1616 mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1617 VerifyObject(ref, verify_obj_callback);
1618 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
1619 visitor(this, ref, nullptr, nullptr);
1620 obj_size = ref->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1621 obj_size = RoundUp(obj_size, kAlignment);
1622 bytes_done += obj_size;
1623 }
1624 // The last stride may have multiple objects in it, and we don't know where the
1625 // last object, which crosses the page boundary, starts; therefore check
1626 // page-end in all of these objects. Also, we need to call
1627 // VisitRefsForCompaction() with the from-space object as we fetch the object size,
1628 // which in the case of a klass requires 'class_size_'.
1629 uint8_t* from_addr = from_space_begin_ + last_stride_begin * kAlignment;
1630 bytes_to_visit = end_addr - addr;
1631 DCHECK_LE(bytes_to_visit, kPageSize);
1632 while (bytes_to_visit > bytes_done) {
1633 mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
1634 obj = reinterpret_cast<mirror::Object*>(from_addr);
1635 VerifyObject(ref, verify_obj_callback);
1636 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true>
1637 visitor(this, ref, nullptr, start_addr + kPageSize);
1638 obj_size = obj->VisitRefsForCompaction(visitor,
1639 MemberOffset(0),
1640 MemberOffset(end_addr - (addr + bytes_done)));
1641 obj_size = RoundUp(obj_size, kAlignment);
1642 DCHECK_GT(obj_size, 0u)
1643 << "from_addr:" << obj
1644 << " from-space-class:"
1645 << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
1646 << " to_addr:" << ref
1647 << " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
1648 << " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
1649 << " offset:" << offset * kAlignment
1650 << " bytes_done:" << bytes_done
1651 << " class-after-obj-iter:"
1652 << (class_after_obj_iter_ != class_after_obj_ordered_map_.rend() ?
1653 class_after_obj_iter_->first.AsMirrorPtr() : nullptr)
1654 << " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
1655 << " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
1656 << " offset-of-last-idx:"
1657 << pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
1658 << " first-obj-of-last-idx:"
1659 << first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
1660
1661 from_addr += obj_size;
1662 bytes_done += obj_size;
1663 }
1664 // The last page that we compact may have some bytes left untouched in the
1665 // end, we should zero them as the kernel copies at page granularity.
1666 if (needs_memset_zero && UNLIKELY(bytes_done < kPageSize)) {
1667 std::memset(addr + bytes_done, 0x0, kPageSize - bytes_done);
1668 }
1669 }
1670
1671 // We store the starting point (pre_compact_page - first_obj) and first-chunk's
1672 // size. If more TLAB(s) started in this page, then those chunks are identified
1673 // using mark bitmap. All this info is prepared in UpdateMovingSpaceBlackAllocations().
1674 // If we find a set bit in the bitmap, then we copy the remaining page and then
1675 // use the bitmap to visit each object for updating references.
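// A rough illustration, assuming 4 KiB pages, of what a black page may look like:
//   [ empty begin | first chunk of black objects | unused TLAB tail | new TLAB ]
// The first chunk is copied using 'first_chunk_size'; objects after the hole are
// located via the mark-bitmap, as their offsets are not recorded anywhere else.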
1676 void MarkCompact::SlideBlackPage(mirror::Object* first_obj,
1677 const size_t page_idx,
1678 uint8_t* const pre_compact_page,
1679 uint8_t* dest,
1680 bool needs_memset_zero) {
1681 DCHECK(IsAligned<kPageSize>(pre_compact_page));
1682 size_t bytes_copied;
1683 const uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx];
1684 mirror::Object* next_page_first_obj = first_objs_moving_space_[page_idx + 1].AsMirrorPtr();
1685 uint8_t* src_addr = reinterpret_cast<uint8_t*>(GetFromSpaceAddr(first_obj));
1686 uint8_t* pre_compact_addr = reinterpret_cast<uint8_t*>(first_obj);
1687 uint8_t* const pre_compact_page_end = pre_compact_page + kPageSize;
1688 uint8_t* const dest_page_end = dest + kPageSize;
1689
1690 auto verify_obj_callback = [&] (std::ostream& os) {
1691 os << " first_obj=" << first_obj
1692 << " next_page_first_obj=" << next_page_first_obj
1693 << " first_chunk_sie=" << first_chunk_size
1694 << " dest=" << static_cast<void*>(dest)
1695 << " pre_compact_page="
1696 << static_cast<void* const>(pre_compact_page);
1697 };
1698 // If there is an empty portion at the beginning of the page, zero it.
1699 if (pre_compact_addr > pre_compact_page) {
1700 bytes_copied = pre_compact_addr - pre_compact_page;
1701 DCHECK_LT(bytes_copied, kPageSize);
1702 if (needs_memset_zero) {
1703 std::memset(dest, 0x0, bytes_copied);
1704 }
1705 dest += bytes_copied;
1706 } else {
1707 bytes_copied = 0;
1708 size_t offset = pre_compact_page - pre_compact_addr;
1709 pre_compact_addr = pre_compact_page;
1710 src_addr += offset;
1711 DCHECK(IsAligned<kPageSize>(src_addr));
1712 }
1713 // Copy the first chunk of live words
1714 std::memcpy(dest, src_addr, first_chunk_size);
1715 // Update references in the first chunk. Use object size to find next object.
1716 {
1717 size_t bytes_to_visit = first_chunk_size;
1718 size_t obj_size;
1719 // The first object started in some previous page. So we need to check the
1720 // beginning.
1721 DCHECK_LE(reinterpret_cast<uint8_t*>(first_obj), pre_compact_addr);
1722 size_t offset = pre_compact_addr - reinterpret_cast<uint8_t*>(first_obj);
1723 if (bytes_copied == 0 && offset > 0) {
1724 mirror::Object* to_obj = reinterpret_cast<mirror::Object*>(dest - offset);
1725 mirror::Object* from_obj = reinterpret_cast<mirror::Object*>(src_addr - offset);
1726 // If the next page's first-obj is in this page or nullptr, then we don't
1727 // need to check end boundary
1728 if (next_page_first_obj == nullptr
1729 || (first_obj != next_page_first_obj
1730 && reinterpret_cast<uint8_t*>(next_page_first_obj) <= pre_compact_page_end)) {
1731 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
1732 to_obj,
1733 dest,
1734 nullptr);
1735 obj_size = from_obj->VisitRefsForCompaction<
1736 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1737 MemberOffset(offset),
1738 MemberOffset(-1));
1739 } else {
1740 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
1741 to_obj,
1742 dest,
1743 dest_page_end);
1744 obj_size = from_obj->VisitRefsForCompaction<
1745 /*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
1746 MemberOffset(offset),
1747 MemberOffset(offset
1748 + kPageSize));
1749 if (first_obj == next_page_first_obj) {
1750 // First object is the only object on this page. So there's nothing else left to do.
1751 return;
1752 }
1753 }
1754 obj_size = RoundUp(obj_size, kAlignment);
1755 obj_size -= offset;
1756 dest += obj_size;
1757 bytes_to_visit -= obj_size;
1758 }
1759 bytes_copied += first_chunk_size;
1760 // If the last object in this page is next_page_first_obj, then we need to check end boundary
1761 bool check_last_obj = false;
1762 if (next_page_first_obj != nullptr
1763 && reinterpret_cast<uint8_t*>(next_page_first_obj) < pre_compact_page_end
1764 && bytes_copied == kPageSize) {
1765 size_t diff = pre_compact_page_end - reinterpret_cast<uint8_t*>(next_page_first_obj);
1766 DCHECK_LE(diff, kPageSize);
1767 DCHECK_LE(diff, bytes_to_visit);
1768 bytes_to_visit -= diff;
1769 check_last_obj = true;
1770 }
1771 while (bytes_to_visit > 0) {
1772 mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
1773 VerifyObject(dest_obj, verify_obj_callback);
1774 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(this,
1775 dest_obj,
1776 nullptr,
1777 nullptr);
1778 obj_size = dest_obj->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
1779 obj_size = RoundUp(obj_size, kAlignment);
1780 bytes_to_visit -= obj_size;
1781 dest += obj_size;
1782 }
1783 DCHECK_EQ(bytes_to_visit, 0u);
1784 if (check_last_obj) {
1785 mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
1786 VerifyObject(dest_obj, verify_obj_callback);
1787 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this,
1788 dest_obj,
1789 nullptr,
1790 dest_page_end);
1791 mirror::Object* obj = GetFromSpaceAddr(next_page_first_obj);
1792 obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
1793 MemberOffset(0),
1794 MemberOffset(dest_page_end - dest));
1795 return;
1796 }
1797 }
1798
1799 // Probably a TLAB finished on this page and/or a new TLAB started as well.
1800 if (bytes_copied < kPageSize) {
1801 src_addr += first_chunk_size;
1802 pre_compact_addr += first_chunk_size;
1803 // Use mark-bitmap to identify where objects are. First call
1804 // VisitMarkedRange for only the first marked bit. If found, zero all bytes
1805 // until that object and then call memcpy on the rest of the page.
1806 // Then call VisitMarkedRange for all marked bits *after* the one found in
1807 // this invocation. This time to visit references.
1808 uintptr_t start_visit = reinterpret_cast<uintptr_t>(pre_compact_addr);
1809 uintptr_t page_end = reinterpret_cast<uintptr_t>(pre_compact_page_end);
1810 mirror::Object* found_obj = nullptr;
1811 moving_space_bitmap_->VisitMarkedRange</*kVisitOnce*/true>(start_visit,
1812 page_end,
1813 [&found_obj](mirror::Object* obj) {
1814 found_obj = obj;
1815 });
1816 size_t remaining_bytes = kPageSize - bytes_copied;
1817 if (found_obj == nullptr) {
1818 if (needs_memset_zero) {
1819 // No more black objects in this page. Zero the remaining bytes and return.
1820 std::memset(dest, 0x0, remaining_bytes);
1821 }
1822 return;
1823 }
1824 // Copy everything in this page, which includes any zeroed regions
1825 // in-between.
1826 std::memcpy(dest, src_addr, remaining_bytes);
1827 DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
1828 moving_space_bitmap_->VisitMarkedRange(
1829 reinterpret_cast<uintptr_t>(found_obj) + mirror::kObjectHeaderSize,
1830 page_end,
1831 [&found_obj, pre_compact_addr, dest, this, verify_obj_callback] (mirror::Object* obj)
1832 REQUIRES_SHARED(Locks::mutator_lock_) {
1833 ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
1834 mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff);
1835 VerifyObject(ref, verify_obj_callback);
1836 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
1837 visitor(this, ref, nullptr, nullptr);
1838 ref->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
1839 MemberOffset(0),
1840 MemberOffset(-1));
1841 // Remember for next round.
1842 found_obj = obj;
1843 });
1844 // found_obj may have been updated in VisitMarkedRange. Visit the last found
1845 // object.
1846 DCHECK_GT(reinterpret_cast<uint8_t*>(found_obj), pre_compact_addr);
1847 DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
1848 ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
1849 mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff);
1850 VerifyObject(ref, verify_obj_callback);
1851 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this,
1852 ref,
1853 nullptr,
1854 dest_page_end);
1855 ref->VisitRefsForCompaction</*kFetchObjSize*/false>(
1856 visitor, MemberOffset(0), MemberOffset(page_end -
1857 reinterpret_cast<uintptr_t>(found_obj)));
1858 }
1859 }
1860
1861 template <bool kFirstPageMapping>
1862 void MarkCompact::MapProcessedPages(uint8_t* to_space_start,
1863 Atomic<PageState>* state_arr,
1864 size_t arr_idx,
1865 size_t arr_len) {
1866 DCHECK(minor_fault_initialized_);
1867 DCHECK_LT(arr_idx, arr_len);
1868 DCHECK_ALIGNED(to_space_start, kPageSize);
1869 // Claim all the contiguous pages that are ready to be mapped, and then do
1870 // so in a single ioctl. This helps avoid the overhead of invoking the syscall
1871 // several times and also maps the already-processed pages, avoiding
1872 // unnecessary faults on them.
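// In essence, each claimed page's state goes kProcessed -> kProcessedAndMapping
// here, and (in the SIGBUS-based uffd scheme) to kProcessedAndMapped once the
// ioctl has actually mapped it; a failed CAS means the next page is not in the
// kProcessed state yet, so we stop extending the range there.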
1873 size_t length = kFirstPageMapping ? kPageSize : 0;
1874 if (kFirstPageMapping) {
1875 arr_idx++;
1876 }
1877 // We need to guarantee that we don't end up successfully marking a later
1878 // page 'mapping' and then fail to mark an earlier page. To guarantee that,
1879 // we use acq_rel order.
1880 for (; arr_idx < arr_len; arr_idx++, length += kPageSize) {
1881 PageState expected_state = PageState::kProcessed;
1882 if (!state_arr[arr_idx].compare_exchange_strong(
1883 expected_state, PageState::kProcessedAndMapping, std::memory_order_acq_rel)) {
1884 break;
1885 }
1886 }
1887 if (length > 0) {
1888 // Note: We need the first page to be attempted (to be mapped) by the ioctl
1889 // as this function is called due to some mutator thread waiting on the
1890 // 'to_space_start' page. Therefore, the ioctl must always be called
1891 // with 'to_space_start' as the 'start' address because it can bail out in
1892 // the middle (not attempting to map the subsequent pages) if it finds any
1893 // page either already mapped in between, or missing on the shadow-map.
1894 struct uffdio_continue uffd_continue;
1895 uffd_continue.range.start = reinterpret_cast<uintptr_t>(to_space_start);
1896 uffd_continue.range.len = length;
1897 uffd_continue.mode = 0;
1898 int ret = ioctl(uffd_, UFFDIO_CONTINUE, &uffd_continue);
1899 if (UNLIKELY(ret == -1 && errno == EAGAIN)) {
1900 // This can happen only in linear-alloc.
1901 DCHECK(linear_alloc_spaces_data_.end() !=
1902 std::find_if(linear_alloc_spaces_data_.begin(),
1903 linear_alloc_spaces_data_.end(),
1904 [to_space_start](const LinearAllocSpaceData& data) {
1905 return data.begin_ <= to_space_start && to_space_start < data.end_;
1906 }));
1907
1908 // This could happen if userfaultfd couldn't find any pages mapped in the
1909 // shadow map. For instance, if there are certain (contiguous) pages on
1910 // linear-alloc which are allocated and have first-object set-up but have
1911 // not been accessed yet.
1912 // Bail out by setting the remaining pages' state back to kProcessed and
1913 // then waking up any waiting threads.
1914 DCHECK_GE(uffd_continue.mapped, 0);
1915 DCHECK_ALIGNED(uffd_continue.mapped, kPageSize);
1916 DCHECK_LT(uffd_continue.mapped, static_cast<ssize_t>(length));
1917 if (kFirstPageMapping) {
1918 // In this case the first page must be mapped.
1919 DCHECK_GE(uffd_continue.mapped, static_cast<ssize_t>(kPageSize));
1920 }
1921 // Nobody would modify these pages' state simultaneously so only atomic
1922 // store is sufficient. Use 'release' order to ensure that all states are
1923 // modified sequentially.
1924 for (size_t remaining_len = length - uffd_continue.mapped; remaining_len > 0;
1925 remaining_len -= kPageSize) {
1926 arr_idx--;
1927 DCHECK_EQ(state_arr[arr_idx].load(std::memory_order_relaxed),
1928 PageState::kProcessedAndMapping);
1929 state_arr[arr_idx].store(PageState::kProcessed, std::memory_order_release);
1930 }
1931 uffd_continue.range.start =
1932 reinterpret_cast<uintptr_t>(to_space_start) + uffd_continue.mapped;
1933 uffd_continue.range.len = length - uffd_continue.mapped;
1934 ret = ioctl(uffd_, UFFDIO_WAKE, &uffd_continue.range);
1935 CHECK_EQ(ret, 0) << "ioctl_userfaultfd: wake failed: " << strerror(errno);
1936 } else {
1937 // We may receive ENOENT if gc-thread unregisters the
1938 // range behind our back, which is fine because that
1939 // happens only when it knows compaction is done.
1940 CHECK(ret == 0 || !kFirstPageMapping || errno == ENOENT)
1941 << "ioctl_userfaultfd: continue failed: " << strerror(errno);
1942 if (ret == 0) {
1943 DCHECK_EQ(uffd_continue.mapped, static_cast<ssize_t>(length));
1944 }
1945 }
1946 if (use_uffd_sigbus_) {
1947 // Nobody else would modify these pages' state simultaneously so atomic
1948 // store is sufficient.
1949 for (; uffd_continue.mapped > 0; uffd_continue.mapped -= kPageSize) {
1950 arr_idx--;
1951 DCHECK_EQ(state_arr[arr_idx].load(std::memory_order_relaxed),
1952 PageState::kProcessedAndMapping);
1953 state_arr[arr_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
1954 }
1955 }
1956 }
1957 }
1958
1959 void MarkCompact::ZeropageIoctl(void* addr, bool tolerate_eexist, bool tolerate_enoent) {
1960 struct uffdio_zeropage uffd_zeropage;
1961 DCHECK(IsAligned<kPageSize>(addr));
1962 uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr);
1963 uffd_zeropage.range.len = kPageSize;
1964 uffd_zeropage.mode = 0;
1965 int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage);
1966 if (LIKELY(ret == 0)) {
1967 DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(kPageSize));
1968 } else {
1969 CHECK((tolerate_enoent && errno == ENOENT) || (tolerate_eexist && errno == EEXIST))
1970 << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr;
1971 }
1972 }
1973
1974 void MarkCompact::CopyIoctl(void* dst, void* buffer) {
1975 struct uffdio_copy uffd_copy;
1976 uffd_copy.src = reinterpret_cast<uintptr_t>(buffer);
1977 uffd_copy.dst = reinterpret_cast<uintptr_t>(dst);
1978 uffd_copy.len = kPageSize;
1979 uffd_copy.mode = 0;
1980 CHECK_EQ(ioctl(uffd_, UFFDIO_COPY, &uffd_copy), 0)
1981 << "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". src:" << buffer
1982 << " dst:" << dst;
1983 DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize));
1984 }
1985
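// Roughly, the per-page protocol implemented below is: claim the page
// (kUnprocessed -> kProcessing/kProcessingAndMapping), run 'func' to produce its
// compacted contents, and then either hand the contents to the kernel with
// UFFDIO_COPY (copy mode) or publish kProcessed so the page can later be mapped
// with UFFDIO_CONTINUE (minor-fault mode). In fallback mode the contents are
// written directly to the to-space page, so no state machine is involved.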
1986 template <int kMode, typename CompactionFn>
1987 void MarkCompact::DoPageCompactionWithStateChange(size_t page_idx,
1988 size_t status_arr_len,
1989 uint8_t* to_space_page,
1990 uint8_t* page,
1991 CompactionFn func) {
1992 PageState expected_state = PageState::kUnprocessed;
1993 PageState desired_state =
1994 kMode == kCopyMode ? PageState::kProcessingAndMapping : PageState::kProcessing;
1995 // In the concurrent case (kMode != kFallbackMode) we need to ensure that the update
1996 // to moving_spaces_status_[page_idx] is released before the contents of the page are
1997 // made accessible to other threads.
1998 //
1999 // We need acquire ordering here to ensure that when the CAS fails, another thread
2000 // has completed processing the page, which is guaranteed by the release below.
2001 if (kMode == kFallbackMode || moving_pages_status_[page_idx].compare_exchange_strong(
2002 expected_state, desired_state, std::memory_order_acquire)) {
2003 func();
2004 if (kMode == kCopyMode) {
2005 CopyIoctl(to_space_page, page);
2006 if (use_uffd_sigbus_) {
2007 // Store is sufficient as no other thread would modify the status at this point.
2008 moving_pages_status_[page_idx].store(PageState::kProcessedAndMapped,
2009 std::memory_order_release);
2010 }
2011 } else if (kMode == kMinorFaultMode) {
2012 expected_state = PageState::kProcessing;
2013 desired_state = PageState::kProcessed;
2014 // The CAS needs to be done with release order to ensure that stores to the
2015 // page make it to memory *before* other threads observe that it's
2016 // ready to be mapped.
2017 if (!moving_pages_status_[page_idx].compare_exchange_strong(
2018 expected_state, desired_state, std::memory_order_release)) {
2019 // Some mutator has requested to map the page after processing it.
2020 DCHECK_EQ(expected_state, PageState::kProcessingAndMapping);
2021 MapProcessedPages</*kFirstPageMapping=*/true>(
2022 to_space_page, moving_pages_status_, page_idx, status_arr_len);
2023 }
2024 }
2025 } else {
2026 DCHECK_GT(expected_state, PageState::kProcessed);
2027 }
2028 }
2029
2030 void MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode) {
2031 // Thanks to sliding compaction, bump-pointer allocations, and reverse
2032 // compaction (see CompactMovingSpace) the logic here is pretty simple: find
2033 // the to-space page up to which compaction has finished, all the from-space
2034 // pages corresponding to this onwards can be freed. There are some corner
2035 // cases to be taken care of, which are described below.
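// For example, if compaction has progressed (in reverse) down to to-space page
// 'cur_page_idx', then any from-space page that only feeds already-compacted
// to-space pages can be madvised away; the loop below finds how far down the
// reclaim boundary can be moved this round.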
2036 size_t idx = last_checked_reclaim_page_idx_;
2037 // Find the to-space page up to which the corresponding from-space pages can be
2038 // freed.
2039 for (; idx > cur_page_idx; idx--) {
2040 PageState state = moving_pages_status_[idx - 1].load(std::memory_order_acquire);
2041 if (state == PageState::kMutatorProcessing) {
2042 // Some mutator is working on the page.
2043 break;
2044 }
2045 DCHECK(state >= PageState::kProcessed ||
2046 (state == PageState::kUnprocessed &&
2047 (mode == kFallbackMode || idx > moving_first_objs_count_)));
2048 }
2049 DCHECK_LE(idx, last_checked_reclaim_page_idx_);
2050 if (idx == last_checked_reclaim_page_idx_) {
2051 // Nothing to do.
2052 return;
2053 }
2054
2055 uint8_t* reclaim_begin;
2056 uint8_t* idx_addr;
2057 // Calculate the first from-space page to be freed using 'idx'. If the
2058 // first-object of the idx'th to-space page started before the corresponding
2059 // from-space page, which is almost always the case in the compaction portion
2060 // of the moving-space, then it indicates that the subsequent pages that are
2061 // yet to be compacted will need the from-space pages. Therefore, find the page
2062 // (from the already compacted pages) whose first-object is different from
2063 // ours. All the from-space pages starting from that one are safe to be
2064 // removed. Please note that this iteration is not expected to be long in
2065 // normal cases as objects are smaller than page size.
2066 if (idx >= moving_first_objs_count_) {
2067 // black-allocated portion of the moving-space
2068 idx_addr = black_allocations_begin_ + (idx - moving_first_objs_count_) * kPageSize;
2069 reclaim_begin = idx_addr;
2070 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2071 if (first_obj != nullptr && reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
2072 size_t idx_len = moving_first_objs_count_ + black_page_count_;
2073 for (size_t i = idx + 1; i < idx_len; i++) {
2074 mirror::Object* obj = first_objs_moving_space_[i].AsMirrorPtr();
2075 // A null first-object indicates that the corresponding to-space page is
2076 // not used yet. So we can compute its from-space page and use that.
2077 if (obj != first_obj) {
2078 reclaim_begin = obj != nullptr
2079 ? AlignUp(reinterpret_cast<uint8_t*>(obj), kPageSize)
2080 : (black_allocations_begin_ + (i - moving_first_objs_count_) * kPageSize);
2081 break;
2082 }
2083 }
2084 }
2085 } else {
2086 DCHECK_GE(pre_compact_offset_moving_space_[idx], 0u);
2087 idx_addr = bump_pointer_space_->Begin() + pre_compact_offset_moving_space_[idx] * kAlignment;
2088 reclaim_begin = idx_addr;
2089 DCHECK_LE(reclaim_begin, black_allocations_begin_);
2090 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2091 if (reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
2092 DCHECK_LT(idx, moving_first_objs_count_);
2093 mirror::Object* obj = first_obj;
2094 for (size_t i = idx + 1; i < moving_first_objs_count_; i++) {
2095 obj = first_objs_moving_space_[i].AsMirrorPtr();
2096 if (first_obj != obj) {
2097 DCHECK_LT(first_obj, obj);
2098 DCHECK_LT(reclaim_begin, reinterpret_cast<uint8_t*>(obj));
2099 reclaim_begin = reinterpret_cast<uint8_t*>(obj);
2100 break;
2101 }
2102 }
2103 if (obj == first_obj) {
2104 reclaim_begin = black_allocations_begin_;
2105 }
2106 }
2107 reclaim_begin = AlignUp(reclaim_begin, kPageSize);
2108 }
2109
2110 DCHECK_NE(reclaim_begin, nullptr);
2111 DCHECK_ALIGNED(reclaim_begin, kPageSize);
2112 DCHECK_ALIGNED(last_reclaimed_page_, kPageSize);
2113 // Check if the 'class_after_obj_ordered_map_' map allows pages to be freed.
2114 for (; class_after_obj_iter_ != class_after_obj_ordered_map_.rend(); class_after_obj_iter_++) {
2115 mirror::Object* klass = class_after_obj_iter_->first.AsMirrorPtr();
2116 mirror::Class* from_klass = static_cast<mirror::Class*>(GetFromSpaceAddr(klass));
2117 // Check with class' end to ensure that, if required, the entire class survives.
2118 uint8_t* klass_end = reinterpret_cast<uint8_t*>(klass) + from_klass->SizeOf<kVerifyNone>();
2119 DCHECK_LE(klass_end, last_reclaimed_page_);
2120 if (reinterpret_cast<uint8_t*>(klass_end) >= reclaim_begin) {
2121 // Found a class which is in the reclaim range.
2122 uint8_t* obj_addr = reinterpret_cast<uint8_t*>(class_after_obj_iter_->second.AsMirrorPtr());
2123 // NOTE: Don't assert that obj is of 'klass' type as klass could instead
2124 // be its super-class.
2125 if (obj_addr < idx_addr) {
2126 // Its lowest-address object is not compacted yet. Reclaim starting from
2127 // the end of this class.
2128 reclaim_begin = AlignUp(klass_end, kPageSize);
2129 } else {
2130 // Continue consuming pairs wherein the lowest address object has already
2131 // been compacted.
2132 continue;
2133 }
2134 }
2135 // All the remaining class (and thereby corresponding object) addresses are
2136 // lower than the reclaim range.
2137 break;
2138 }
2139
2140 ssize_t size = last_reclaimed_page_ - reclaim_begin;
2141 if (size >= kMinFromSpaceMadviseSize) {
2142 int behavior = minor_fault_initialized_ ? MADV_REMOVE : MADV_DONTNEED;
2143 CHECK_EQ(madvise(reclaim_begin + from_space_slide_diff_, size, behavior), 0)
2144 << "madvise of from-space failed: " << strerror(errno);
2145 last_reclaimed_page_ = reclaim_begin;
2146 }
2147 last_checked_reclaim_page_idx_ = idx;
2148 }
2149
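// Roughly speaking, this builds class_after_obj_ordered_map_: for every class (or
// its super-class) that lies at a higher address than some of its instances, record
// the lowest-address such instance. FreeFromSpacePages() consults this so that a
// from-space class is not madvised away while objects that still need to read it
// from from-space are yet to be compacted.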
2150 void MarkCompact::UpdateClassAfterObjMap() {
2151 CHECK(class_after_obj_ordered_map_.empty());
2152 for (const auto& pair : class_after_obj_hash_map_) {
2153 auto super_class_iter = super_class_after_class_hash_map_.find(pair.first);
2154 ObjReference key = super_class_iter != super_class_after_class_hash_map_.end()
2155 ? super_class_iter->second
2156 : pair.first;
2157 if (std::less<mirror::Object*>{}(pair.second.AsMirrorPtr(), key.AsMirrorPtr()) &&
2158 bump_pointer_space_->HasAddress(key.AsMirrorPtr())) {
2159 auto [ret_iter, success] = class_after_obj_ordered_map_.try_emplace(key, pair.second);
2160 // It could fail only if the class 'key' has objects of its own which are lower in
2161 // address order, as well as objects of some of its derived classes. In this case
2162 // choose the lowest-address object.
2163 if (!success &&
2164 std::less<mirror::Object*>{}(pair.second.AsMirrorPtr(), ret_iter->second.AsMirrorPtr())) {
2165 ret_iter->second = pair.second;
2166 }
2167 }
2168 }
2169 class_after_obj_hash_map_.clear();
2170 super_class_after_class_hash_map_.clear();
2171 }
2172
2173 template <int kMode>
2174 void MarkCompact::CompactMovingSpace(uint8_t* page) {
2175 // For every page we have a starting object, which may have started in some
2176 // preceding page, and an offset within that object from where we must start
2177 // copying.
2178 // Consult the live-words bitmap to copy all contiguously live words at a
2179 // time. These words may constitute multiple objects. To avoid the need for
2180 // consulting the mark-bitmap to find where the next live object starts, we
2181 // use the object-size returned by VisitRefsForCompaction.
2182 //
2183 // We do the compaction in reverse direction so that the pages containing
2184 // TLAB and latest allocations are processed first.
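// For instance, with page_status_arr_len pages to produce, the loops below walk
// idx from the last page down to 0: black (TLAB) pages are slid with
// SlideBlackPage() first, then the remaining pages are filled with CompactPage(),
// releasing the corresponding from-space pages along the way via FreeFromSpacePages().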
2185 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2186 size_t page_status_arr_len = moving_first_objs_count_ + black_page_count_;
2187 size_t idx = page_status_arr_len;
2188 uint8_t* to_space_end = bump_pointer_space_->Begin() + page_status_arr_len * kPageSize;
2189 uint8_t* shadow_space_end = nullptr;
2190 if (kMode == kMinorFaultMode) {
2191 shadow_space_end = shadow_to_space_map_.Begin() + page_status_arr_len * kPageSize;
2192 }
2193 uint8_t* pre_compact_page = black_allocations_begin_ + (black_page_count_ * kPageSize);
2194
2195 DCHECK(IsAligned<kPageSize>(pre_compact_page));
2196
2197 UpdateClassAfterObjMap();
2198 // These variables are maintained by FreeFromSpacePages().
2199 last_reclaimed_page_ = pre_compact_page;
2200 last_checked_reclaim_page_idx_ = idx;
2201 class_after_obj_iter_ = class_after_obj_ordered_map_.rbegin();
2202 // Allocated-black pages
2203 while (idx > moving_first_objs_count_) {
2204 idx--;
2205 pre_compact_page -= kPageSize;
2206 to_space_end -= kPageSize;
2207 if (kMode == kMinorFaultMode) {
2208 shadow_space_end -= kPageSize;
2209 page = shadow_space_end;
2210 } else if (kMode == kFallbackMode) {
2211 page = to_space_end;
2212 }
2213 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2214 if (first_obj != nullptr) {
2215 DoPageCompactionWithStateChange<kMode>(
2216 idx,
2217 page_status_arr_len,
2218 to_space_end,
2219 page,
2220 [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2221 SlideBlackPage(first_obj, idx, pre_compact_page, page, kMode == kCopyMode);
2222 });
2223 // We are sliding here, so no point attempting to madvise for every
2224 // page. Wait for enough pages to be done.
2225 if (idx % (kMinFromSpaceMadviseSize / kPageSize) == 0) {
2226 FreeFromSpacePages(idx, kMode);
2227 }
2228 }
2229 }
2230 DCHECK_EQ(pre_compact_page, black_allocations_begin_);
2231
2232 while (idx > 0) {
2233 idx--;
2234 to_space_end -= kPageSize;
2235 if (kMode == kMinorFaultMode) {
2236 shadow_space_end -= kPageSize;
2237 page = shadow_space_end;
2238 } else if (kMode == kFallbackMode) {
2239 page = to_space_end;
2240 }
2241 mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
2242 DoPageCompactionWithStateChange<kMode>(
2243 idx, page_status_arr_len, to_space_end, page, [&]() REQUIRES_SHARED(Locks::mutator_lock_) {
2244 CompactPage(first_obj, pre_compact_offset_moving_space_[idx], page, kMode == kCopyMode);
2245 });
2246 FreeFromSpacePages(idx, kMode);
2247 }
2248 DCHECK_EQ(to_space_end, bump_pointer_space_->Begin());
2249 }
2250
2251 void MarkCompact::UpdateNonMovingPage(mirror::Object* first, uint8_t* page) {
2252 DCHECK_LT(reinterpret_cast<uint8_t*>(first), page + kPageSize);
2253 // For every object found in the page, visit the previous object. This ensures
2254 // that we can visit without checking page-end boundary.
2255 // Call VisitRefsForCompaction with from-space read-barrier as the klass object and
2256 // super-class loads require it.
2257 // TODO: Set kVisitNativeRoots to false once we implement concurrent
2258 // compaction
2259 mirror::Object* curr_obj = first;
2260 non_moving_space_bitmap_->VisitMarkedRange(
2261 reinterpret_cast<uintptr_t>(first) + mirror::kObjectHeaderSize,
2262 reinterpret_cast<uintptr_t>(page + kPageSize),
2263 [&](mirror::Object* next_obj) {
2264 // TODO: Once non-moving space update becomes concurrent, we'll
2265 // require fetching the from-space address of 'curr_obj' and then call
2266 // visitor on that.
2267 if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2268 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false>
2269 visitor(this, curr_obj, page, page + kPageSize);
2270 MemberOffset begin_offset(page - reinterpret_cast<uint8_t*>(curr_obj));
2271 // Native roots shouldn't be visited here as they were already visited when this
2272 // object's beginning was visited in the preceding page.
2273 curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false, /*kVisitNativeRoots*/false>(
2274 visitor, begin_offset, MemberOffset(-1));
2275 } else {
2276 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
2277 visitor(this, curr_obj, page, page + kPageSize);
2278 curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
2279 MemberOffset(0),
2280 MemberOffset(-1));
2281 }
2282 curr_obj = next_obj;
2283 });
2284
2285 MemberOffset end_offset(page + kPageSize - reinterpret_cast<uint8_t*>(curr_obj));
2286 if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
2287 RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true>
2288 visitor(this, curr_obj, page, page + kPageSize);
2289 curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false, /*kVisitNativeRoots*/false>(
2290 visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset);
2291 } else {
2292 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true>
2293 visitor(this, curr_obj, page, page + kPageSize);
2294 curr_obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor, MemberOffset(0), end_offset);
2295 }
2296 }
2297
2298 void MarkCompact::UpdateNonMovingSpace() {
2299 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2300 // Iterating in reverse ensures that the class pointer in objects which span
2301 // across more than one page gets updated in the end. This is necessary for
2302 // VisitRefsForCompaction() to work correctly.
2303 // TODO: If and when we make non-moving space update concurrent, implement a
2304 // mechanism to remember class pointers for such objects off-heap and pass it
2305 // to VisitRefsForCompaction().
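// Intuitively, if an object straddles pages i and i+1, page i+1 is handled first,
// while the object's class pointer (which lives on page i) still holds the
// pre-compaction address that the from-space read-barrier expects; the pointer
// itself is only updated when page i is handled.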
2306 uint8_t* page = non_moving_space_->Begin() + non_moving_first_objs_count_ * kPageSize;
2307 for (ssize_t i = non_moving_first_objs_count_ - 1; i >= 0; i--) {
2308 mirror::Object* obj = first_objs_non_moving_space_[i].AsMirrorPtr();
2309 page -= kPageSize;
2310 // null means there are no objects on the page whose references need updating.
2311 if (obj != nullptr) {
2312 UpdateNonMovingPage(obj, page);
2313 }
2314 }
2315 }
2316
2317 void MarkCompact::UpdateMovingSpaceBlackAllocations() {
2318 // For sliding black pages, we need the first-object, which overlaps with the
2319 // first byte of the page. Additionally, we compute the size of the first chunk of
2320 // black objects. This will suffice for most black pages. Unlike compaction
2321 // pages, here we don't need to pre-compute the offset within first-obj from
2322 // where sliding has to start. That can be calculated using the pre-compact
2323 // address of the page. Therefore, to save space, we store the first chunk's
2324 // size in black_alloc_pages_first_chunk_size_ array.
2325 // For the pages which may have holes after the first chunk, which could happen
2326 // if a new TLAB starts in the middle of the page, we mark the objects in
2327 // the mark-bitmap. So, if the first-chunk size is smaller than kPageSize,
2328 // then we use the mark-bitmap for the remainder of the page.
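// A small example, assuming 4 KiB pages: if the current TLAB's objects end 1 KiB
// into a black page, its unused tail covers the next 1 KiB, and a new TLAB starts
// at 2 KiB, then this page's first chunk is that initial 1 KiB and the new TLAB's
// objects get their mark-bitmap bits set below so SlideBlackPage() can find them.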
2329 uint8_t* const begin = bump_pointer_space_->Begin();
2330 uint8_t* black_allocs = black_allocations_begin_;
2331 DCHECK_LE(begin, black_allocs);
2332 size_t consumed_blocks_count = 0;
2333 size_t first_block_size;
2334 // Get the list of all blocks allocated in the bump-pointer space.
2335 std::vector<size_t>* block_sizes = bump_pointer_space_->GetBlockSizes(thread_running_gc_,
2336 &first_block_size);
2337 DCHECK_LE(first_block_size, (size_t)(black_allocs - begin));
2338 if (block_sizes != nullptr) {
2339 size_t black_page_idx = moving_first_objs_count_;
2340 uint8_t* block_end = begin + first_block_size;
2341 uint32_t remaining_chunk_size = 0;
2342 uint32_t first_chunk_size = 0;
2343 mirror::Object* first_obj = nullptr;
2344 for (size_t block_size : *block_sizes) {
2345 block_end += block_size;
2346 // Skip the blocks that are prior to the black allocations. These will be
2347 // merged with the main-block later.
2348 if (black_allocs >= block_end) {
2349 consumed_blocks_count++;
2350 continue;
2351 }
2352 mirror::Object* obj = reinterpret_cast<mirror::Object*>(black_allocs);
2353 bool set_mark_bit = remaining_chunk_size > 0;
2354 // We don't know how many objects are allocated in the current block. When we hit
2355 // a null, assume it's the end. This works because every block is expected to
2356 // have objects allocated linearly using bump-pointer.
2357 // BumpPointerSpace::Walk() also works similarly.
2358 while (black_allocs < block_end
2359 && obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
2360 // Try to keep instructions which access class instance together to
2361 // avoid reloading the pointer from object.
2362 size_t obj_size = obj->SizeOf();
2363 bytes_scanned_ += obj_size;
2364 obj_size = RoundUp(obj_size, kAlignment);
2365 UpdateClassAfterObjectMap(obj);
2366 if (first_obj == nullptr) {
2367 first_obj = obj;
2368 }
2369 // We only need the mark-bitmap in the pages wherein a new TLAB starts in
2370 // the middle of the page.
2371 if (set_mark_bit) {
2372 moving_space_bitmap_->Set(obj);
2373 }
2374 // Handle objects which cross page boundary, including objects larger
2375 // than page size.
2376 if (remaining_chunk_size + obj_size >= kPageSize) {
2377 set_mark_bit = false;
2378 first_chunk_size += kPageSize - remaining_chunk_size;
2379 remaining_chunk_size += obj_size;
2380 // We should not store first-object and remaining_chunk_size if there were
2381 // unused bytes before this TLAB, in which case we must have already
2382 // stored the values (below).
2383 if (black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2384 black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2385 first_objs_moving_space_[black_page_idx].Assign(first_obj);
2386 }
2387 black_page_idx++;
2388 remaining_chunk_size -= kPageSize;
2389 // Consume an object larger than page size.
2390 while (remaining_chunk_size >= kPageSize) {
2391 black_alloc_pages_first_chunk_size_[black_page_idx] = kPageSize;
2392 first_objs_moving_space_[black_page_idx].Assign(obj);
2393 black_page_idx++;
2394 remaining_chunk_size -= kPageSize;
2395 }
2396 first_obj = remaining_chunk_size > 0 ? obj : nullptr;
2397 first_chunk_size = remaining_chunk_size;
2398 } else {
2399 DCHECK_LE(first_chunk_size, remaining_chunk_size);
2400 first_chunk_size += obj_size;
2401 remaining_chunk_size += obj_size;
2402 }
2403 black_allocs += obj_size;
2404 obj = reinterpret_cast<mirror::Object*>(black_allocs);
2405 }
2406 DCHECK_LE(black_allocs, block_end);
2407 DCHECK_LT(remaining_chunk_size, kPageSize);
2408 // consume the unallocated portion of the block
2409 if (black_allocs < block_end) {
2410 // first-chunk of the current page ends here. Store it.
2411 if (first_chunk_size > 0 && black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
2412 black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2413 first_objs_moving_space_[black_page_idx].Assign(first_obj);
2414 }
2415 first_chunk_size = 0;
2416 first_obj = nullptr;
2417 size_t page_remaining = kPageSize - remaining_chunk_size;
2418 size_t block_remaining = block_end - black_allocs;
2419 if (page_remaining <= block_remaining) {
2420 block_remaining -= page_remaining;
2421 // current page and the subsequent empty pages in the block
2422 black_page_idx += 1 + block_remaining / kPageSize;
2423 remaining_chunk_size = block_remaining % kPageSize;
2424 } else {
2425 remaining_chunk_size += block_remaining;
2426 }
2427 black_allocs = block_end;
2428 }
2429 }
2430 if (black_page_idx < bump_pointer_space_->Size() / kPageSize) {
2431 // Store the leftover first-chunk, if any, and update page index.
2432 if (black_alloc_pages_first_chunk_size_[black_page_idx] > 0) {
2433 black_page_idx++;
2434 } else if (first_chunk_size > 0) {
2435 black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
2436 first_objs_moving_space_[black_page_idx].Assign(first_obj);
2437 black_page_idx++;
2438 }
2439 }
2440 black_page_count_ = black_page_idx - moving_first_objs_count_;
2441 delete block_sizes;
2442 }
2443 // Update bump-pointer space by consuming all the pre-black blocks into the
2444 // main one.
2445 bump_pointer_space_->SetBlockSizes(thread_running_gc_,
2446 post_compact_end_ - begin,
2447 consumed_blocks_count);
2448 }
2449
2450 void MarkCompact::UpdateNonMovingSpaceBlackAllocations() {
2451 accounting::ObjectStack* stack = heap_->GetAllocationStack();
2452 const StackReference<mirror::Object>* limit = stack->End();
2453 uint8_t* const space_begin = non_moving_space_->Begin();
2454 for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) {
2455 mirror::Object* obj = it->AsMirrorPtr();
2456 if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) {
2457 non_moving_space_bitmap_->Set(obj);
2458 // Clear so that we don't try to set the bit again in the next GC-cycle.
2459 it->Clear();
2460 size_t idx = (reinterpret_cast<uint8_t*>(obj) - space_begin) / kPageSize;
2461 uint8_t* page_begin = AlignDown(reinterpret_cast<uint8_t*>(obj), kPageSize);
2462 mirror::Object* first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr();
2463 if (first_obj == nullptr
2464 || (obj < first_obj && reinterpret_cast<uint8_t*>(first_obj) > page_begin)) {
2465 first_objs_non_moving_space_[idx].Assign(obj);
2466 }
2467 mirror::Object* next_page_first_obj = first_objs_non_moving_space_[++idx].AsMirrorPtr();
2468 uint8_t* next_page_begin = page_begin + kPageSize;
2469 if (next_page_first_obj == nullptr
2470 || reinterpret_cast<uint8_t*>(next_page_first_obj) > next_page_begin) {
2471 size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
2472 uint8_t* obj_end = reinterpret_cast<uint8_t*>(obj) + obj_size;
2473 while (next_page_begin < obj_end) {
2474 first_objs_non_moving_space_[idx++].Assign(obj);
2475 next_page_begin += kPageSize;
2476 }
2477 }
2478 // update first_objs count in case we went past non_moving_first_objs_count_
2479 non_moving_first_objs_count_ = std::max(non_moving_first_objs_count_, idx);
2480 }
2481 }
2482 }
2483
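// Visitor for updating references held by objects in immune spaces (e.g. image and
// zygote spaces): it simply runs RefsUpdateVisitor over every object handed to it,
// optionally visiting native roots as well.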
2484 class MarkCompact::ImmuneSpaceUpdateObjVisitor {
2485 public:
2486 ImmuneSpaceUpdateObjVisitor(MarkCompact* collector, bool visit_native_roots)
2487 : collector_(collector), visit_native_roots_(visit_native_roots) {}
2488
2489 ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES(Locks::mutator_lock_) {
2490 RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(collector_,
2491 obj,
2492 /*begin_*/nullptr,
2493 /*end_*/nullptr);
2494 if (visit_native_roots_) {
2495 obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ true>(
2496 visitor, MemberOffset(0), MemberOffset(-1));
2497 } else {
2498 obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
2499 visitor, MemberOffset(0), MemberOffset(-1));
2500 }
2501 }
2502
2503 static void Callback(mirror::Object* obj, void* arg) REQUIRES(Locks::mutator_lock_) {
2504 reinterpret_cast<ImmuneSpaceUpdateObjVisitor*>(arg)->operator()(obj);
2505 }
2506
2507 private:
2508 MarkCompact* const collector_;
2509 const bool visit_native_roots_;
2510 };
2511
2512 class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor {
2513 public:
2514 explicit ClassLoaderRootsUpdater(MarkCompact* collector) : collector_(collector) {}
2515
Visit(ObjPtr<mirror::ClassLoader> class_loader)2516 void Visit(ObjPtr<mirror::ClassLoader> class_loader) override
2517 REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) {
2518 ClassTable* const class_table = class_loader->GetClassTable();
2519 if (class_table != nullptr) {
2520 class_table->VisitRoots(*this);
2521 }
2522 }
2523
VisitRootIfNonNull(mirror::CompressedReference<mirror::Object> * root) const2524 void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
2525 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
2526 if (!root->IsNull()) {
2527 VisitRoot(root);
2528 }
2529 }
2530
VisitRoot(mirror::CompressedReference<mirror::Object> * root) const2531 void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
2532 REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
2533 collector_->VisitRoots(&root, 1, RootInfo(RootType::kRootVMInternal));
2534 }
2535
2536 private:
2537 MarkCompact* collector_;
2538 };
2539
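// Walks the linear-alloc objects overlapping a single page (GC-root arrays,
// ArtMethod/ArtField arrays, dex-cache arrays, etc.) and updates any GC roots
// they hold that point into the moving space to their post-compaction
// addresses.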
2540 class MarkCompact::LinearAllocPageUpdater {
2541 public:
LinearAllocPageUpdater(MarkCompact * collector)2542 explicit LinearAllocPageUpdater(MarkCompact* collector) : collector_(collector) {}
2543
operator ()(uint8_t * page_begin,uint8_t * first_obj)2544 void operator()(uint8_t* page_begin, uint8_t* first_obj) ALWAYS_INLINE
2545 REQUIRES_SHARED(Locks::mutator_lock_) {
2546 DCHECK_ALIGNED(page_begin, kPageSize);
2547 uint8_t* page_end = page_begin + kPageSize;
2548 uint32_t obj_size;
2549 for (uint8_t* byte = first_obj; byte < page_end;) {
2550 TrackingHeader* header = reinterpret_cast<TrackingHeader*>(byte);
2551 obj_size = header->GetSize();
2552 if (UNLIKELY(obj_size == 0)) {
2553 // No more objects in this page to visit.
2554 last_page_touched_ = byte >= page_begin;
2555 return;
2556 }
2557 uint8_t* obj = byte + sizeof(TrackingHeader);
2558 uint8_t* obj_end = byte + obj_size;
2559 if (header->Is16Aligned()) {
2560 obj = AlignUp(obj, 16);
2561 }
2562 uint8_t* begin_boundary = std::max(obj, page_begin);
2563 uint8_t* end_boundary = std::min(obj_end, page_end);
2564 if (begin_boundary < end_boundary) {
2565 VisitObject(header->GetKind(), obj, begin_boundary, end_boundary);
2566 }
2567 if (ArenaAllocator::IsRunningOnMemoryTool()) {
2568 obj_size += ArenaAllocator::kMemoryToolRedZoneBytes;
2569 }
2570 byte += RoundUp(obj_size, LinearAlloc::kAlignment);
2571 }
2572 last_page_touched_ = true;
2573 }
2574
WasLastPageTouched() const2575 bool WasLastPageTouched() const { return last_page_touched_; }
2576
VisitRootIfNonNull(mirror::CompressedReference<mirror::Object> * root) const2577 void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
2578 ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
2579 if (!root->IsNull()) {
2580 VisitRoot(root);
2581 }
2582 }
2583
VisitRoot(mirror::CompressedReference<mirror::Object> * root) const2584 void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
2585 ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
2586 mirror::Object* old_ref = root->AsMirrorPtr();
2587 DCHECK_NE(old_ref, nullptr);
2588 if (collector_->live_words_bitmap_->HasAddress(old_ref)) {
2589 mirror::Object* new_ref = old_ref;
2590 if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) {
2591 new_ref = collector_->PostCompactBlackObjAddr(old_ref);
2592 } else if (collector_->live_words_bitmap_->Test(old_ref)) {
2593 DCHECK(collector_->moving_space_bitmap_->Test(old_ref)) << old_ref;
2594 new_ref = collector_->PostCompactOldObjAddr(old_ref);
2595 }
2596 if (old_ref != new_ref) {
2597 root->Assign(new_ref);
2598 }
2599 }
2600 }
2601
2602 private:
VisitObject(LinearAllocKind kind,void * obj,uint8_t * start_boundary,uint8_t * end_boundary) const2603 void VisitObject(LinearAllocKind kind,
2604 void* obj,
2605 uint8_t* start_boundary,
2606 uint8_t* end_boundary) const REQUIRES_SHARED(Locks::mutator_lock_) {
2607 switch (kind) {
2608 case LinearAllocKind::kNoGCRoots:
2609 break;
2610 case LinearAllocKind::kGCRootArray:
2611 {
2612 GcRoot<mirror::Object>* root = reinterpret_cast<GcRoot<mirror::Object>*>(start_boundary);
2613 GcRoot<mirror::Object>* last = reinterpret_cast<GcRoot<mirror::Object>*>(end_boundary);
2614 for (; root < last; root++) {
2615 VisitRootIfNonNull(root->AddressWithoutBarrier());
2616 }
2617 }
2618 break;
2619 case LinearAllocKind::kArtMethodArray:
2620 {
2621 LengthPrefixedArray<ArtMethod>* array = static_cast<LengthPrefixedArray<ArtMethod>*>(obj);
2622 // Old methods are clobbered in debug builds. Check size to confirm if the array
2623 // has any GC roots to visit. See ClassLinker::LinkMethodsHelper::ClobberOldMethods()
2624 if (array->size() > 0) {
2625 if (collector_->pointer_size_ == PointerSize::k64) {
2626 ArtMethod::VisitArrayRoots<PointerSize::k64>(
2627 *this, start_boundary, end_boundary, array);
2628 } else {
2629 DCHECK_EQ(collector_->pointer_size_, PointerSize::k32);
2630 ArtMethod::VisitArrayRoots<PointerSize::k32>(
2631 *this, start_boundary, end_boundary, array);
2632 }
2633 }
2634 }
2635 break;
2636 case LinearAllocKind::kArtMethod:
2637 ArtMethod::VisitRoots(*this, start_boundary, end_boundary, static_cast<ArtMethod*>(obj));
2638 break;
2639 case LinearAllocKind::kArtFieldArray:
2640 ArtField::VisitArrayRoots(*this,
2641 start_boundary,
2642 end_boundary,
2643 static_cast<LengthPrefixedArray<ArtField>*>(obj));
2644 break;
2645 case LinearAllocKind::kDexCacheArray:
2646 {
2647 mirror::DexCachePair<mirror::Object>* first =
2648 reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(start_boundary);
2649 mirror::DexCachePair<mirror::Object>* last =
2650 reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(end_boundary);
2651 mirror::DexCache::VisitDexCachePairRoots(*this, first, last);
2652 }
2653 }
2654 }
2655
2656 MarkCompact* const collector_;
2657 // Whether the last page was touched or not.
2658 bool last_page_touched_;
2659 };
2660
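// Second (compaction) pause: catches up on black allocations made since the
// marking pause, flips 'compacting_', updates all roots (system weaks, class
// loaders, immune spaces, pre-zygote linear-alloc arenas, etc.), and prepares
// the address space and userfaultfd registrations for concurrent compaction.
// In fallback mode the moving space is compacted right here instead.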
CompactionPause()2661 void MarkCompact::CompactionPause() {
2662 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2663 Runtime* runtime = Runtime::Current();
2664 non_moving_space_bitmap_ = non_moving_space_->GetLiveBitmap();
2665 if (kIsDebugBuild) {
2666 DCHECK_EQ(thread_running_gc_, Thread::Current());
2667 stack_low_addr_ = thread_running_gc_->GetStackEnd();
2668 stack_high_addr_ =
2669 reinterpret_cast<char*>(stack_low_addr_) + thread_running_gc_->GetStackSize();
2670 }
2671 {
2672 TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings());
2673 ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
2674     // Refresh data-structures to catch up on allocations that may have
2675     // happened since the marking-phase pause.
2676 // There could be several TLABs that got allocated since marking pause. We
2677 // don't want to compact them and instead update the TLAB info in TLS and
2678 // let mutators continue to use the TLABs.
2679 // We need to set all the bits in live-words bitmap corresponding to allocated
2680 // objects. Also, we need to find the objects that are overlapping with
2681 // page-begin boundaries. Unlike objects allocated before
2682 // black_allocations_begin_, which can be identified via mark-bitmap, we can get
2683 // this info only via walking the space past black_allocations_begin_, which
2684 // involves fetching object size.
2685 // TODO: We can reduce the time spent on this in a pause by performing one
2686 // round of this concurrently prior to the pause.
2687 UpdateMovingSpaceBlackAllocations();
2688 // TODO: If we want to avoid this allocation in a pause then we will have to
2689 // allocate an array for the entire moving-space size, which can be made
2690 // part of info_map_.
2691 moving_pages_status_ = new Atomic<PageState>[moving_first_objs_count_ + black_page_count_];
2692 if (kIsDebugBuild) {
2693 size_t len = moving_first_objs_count_ + black_page_count_;
2694 for (size_t i = 0; i < len; i++) {
2695 CHECK_EQ(moving_pages_status_[i].load(std::memory_order_relaxed),
2696 PageState::kUnprocessed);
2697 }
2698 }
2699 // Iterate over the allocation_stack_, for every object in the non-moving
2700 // space:
2701 // 1. Mark the object in live bitmap
2702 // 2. Erase the object from allocation stack
2703 // 3. In the corresponding page, if the first-object vector needs updating
2704 // then do so.
2705 UpdateNonMovingSpaceBlackAllocations();
2706
2707     // This store is visible to mutators (and uffd worker threads) as the
2708     // mutator lock's unlock guarantees that.
2709 compacting_ = true;
2710 // Start updating roots and system weaks now.
2711 heap_->GetReferenceProcessor()->UpdateRoots(this);
2712 }
2713 {
2714 TimingLogger::ScopedTiming t2("(Paused)UpdateClassLoaderRoots", GetTimings());
2715 ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
2716 {
2717 ClassLoaderRootsUpdater updater(this);
2718 runtime->GetClassLinker()->VisitClassLoaders(&updater);
2719 }
2720 }
2721
2722 bool has_zygote_space = heap_->HasZygoteSpace();
2723 // TODO: Find out why it's not sufficient to visit native roots of immune
2724 // spaces, and why all the pre-zygote fork arenas have to be linearly updated.
2725 // Is it possible that some native root starts getting pointed to by some object
2726 // in moving space after fork? Or are we missing a write-barrier somewhere
2727 // when a native root is updated?
2728 GcVisitedArenaPool* arena_pool =
2729 static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
2730 if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) {
2731 // Besides fallback-mode, visit linear-alloc space in the pause for zygote
2732 // processes prior to first fork (that's when zygote space gets created).
2733 if (kIsDebugBuild && IsValidFd(uffd_)) {
2734 // All arenas allocated so far are expected to be pre-zygote fork.
2735 arena_pool->ForEachAllocatedArena(
2736 [](const TrackedArena& arena)
2737 REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); });
2738 }
2739 LinearAllocPageUpdater updater(this);
2740 arena_pool->VisitRoots(updater);
2741 } else {
2742 // Clear the flag as we care about this only if arenas are freed during
2743 // concurrent compaction.
2744 arena_pool->ClearArenasFreed();
2745 arena_pool->ForEachAllocatedArena(
2746 [this](const TrackedArena& arena) REQUIRES_SHARED(Locks::mutator_lock_) {
2747 // The pre-zygote fork arenas are not visited concurrently in the
2748 // zygote children processes. The native roots of the dirty objects
2749 // are visited during immune space visit below.
2750 if (!arena.IsPreZygoteForkArena()) {
2751 uint8_t* last_byte = arena.GetLastUsedByte();
2752 CHECK(linear_alloc_arenas_.insert({&arena, last_byte}).second);
2753 } else {
2754 LinearAllocPageUpdater updater(this);
2755 arena.VisitRoots(updater);
2756 }
2757 });
2758 }
2759
2760 SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ true);
2761
2762 {
2763 TimingLogger::ScopedTiming t2("(Paused)UpdateConcurrentRoots", GetTimings());
2764 runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
2765 }
2766 {
2767 // TODO: don't visit the transaction roots if it's not active.
2768 TimingLogger::ScopedTiming t2("(Paused)UpdateNonThreadRoots", GetTimings());
2769 runtime->VisitNonThreadRoots(this);
2770 }
2771
2772 {
2773     // TODO: Updating immune spaces has to happen either before or after
2774     // remapping pre-compact pages to from-space. Depending on when it's
2775     // done, we have to invoke VisitRefsForCompaction() with or without
2776     // read-barrier.
2777 TimingLogger::ScopedTiming t2("(Paused)UpdateImmuneSpaces", GetTimings());
2778 accounting::CardTable* const card_table = heap_->GetCardTable();
2779 for (auto& space : immune_spaces_.GetSpaces()) {
2780 DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
2781 accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
2782 accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
2783       // Having a zygote-space indicates that the first zygote fork has taken
2784       // place and that the classes/dex-caches in immune-spaces may have allocations
2785       // (ArtMethod/ArtField arrays, dex-cache array, etc.) in private-anonymous
2786       // mappings that are not visited via userfaultfd. Visit them here.
2787 ImmuneSpaceUpdateObjVisitor visitor(this, /*visit_native_roots=*/false);
2788 if (table != nullptr) {
2789 table->ProcessCards();
2790 table->VisitObjects(ImmuneSpaceUpdateObjVisitor::Callback, &visitor);
2791 } else {
2792 WriterMutexLock wmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
2793 card_table->Scan<false>(
2794 live_bitmap,
2795 space->Begin(),
2796 space->Limit(),
2797 visitor,
2798 accounting::CardTable::kCardDirty - 1);
2799 }
2800 }
2801 }
2802
2803 if (use_uffd_sigbus_) {
2804 // Release order wrt to mutator threads' SIGBUS handler load.
2805 sigbus_in_progress_count_.store(0, std::memory_order_release);
2806 }
2807 KernelPreparation();
2808 UpdateNonMovingSpace();
2809 // fallback mode
2810 if (uffd_ == kFallbackMode) {
2811 CompactMovingSpace<kFallbackMode>(nullptr);
2812
2813 int32_t freed_bytes = black_objs_slide_diff_;
2814 bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
2815 RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
2816 } else {
2817 DCHECK_EQ(compaction_in_progress_count_.load(std::memory_order_relaxed), 0u);
2818 DCHECK_EQ(compaction_buffer_counter_.load(std::memory_order_relaxed), 1);
2819 if (!use_uffd_sigbus_) {
2820 // We must start worker threads before resuming mutators to avoid deadlocks.
2821 heap_->GetThreadPool()->StartWorkers(thread_running_gc_);
2822 }
2823 }
2824 stack_low_addr_ = nullptr;
2825 }
2826
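// Atomically turns [to_addr, to_addr + map_size) into the from-space by
// mremap-ing its pages over to 'from_addr', and then re-establishes a mapping
// at 'to_addr' (from the shadow map, the given fd, or a fresh anonymous
// mapping) so that userfaultfd can intercept subsequent accesses.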
KernelPrepareRangeForUffd(uint8_t * to_addr,uint8_t * from_addr,size_t map_size,int fd,uint8_t * shadow_addr)2827 void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr,
2828 uint8_t* from_addr,
2829 size_t map_size,
2830 int fd,
2831 uint8_t* shadow_addr) {
2832 int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED;
2833 if (gHaveMremapDontunmap) {
2834 mremap_flags |= MREMAP_DONTUNMAP;
2835 }
2836
2837 void* ret = mremap(to_addr, map_size, map_size, mremap_flags, from_addr);
2838 CHECK_EQ(ret, static_cast<void*>(from_addr))
2839 << "mremap to move pages failed: " << strerror(errno)
2840 << ". space-addr=" << reinterpret_cast<void*>(to_addr) << " size=" << PrettySize(map_size);
2841
2842 if (shadow_addr != nullptr) {
2843 DCHECK_EQ(fd, kFdUnused);
2844 DCHECK(gHaveMremapDontunmap);
2845 ret = mremap(shadow_addr, map_size, map_size, mremap_flags, to_addr);
2846 CHECK_EQ(ret, static_cast<void*>(to_addr))
2847 << "mremap from shadow to to-space map failed: " << strerror(errno);
2848 } else if (!gHaveMremapDontunmap || fd > kFdUnused) {
2849 // Without MREMAP_DONTUNMAP the source mapping is unmapped by mremap. So mmap
2850 // the moving space again.
2851 int mmap_flags = MAP_FIXED;
2852 if (fd == kFdUnused) {
2853       // Use MAP_FIXED_NOREPLACE so that if someone else reserves the 'to_addr'
2854       // mapping in the meantime, which can happen when MREMAP_DONTUNMAP isn't
2855       // available, we fail instead of silently unmapping someone else's mapping
2856       // and causing crashes elsewhere.
2857 mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
2858 // On some platforms MAP_ANONYMOUS expects fd to be -1.
2859 fd = -1;
2860 } else if (IsValidFd(fd)) {
2861 mmap_flags |= MAP_SHARED;
2862 } else {
2863 DCHECK_EQ(fd, kFdSharedAnon);
2864 mmap_flags |= MAP_SHARED | MAP_ANONYMOUS;
2865 }
2866 ret = mmap(to_addr, map_size, PROT_READ | PROT_WRITE, mmap_flags, fd, 0);
2867 CHECK_EQ(ret, static_cast<void*>(to_addr))
2868 << "mmap for moving space failed: " << strerror(errno);
2869 }
2870 }
2871
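// Remaps the moving space and linear-alloc spaces to their from/shadow
// counterparts and registers the resulting ranges with userfaultfd, selecting
// copy or minor-fault mode based on what has been initialized so far.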
KernelPreparation()2872 void MarkCompact::KernelPreparation() {
2873 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
2874 uint8_t* moving_space_begin = bump_pointer_space_->Begin();
2875 size_t moving_space_size = bump_pointer_space_->Capacity();
2876 int mode = kCopyMode;
2877 size_t moving_space_register_sz;
2878 if (minor_fault_initialized_) {
2879 moving_space_register_sz = (moving_first_objs_count_ + black_page_count_) * kPageSize;
2880 if (shadow_to_space_map_.IsValid()) {
2881 size_t shadow_size = shadow_to_space_map_.Size();
2882 void* addr = shadow_to_space_map_.Begin();
2883 if (shadow_size < moving_space_register_sz) {
2884 addr = mremap(addr,
2885 shadow_size,
2886 moving_space_register_sz,
2887 // Don't allow moving with obj-ptr poisoning as the
2888 // mapping needs to be in <4GB address space.
2889 kObjPtrPoisoning ? 0 : MREMAP_MAYMOVE,
2890 /*new_address=*/nullptr);
2891 if (addr != MAP_FAILED) {
2892 // Succeeded in expanding the mapping. Update the MemMap entry for shadow map.
2893 MemMap temp = MemMap::MapPlaceholder(
2894 "moving-space-shadow", static_cast<uint8_t*>(addr), moving_space_register_sz);
2895 std::swap(shadow_to_space_map_, temp);
2896 }
2897 }
2898 if (addr != MAP_FAILED) {
2899 mode = kMinorFaultMode;
2900 } else {
2901         // We are not going to use the shadow map, so protect it to catch any
2902         // potential bugs.
2903 DCHECK_EQ(mprotect(shadow_to_space_map_.Begin(), shadow_to_space_map_.Size(), PROT_NONE), 0)
2904 << "mprotect failed: " << strerror(errno);
2905 }
2906 }
2907 } else {
2908 moving_space_register_sz = moving_space_size;
2909 }
2910
2911 bool map_shared =
2912 minor_fault_initialized_ || (!Runtime::Current()->IsZygote() && uffd_minor_fault_supported_);
2913 uint8_t* shadow_addr = nullptr;
2914 if (moving_to_space_fd_ == kFdUnused && map_shared) {
2915 DCHECK(gHaveMremapDontunmap);
2916 DCHECK(shadow_to_space_map_.IsValid());
2917 DCHECK_EQ(shadow_to_space_map_.Size(), moving_space_size);
2918 shadow_addr = shadow_to_space_map_.Begin();
2919 }
2920
2921 KernelPrepareRangeForUffd(moving_space_begin,
2922 from_space_begin_,
2923 moving_space_size,
2924 moving_to_space_fd_,
2925 shadow_addr);
2926
2927 if (IsValidFd(uffd_)) {
2928 // Register the moving space with userfaultfd.
2929 RegisterUffd(moving_space_begin, moving_space_register_sz, mode);
2930 // Prepare linear-alloc for concurrent compaction.
2931 for (auto& data : linear_alloc_spaces_data_) {
2932 bool mmap_again = map_shared && !data.already_shared_;
2933 DCHECK_EQ(static_cast<ssize_t>(data.shadow_.Size()), data.end_ - data.begin_);
2934 // There could be threads running in suspended mode when the compaction
2935 // pause is being executed. In order to make the userfaultfd setup atomic,
2936 // the registration has to be done *before* moving the pages to shadow map.
2937 if (!mmap_again) {
2938 // See the comment in the constructor as to why it's conditionally done.
2939 RegisterUffd(data.begin_,
2940 data.shadow_.Size(),
2941 minor_fault_initialized_ ? kMinorFaultMode : kCopyMode);
2942 }
2943 KernelPrepareRangeForUffd(data.begin_,
2944 data.shadow_.Begin(),
2945 data.shadow_.Size(),
2946 mmap_again ? kFdSharedAnon : kFdUnused);
2947 if (mmap_again) {
2948 data.already_shared_ = true;
2949 RegisterUffd(data.begin_,
2950 data.shadow_.Size(),
2951 minor_fault_initialized_ ? kMinorFaultMode : kCopyMode);
2952 }
2953 }
2954 }
2955 if (map_shared) {
2956 // Start mapping linear-alloc MAP_SHARED only after the compaction pause of
2957 // the first GC in non-zygote processes. This is the GC which sets up
2958 // mappings for using minor-fault in future. Up to this point we run
2959 // userfaultfd in copy-mode, which requires the mappings (of linear-alloc)
2960 // to be MAP_PRIVATE.
2961 map_linear_alloc_shared_ = true;
2962 }
2963 }
2964
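// Compaction loop run by thread-pool workers when the SIGBUS feature is not
// in use: blocks in read(uffd_), processes the faulting moving-space or
// linear-alloc page, and exits once it observes a fault on
// conc_compaction_termination_page_, which the GC thread triggers to wind
// the workers down.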
2965 template <int kMode>
ConcurrentCompaction(uint8_t * buf)2966 void MarkCompact::ConcurrentCompaction(uint8_t* buf) {
2967 DCHECK_NE(kMode, kFallbackMode);
2968 DCHECK(kMode != kCopyMode || buf != nullptr);
2969 size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
2970 while (true) {
2971 struct uffd_msg msg;
2972 ssize_t nread = read(uffd_, &msg, sizeof(msg));
2973 CHECK_GT(nread, 0);
2974 CHECK_EQ(msg.event, UFFD_EVENT_PAGEFAULT);
2975 DCHECK_EQ(nread, static_cast<ssize_t>(sizeof(msg)));
2976 uint8_t* fault_addr = reinterpret_cast<uint8_t*>(msg.arg.pagefault.address);
2977 if (fault_addr == conc_compaction_termination_page_) {
2978 // The counter doesn't need to be updated atomically as only one thread
2979 // would wake up against the gc-thread's load to this fault_addr. In fact,
2980 // the other threads would wake up serially because every exiting thread
2981 // will wake up gc-thread, which would retry load but again would find the
2982 // page missing. Also, the value will be flushed to caches due to the ioctl
2983 // syscall below.
2984 uint8_t ret = thread_pool_counter_--;
2985 // If 'gKernelHasFaultRetry == true' then only the last thread should map the
2986 // zeropage so that the gc-thread can proceed. Otherwise, each thread does
2987 // it and the gc-thread will repeat this fault until thread_pool_counter == 0.
2988 if (!gKernelHasFaultRetry || ret == 1) {
2989 ZeropageIoctl(fault_addr, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
2990 } else {
2991 struct uffdio_range uffd_range;
2992 uffd_range.start = msg.arg.pagefault.address;
2993 uffd_range.len = kPageSize;
2994 CHECK_EQ(ioctl(uffd_, UFFDIO_WAKE, &uffd_range), 0)
2995 << "ioctl_userfaultfd: wake failed for concurrent-compaction termination page: "
2996 << strerror(errno);
2997 }
2998 break;
2999 }
3000 uint8_t* fault_page = AlignDown(fault_addr, kPageSize);
3001 if (bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_addr))) {
3002 ConcurrentlyProcessMovingPage<kMode>(fault_page, buf, nr_moving_space_used_pages);
3003 } else if (minor_fault_initialized_) {
3004 ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>(
3005 fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0);
3006 } else {
3007 ConcurrentlyProcessLinearAllocPage<kCopyMode>(
3008 fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0);
3009 }
3010 }
3011 }
3012
SigbusHandler(siginfo_t * info)3013 bool MarkCompact::SigbusHandler(siginfo_t* info) {
3014 class ScopedInProgressCount {
3015 public:
3016 explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
3017 // Increment the count only if compaction is not done yet.
3018 SigbusCounterType prev =
3019 collector_->sigbus_in_progress_count_.load(std::memory_order_relaxed);
3020 while ((prev & kSigbusCounterCompactionDoneMask) == 0) {
3021 if (collector_->sigbus_in_progress_count_.compare_exchange_strong(
3022 prev, prev + 1, std::memory_order_acquire)) {
3023 DCHECK_LT(prev, kSigbusCounterCompactionDoneMask - 1);
3024 compaction_done_ = false;
3025 return;
3026 }
3027 }
3028 compaction_done_ = true;
3029 }
3030
3031 bool IsCompactionDone() const {
3032 return compaction_done_;
3033 }
3034
3035 ~ScopedInProgressCount() {
3036 if (!IsCompactionDone()) {
3037 collector_->sigbus_in_progress_count_.fetch_sub(1, std::memory_order_release);
3038 }
3039 }
3040
3041 private:
3042 MarkCompact* const collector_;
3043 bool compaction_done_;
3044 };
3045
3046 DCHECK(use_uffd_sigbus_);
3047 if (info->si_code != BUS_ADRERR) {
3048 // Userfaultfd raises SIGBUS with BUS_ADRERR. All other causes can't be
3049 // handled here.
3050 return false;
3051 }
3052
3053 ScopedInProgressCount spc(this);
3054 uint8_t* fault_page = AlignDown(reinterpret_cast<uint8_t*>(info->si_addr), kPageSize);
3055 if (!spc.IsCompactionDone()) {
3056 if (bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_page))) {
3057 Thread* self = Thread::Current();
3058 Locks::mutator_lock_->AssertSharedHeld(self);
3059 size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
3060 if (minor_fault_initialized_) {
3061 ConcurrentlyProcessMovingPage<kMinorFaultMode>(
3062 fault_page, nullptr, nr_moving_space_used_pages);
3063 } else {
3064 ConcurrentlyProcessMovingPage<kCopyMode>(
3065 fault_page, self->GetThreadLocalGcBuffer(), nr_moving_space_used_pages);
3066 }
3067 return true;
3068 } else {
3069 // Find the linear-alloc space containing fault-addr
3070 for (auto& data : linear_alloc_spaces_data_) {
3071 if (data.begin_ <= fault_page && data.end_ > fault_page) {
3072 if (minor_fault_initialized_) {
3073 ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>(fault_page, false);
3074 } else {
3075 ConcurrentlyProcessLinearAllocPage<kCopyMode>(fault_page, false);
3076 }
3077 return true;
3078 }
3079 }
3080 // Fault address doesn't belong to either moving-space or linear-alloc.
3081 return false;
3082 }
3083 } else {
3084     // We may spuriously get a SIGBUS fault here that was initiated before
3085     // compaction finished but got handled only afterwards. In that case, if
3086     // the fault address is valid then consider it handled.
3087 return bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_page)) ||
3088 linear_alloc_spaces_data_.end() !=
3089 std::find_if(linear_alloc_spaces_data_.begin(),
3090 linear_alloc_spaces_data_.end(),
3091 [fault_page](const LinearAllocSpaceData& data) {
3092 return data.begin_ <= fault_page && data.end_ > fault_page;
3093 });
3094 }
3095 }
3096
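// Async-signal-safe back-off used while waiting for another thread to finish
// processing/mapping a page: yield for the first few attempts, then sleep for
// a linearly increasing duration.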
BackOff(uint32_t i)3097 static void BackOff(uint32_t i) {
3098 static constexpr uint32_t kYieldMax = 5;
3099 // TODO: Consider adding x86 PAUSE and/or ARM YIELD here.
3100 if (i <= kYieldMax) {
3101 sched_yield();
3102 } else {
3103 // nanosleep is not in the async-signal-safe list, but bionic implements it
3104 // with a pure system call, so it should be fine.
3105 NanoSleep(10000ull * (i - kYieldMax));
3106 }
3107 }
3108
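// Handles a fault on a moving-space page (from a uffd worker or a mutator's
// SIGBUS handler): either compacts/slides the page into a buffer (or the
// shadow map in minor-fault mode) and maps it, or defers to/waits for another
// thread that is already doing so. On the copy-mode path the page status
// roughly transitions as
//   kUnprocessed -> kMutatorProcessing -> kProcessedAndMapping
//                -> kProcessedAndMapped (the last only with SIGBUS),
// while kProcessing/kProcessed arise only on the minor-fault path, where
// processing and mapping are decoupled.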
3109 template <int kMode>
ConcurrentlyProcessMovingPage(uint8_t * fault_page,uint8_t * buf,size_t nr_moving_space_used_pages)3110 void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page,
3111 uint8_t* buf,
3112 size_t nr_moving_space_used_pages) {
3113 class ScopedInProgressCount {
3114 public:
3115 explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
3116 collector_->compaction_in_progress_count_.fetch_add(1, std::memory_order_relaxed);
3117 }
3118
3119 ~ScopedInProgressCount() {
3120 collector_->compaction_in_progress_count_.fetch_sub(1, std::memory_order_relaxed);
3121 }
3122
3123 private:
3124 MarkCompact* collector_;
3125 };
3126
3127 uint8_t* unused_space_begin =
3128 bump_pointer_space_->Begin() + nr_moving_space_used_pages * kPageSize;
3129 DCHECK(IsAligned<kPageSize>(unused_space_begin));
3130 DCHECK(kMode == kCopyMode || fault_page < unused_space_begin);
3131 if (kMode == kCopyMode && fault_page >= unused_space_begin) {
3132 // There is a race which allows more than one thread to install a
3133 // zero-page. But we can tolerate that. So absorb the EEXIST returned by
3134 // the ioctl and move on.
3135 ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true);
3136 return;
3137 }
3138 size_t page_idx = (fault_page - bump_pointer_space_->Begin()) / kPageSize;
3139 mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr();
3140 if (first_obj == nullptr) {
3141 // We should never have a case where two workers are trying to install a
3142 // zeropage in this range as we synchronize using moving_pages_status_[page_idx].
3143 PageState expected_state = PageState::kUnprocessed;
3144 if (moving_pages_status_[page_idx].compare_exchange_strong(
3145 expected_state, PageState::kProcessedAndMapping, std::memory_order_relaxed)) {
3146 // Note: ioctl acts as an acquire fence.
3147 ZeropageIoctl(fault_page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/true);
3148 } else {
3149 DCHECK_EQ(expected_state, PageState::kProcessedAndMapping);
3150 }
3151 return;
3152 }
3153
3154 PageState state = moving_pages_status_[page_idx].load(
3155 use_uffd_sigbus_ ? std::memory_order_acquire : std::memory_order_relaxed);
3156 uint32_t backoff_count = 0;
3157 while (true) {
3158 switch (state) {
3159 case PageState::kUnprocessed: {
3160 // The increment to the in-progress counter must be done before updating
3161 // the page's state. Otherwise, we will end up leaving a window wherein
3162 // the GC-thread could observe that no worker is working on compaction
3163 // and could end up unregistering the moving space from userfaultfd.
3164 ScopedInProgressCount spc(this);
3165         // Acquire order to ensure we don't start writing to the shadow map, which
3166         // is shared, before the CAS is successful. Release order to ensure that
3167         // the increment to compaction_in_progress_count_ above is not re-ordered
3168         // after the CAS.
3169 if (moving_pages_status_[page_idx].compare_exchange_strong(
3170 state, PageState::kMutatorProcessing, std::memory_order_acq_rel)) {
3171 if (kMode == kMinorFaultMode) {
3172 DCHECK_EQ(buf, nullptr);
3173 buf = shadow_to_space_map_.Begin() + page_idx * kPageSize;
3174 } else if (UNLIKELY(buf == nullptr)) {
3175 DCHECK_EQ(kMode, kCopyMode);
3176 uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed);
3177 // The buffer-map is one page bigger as the first buffer is used by GC-thread.
3178 CHECK_LE(idx, kMutatorCompactionBufferCount);
3179 buf = compaction_buffers_map_.Begin() + idx * kPageSize;
3180 DCHECK(compaction_buffers_map_.HasAddress(buf));
3181 Thread::Current()->SetThreadLocalGcBuffer(buf);
3182 }
3183
3184 if (fault_page < post_compact_end_) {
3185 // The page has to be compacted.
3186 CompactPage(
3187 first_obj, pre_compact_offset_moving_space_[page_idx], buf, kMode == kCopyMode);
3188 } else {
3189 DCHECK_NE(first_obj, nullptr);
3190 DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u);
3191 uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_);
3192 DCHECK(IsAligned<kPageSize>(pre_compact_page));
3193 SlideBlackPage(first_obj, page_idx, pre_compact_page, buf, kMode == kCopyMode);
3194 }
3195 // Nobody else would simultaneously modify this page's state so an
3196 // atomic store is sufficient. Use 'release' order to guarantee that
3197 // loads/stores to the page are finished before this store.
3198 moving_pages_status_[page_idx].store(PageState::kProcessedAndMapping,
3199 std::memory_order_release);
3200 if (kMode == kCopyMode) {
3201 CopyIoctl(fault_page, buf);
3202 if (use_uffd_sigbus_) {
3203 // Store is sufficient as no other thread modifies the status at this stage.
3204 moving_pages_status_[page_idx].store(PageState::kProcessedAndMapped,
3205 std::memory_order_release);
3206 }
3207 return;
3208 } else {
3209 break;
3210 }
3211 }
3212 }
3213 continue;
3214 case PageState::kProcessing:
3215 DCHECK_EQ(kMode, kMinorFaultMode);
3216 if (moving_pages_status_[page_idx].compare_exchange_strong(
3217 state, PageState::kProcessingAndMapping, std::memory_order_relaxed) &&
3218 !use_uffd_sigbus_) {
3219 // Somebody else took or will take care of finishing the compaction and
3220 // then mapping the page.
3221 return;
3222 }
3223 continue;
3224 case PageState::kProcessed:
3225 // The page is processed but not mapped. We should map it.
3226 break;
3227 case PageState::kProcessingAndMapping:
3228 case PageState::kMutatorProcessing:
3229 case PageState::kProcessedAndMapping:
3230 if (use_uffd_sigbus_) {
3231 // Wait for the page to be mapped before returning.
3232 BackOff(backoff_count++);
3233 state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
3234 continue;
3235 }
3236 return;
3237 case PageState::kProcessedAndMapped:
3238 // Somebody else took care of the page.
3239 return;
3240 }
3241 break;
3242 }
3243
3244 DCHECK_EQ(kMode, kMinorFaultMode);
3245 if (state == PageState::kUnprocessed) {
3246 MapProcessedPages</*kFirstPageMapping=*/true>(
3247 fault_page, moving_pages_status_, page_idx, nr_moving_space_used_pages);
3248 } else {
3249 DCHECK_EQ(state, PageState::kProcessed);
3250 MapProcessedPages</*kFirstPageMapping=*/false>(
3251 fault_page, moving_pages_status_, page_idx, nr_moving_space_used_pages);
3252 }
3253 }
3254
MapUpdatedLinearAllocPage(uint8_t * page,uint8_t * shadow_page,Atomic<PageState> & state,bool page_touched)3255 void MarkCompact::MapUpdatedLinearAllocPage(uint8_t* page,
3256 uint8_t* shadow_page,
3257 Atomic<PageState>& state,
3258 bool page_touched) {
3259 DCHECK(!minor_fault_initialized_);
3260 if (page_touched) {
3261 CopyIoctl(page, shadow_page);
3262 } else {
3263     // If the page wasn't touched, then it means it is empty and
3264     // most likely not present on the shadow-side. Furthermore,
3265     // since the shadow is also userfaultfd registered, a copy
3266     // ioctl would fail as the copy-from-user in the kernel would
3267     // itself cause a userfault. Instead, just map a zeropage, which
3268     // is not only correct but also efficient as it avoids an
3269     // unnecessary memcpy in the kernel.
3270 ZeropageIoctl(page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
3271 }
3272 if (use_uffd_sigbus_) {
3273 // Store is sufficient as no other thread can modify the
3274 // status of this page at this point.
3275 state.store(PageState::kProcessedAndMapped, std::memory_order_release);
3276 }
3277 }
3278
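// Handles a fault on a linear-alloc page: pages outside the arenas captured
// during the pause simply get a zeropage; otherwise the GC roots on the
// page's shadow copy are updated and the page is mapped, with the per-page
// state array arbitrating between mutators and the GC thread.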
3279 template <int kMode>
ConcurrentlyProcessLinearAllocPage(uint8_t * fault_page,bool is_minor_fault)3280 void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool is_minor_fault) {
3281 DCHECK(!is_minor_fault || kMode == kMinorFaultMode);
3282 auto arena_iter = linear_alloc_arenas_.end();
3283 {
3284 TrackedArena temp_arena(fault_page);
3285 arena_iter = linear_alloc_arenas_.upper_bound(&temp_arena);
3286 arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter)
3287 : linear_alloc_arenas_.end();
3288 }
3289 if (arena_iter == linear_alloc_arenas_.end() || arena_iter->second <= fault_page) {
3290 // Fault page isn't in any of the arenas that existed before we started
3291 // compaction. So map zeropage and return.
3292 ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false);
3293 } else {
3294 // fault_page should always belong to some arena.
3295 DCHECK(arena_iter != linear_alloc_arenas_.end())
3296 << "fault_page:" << static_cast<void*>(fault_page) << "is_minor_fault:" << is_minor_fault;
3297 // Find the linear-alloc space containing fault-page
3298 LinearAllocSpaceData* space_data = nullptr;
3299 for (auto& data : linear_alloc_spaces_data_) {
3300 if (data.begin_ <= fault_page && fault_page < data.end_) {
3301 space_data = &data;
3302 break;
3303 }
3304 }
3305 DCHECK_NE(space_data, nullptr);
3306 ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_;
3307 size_t page_idx = (fault_page - space_data->begin_) / kPageSize;
3308 Atomic<PageState>* state_arr =
3309 reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
3310 PageState state = state_arr[page_idx].load(use_uffd_sigbus_ ? std::memory_order_acquire :
3311 std::memory_order_relaxed);
3312 uint32_t backoff_count = 0;
3313 while (true) {
3314 switch (state) {
3315 case PageState::kUnprocessed: {
3316 // Acquire order to ensure we don't start writing to shadow map, which is
3317 // shared, before the CAS is successful.
3318 if (state_arr[page_idx].compare_exchange_strong(
3319 state, PageState::kProcessingAndMapping, std::memory_order_acquire)) {
3320 if (kMode == kCopyMode || is_minor_fault) {
3321 uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page);
3322 DCHECK_NE(first_obj, nullptr);
3323 LinearAllocPageUpdater updater(this);
3324 updater(fault_page + diff, first_obj + diff);
3325 if (kMode == kCopyMode) {
3326 MapUpdatedLinearAllocPage(fault_page,
3327 fault_page + diff,
3328 state_arr[page_idx],
3329 updater.WasLastPageTouched());
3330 return;
3331 }
3332 } else {
3333               // Don't touch the page in this case (there is no reason to do so
3334               // anyway) as it would mean reading from first_obj, which could be on
3335 // another missing page and hence may cause this thread to block, leading
3336 // to deadlocks.
3337 // Force read the page if it is missing so that a zeropage gets mapped on
3338 // the shadow map and then CONTINUE ioctl will map it on linear-alloc.
3339 ForceRead(fault_page + diff);
3340 }
3341 MapProcessedPages</*kFirstPageMapping=*/true>(
3342 fault_page, state_arr, page_idx, space_data->page_status_map_.Size());
3343 return;
3344 }
3345 }
3346 continue;
3347 case PageState::kProcessing:
3348 DCHECK_EQ(kMode, kMinorFaultMode);
3349 if (state_arr[page_idx].compare_exchange_strong(
3350 state, PageState::kProcessingAndMapping, std::memory_order_relaxed) &&
3351 !use_uffd_sigbus_) {
3352 // Somebody else took or will take care of finishing the updates and
3353 // then mapping the page.
3354 return;
3355 }
3356 continue;
3357 case PageState::kProcessed:
3358 // The page is processed but not mapped. We should map it.
3359 break;
3360 case PageState::kMutatorProcessing:
3361 UNREACHABLE();
3362 case PageState::kProcessingAndMapping:
3363 case PageState::kProcessedAndMapping:
3364 if (use_uffd_sigbus_) {
3365 // Wait for the page to be mapped before returning.
3366 BackOff(backoff_count++);
3367 state = state_arr[page_idx].load(std::memory_order_acquire);
3368 continue;
3369 }
3370 return;
3371 case PageState::kProcessedAndMapped:
3372 // Somebody else took care of the page.
3373 return;
3374 }
3375 break;
3376 }
3377
3378 DCHECK_EQ(kMode, kMinorFaultMode);
3379 DCHECK_EQ(state, PageState::kProcessed);
3380 if (!is_minor_fault) {
3381 // Force read the page if it is missing so that a zeropage gets mapped on
3382 // the shadow map and then CONTINUE ioctl will map it on linear-alloc.
3383 ForceRead(fault_page + diff);
3384 }
3385 MapProcessedPages</*kFirstPageMapping=*/false>(
3386 fault_page, state_arr, page_idx, space_data->page_status_map_.Size());
3387 }
3388 }
3389
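// GC-thread pass over the linear-alloc arenas captured during the compaction
// pause: updates GC roots on each page's shadow copy and then maps the page
// (or marks it processed in minor-fault mode), skipping arenas freed since
// the pause and pages already claimed by faulting threads.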
ProcessLinearAlloc()3390 void MarkCompact::ProcessLinearAlloc() {
3391 GcVisitedArenaPool* arena_pool =
3392 static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
3393 for (auto& pair : linear_alloc_arenas_) {
3394 const TrackedArena* arena = pair.first;
3395 size_t arena_size;
3396 uint8_t* arena_begin;
3397 ptrdiff_t diff;
3398 bool others_processing;
3399 {
3400 // Acquire arena-pool's lock so that the arena being worked cannot be
3401 // deallocated at the same time.
3402 std::lock_guard<std::mutex> lock(arena_pool->GetLock());
3403 // If any arenas were freed since compaction pause then skip them from
3404 // visiting.
3405 if (arena_pool->AreArenasFreed() && !arena_pool->FindAllocatedArena(arena)) {
3406 continue;
3407 }
3408 uint8_t* last_byte = pair.second;
3409 DCHECK_ALIGNED(last_byte, kPageSize);
3410 others_processing = false;
3411 arena_begin = arena->Begin();
3412 arena_size = arena->Size();
3413 // Find the linear-alloc space containing the arena
3414 LinearAllocSpaceData* space_data = nullptr;
3415 for (auto& data : linear_alloc_spaces_data_) {
3416 if (data.begin_ <= arena_begin && arena_begin < data.end_) {
3417 space_data = &data;
3418 break;
3419 }
3420 }
3421 DCHECK_NE(space_data, nullptr);
3422 diff = space_data->shadow_.Begin() - space_data->begin_;
3423 auto visitor = [space_data, last_byte, diff, this, &others_processing](
3424 uint8_t* page_begin,
3425 uint8_t* first_obj) REQUIRES_SHARED(Locks::mutator_lock_) {
3426 // No need to process pages past last_byte as they already have updated
3427 // gc-roots, if any.
3428 if (page_begin >= last_byte) {
3429 return;
3430 }
3431 LinearAllocPageUpdater updater(this);
3432 size_t page_idx = (page_begin - space_data->begin_) / kPageSize;
3433 DCHECK_LT(page_idx, space_data->page_status_map_.Size());
3434 Atomic<PageState>* state_arr =
3435 reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
3436 PageState expected_state = PageState::kUnprocessed;
3437 PageState desired_state =
3438 minor_fault_initialized_ ? PageState::kProcessing : PageState::kProcessingAndMapping;
3439         // Acquire order to ensure that we don't start accessing the shadow page,
3440         // which is shared with other threads, prior to the CAS. Also, for the same
3441         // reason, we use 'release' order for changing the state to 'processed'.
3442 if (state_arr[page_idx].compare_exchange_strong(
3443 expected_state, desired_state, std::memory_order_acquire)) {
3444 updater(page_begin + diff, first_obj + diff);
3445 expected_state = PageState::kProcessing;
3446 if (!minor_fault_initialized_) {
3447 MapUpdatedLinearAllocPage(
3448 page_begin, page_begin + diff, state_arr[page_idx], updater.WasLastPageTouched());
3449 } else if (!state_arr[page_idx].compare_exchange_strong(
3450 expected_state, PageState::kProcessed, std::memory_order_release)) {
3451 DCHECK_EQ(expected_state, PageState::kProcessingAndMapping);
3452 // Force read in case the page was missing and updater didn't touch it
3453 // as there was nothing to do. This will ensure that a zeropage is
3454 // faulted on the shadow map.
3455 ForceRead(page_begin + diff);
3456 MapProcessedPages</*kFirstPageMapping=*/true>(
3457 page_begin, state_arr, page_idx, space_data->page_status_map_.Size());
3458 }
3459 } else {
3460 others_processing = true;
3461 }
3462 };
3463
3464 arena->VisitRoots(visitor);
3465 }
3466     // If we are not in minor-fault mode and no other thread was found to be
3467     // processing any pages in this arena, then we can madvise the arena's
3468     // shadow space away. Otherwise, we would double the memory use for linear-alloc.
3469 if (!minor_fault_initialized_ && !others_processing) {
3470 ZeroAndReleasePages(arena_begin + diff, arena_size);
3471 }
3472 }
3473 }
3474
RegisterUffd(void * addr,size_t size,int mode)3475 void MarkCompact::RegisterUffd(void* addr, size_t size, int mode) {
3476 DCHECK(IsValidFd(uffd_));
3477 struct uffdio_register uffd_register;
3478 uffd_register.range.start = reinterpret_cast<uintptr_t>(addr);
3479 uffd_register.range.len = size;
3480 uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
3481 if (mode == kMinorFaultMode) {
3482 uffd_register.mode |= UFFDIO_REGISTER_MODE_MINOR;
3483 }
3484 CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
3485 << "ioctl_userfaultfd: register failed: " << strerror(errno)
3486 << ". start:" << static_cast<void*>(addr) << " len:" << PrettySize(size);
3487 }
3488
UnregisterUffd(uint8_t * start,size_t len)3489 void MarkCompact::UnregisterUffd(uint8_t* start, size_t len) {
3490 DCHECK(IsValidFd(uffd_));
3491 struct uffdio_range range;
3492 range.start = reinterpret_cast<uintptr_t>(start);
3493 range.len = len;
3494 CHECK_EQ(ioctl(uffd_, UFFDIO_UNREGISTER, &range), 0)
3495 << "ioctl_userfaultfd: unregister failed: " << strerror(errno)
3496 << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len);
3497 // Due to an oversight in the kernel implementation of 'unregister', the
3498 // waiting threads are woken up only for copy uffds. Therefore, for now, we
3499 // have to explicitly wake up the threads in minor-fault case.
3500 // TODO: The fix in the kernel is being worked on. Once the kernel version
3501 // containing the fix is known, make it conditional on that as well.
3502 if (minor_fault_initialized_) {
3503 CHECK_EQ(ioctl(uffd_, UFFDIO_WAKE, &range), 0)
3504 << "ioctl_userfaultfd: wake failed: " << strerror(errno)
3505 << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len);
3506 }
3507 }
3508
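// Concurrent compaction phase run by the GC thread after mutators resume:
// compacts the moving space, waits for in-flight mutator compactions, releases
// from-space memory, processes linear-alloc arenas, and finally winds down the
// uffd workers (or SIGBUS handlers) and unregisters the linear-alloc spaces.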
CompactionPhase()3509 void MarkCompact::CompactionPhase() {
3510 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3511 {
3512 int32_t freed_bytes = black_objs_slide_diff_;
3513 bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
3514 RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
3515 }
3516
3517 if (CanCompactMovingSpaceWithMinorFault()) {
3518 CompactMovingSpace<kMinorFaultMode>(/*page=*/nullptr);
3519 } else {
3520 CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin());
3521 }
3522
3523 // Make sure no mutator is reading from the from-space before unregistering
3524 // userfaultfd from moving-space and then zapping from-space. The mutator
3525 // and GC may race to set a page state to processing or further along. The two
3526 // attempts are ordered. If the collector wins, then the mutator will see that
3527   // and not access the from-space page. If the mutator wins, then the
3528 // compaction_in_progress_count_ increment by the mutator happens-before the test
3529 // here, and we will not see a zero value until the mutator has completed.
3530 for (uint32_t i = 0; compaction_in_progress_count_.load(std::memory_order_acquire) > 0; i++) {
3531 BackOff(i);
3532 }
3533
3534 size_t moving_space_size = bump_pointer_space_->Capacity();
3535 UnregisterUffd(bump_pointer_space_->Begin(),
3536 minor_fault_initialized_ ?
3537 (moving_first_objs_count_ + black_page_count_) * kPageSize :
3538 moving_space_size);
3539
3540 // Release all of the memory taken by moving-space's from-map
3541 if (minor_fault_initialized_) {
3542 if (IsValidFd(moving_from_space_fd_)) {
3543       // A strange behavior is observed wherein between GC cycles the from-space's
3544       // first page is accessed. But the memfd that is mapped on the from-space is
3545       // used on the to-space in the next GC cycle, causing issues with userfaultfd
3546       // as the page isn't missing. A possible reason for this could be prefetches.
3547       // The mprotect ensures that such accesses don't succeed.
3548 int ret = mprotect(from_space_begin_, moving_space_size, PROT_NONE);
3549 CHECK_EQ(ret, 0) << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno);
3550 // madvise(MADV_REMOVE) needs PROT_WRITE. Use fallocate() instead, which
3551 // does the same thing.
3552 ret = fallocate(moving_from_space_fd_,
3553 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
3554 /*offset=*/0,
3555 moving_space_size);
3556 CHECK_EQ(ret, 0) << "fallocate for from-space failed: " << strerror(errno);
3557 } else {
3558 // We don't have a valid fd, so use madvise(MADV_REMOVE) instead. mprotect
3559 // is not required in this case as we create fresh
3560 // MAP_SHARED+MAP_ANONYMOUS mapping in each GC cycle.
3561 int ret = madvise(from_space_begin_, moving_space_size, MADV_REMOVE);
3562 CHECK_EQ(ret, 0) << "madvise(MADV_REMOVE) failed for from-space map:" << strerror(errno);
3563 }
3564 } else {
3565 from_space_map_.MadviseDontNeedAndZero();
3566 }
3567 // mprotect(PROT_NONE) all maps except to-space in debug-mode to catch any unexpected accesses.
3568 if (shadow_to_space_map_.IsValid()) {
3569 DCHECK_EQ(mprotect(shadow_to_space_map_.Begin(), shadow_to_space_map_.Size(), PROT_NONE), 0)
3570 << "mprotect(PROT_NONE) for shadow-map failed:" << strerror(errno);
3571 }
3572 if (!IsValidFd(moving_from_space_fd_)) {
3573 // The other case is already mprotected above.
3574 DCHECK_EQ(mprotect(from_space_begin_, moving_space_size, PROT_NONE), 0)
3575 << "mprotect(PROT_NONE) for from-space failed: " << strerror(errno);
3576 }
3577
3578 ProcessLinearAlloc();
3579
3580 if (use_uffd_sigbus_) {
3581 // Set compaction-done bit so that no new mutator threads start compaction
3582 // process in the SIGBUS handler.
3583 SigbusCounterType count = sigbus_in_progress_count_.fetch_or(kSigbusCounterCompactionDoneMask,
3584 std::memory_order_acq_rel);
3585 // Wait for SIGBUS handlers already in play.
3586 for (uint32_t i = 0; count > 0; i++) {
3587 BackOff(i);
3588 count = sigbus_in_progress_count_.load(std::memory_order_acquire);
3589 count &= ~kSigbusCounterCompactionDoneMask;
3590 }
3591 } else {
3592 DCHECK(IsAligned<kPageSize>(conc_compaction_termination_page_));
3593 // We will only iterate once if gKernelHasFaultRetry is true.
3594 do {
3595 // madvise the page so that we can get userfaults on it.
3596 ZeroAndReleasePages(conc_compaction_termination_page_, kPageSize);
3597 // The following load triggers 'special' userfaults. When received by the
3598 // thread-pool workers, they will exit out of the compaction task. This fault
3599 // happens because we madvised the page.
3600 ForceRead(conc_compaction_termination_page_);
3601 } while (thread_pool_counter_ > 0);
3602 }
3603 // Unregister linear-alloc spaces
3604 for (auto& data : linear_alloc_spaces_data_) {
3605 DCHECK_EQ(data.end_ - data.begin_, static_cast<ssize_t>(data.shadow_.Size()));
3606 UnregisterUffd(data.begin_, data.shadow_.Size());
3607 // madvise linear-allocs's page-status array
3608 data.page_status_map_.MadviseDontNeedAndZero();
3609     // Madvise the entire linear-alloc space's shadow. In copy-mode it gets rid
3610     // of the pages which are still mapped. In minor-fault mode this unmaps all
3611     // pages, which helps reduce the mremap time (done in the STW pause) in the
3612     // next GC cycle.
3613 data.shadow_.MadviseDontNeedAndZero();
3614 if (minor_fault_initialized_) {
3615 DCHECK_EQ(mprotect(data.shadow_.Begin(), data.shadow_.Size(), PROT_NONE), 0)
3616 << "mprotect failed: " << strerror(errno);
3617 }
3618 }
3619
3620 if (!use_uffd_sigbus_) {
3621 heap_->GetThreadPool()->StopWorkers(thread_running_gc_);
3622 }
3623 }
3624
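// Buffers up to kBufferSize thread roots, marking each one and pushing the
// newly marked objects onto the mark stack in batches to amortize acquiring
// the mark-stack lock.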
3625 template <size_t kBufferSize>
3626 class MarkCompact::ThreadRootsVisitor : public RootVisitor {
3627 public:
ThreadRootsVisitor(MarkCompact * mark_compact,Thread * const self)3628 explicit ThreadRootsVisitor(MarkCompact* mark_compact, Thread* const self)
3629 : mark_compact_(mark_compact), self_(self) {}
3630
~ThreadRootsVisitor()3631 ~ThreadRootsVisitor() {
3632 Flush();
3633 }
3634
VisitRoots(mirror::Object *** roots,size_t count,const RootInfo & info ATTRIBUTE_UNUSED)3635 void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
3636 override REQUIRES_SHARED(Locks::mutator_lock_)
3637 REQUIRES(Locks::heap_bitmap_lock_) {
3638 for (size_t i = 0; i < count; i++) {
3639 mirror::Object* obj = *roots[i];
3640 if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
3641 Push(obj);
3642 }
3643 }
3644 }
3645
VisitRoots(mirror::CompressedReference<mirror::Object> ** roots,size_t count,const RootInfo & info ATTRIBUTE_UNUSED)3646 void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
3647 size_t count,
3648 const RootInfo& info ATTRIBUTE_UNUSED)
3649 override REQUIRES_SHARED(Locks::mutator_lock_)
3650 REQUIRES(Locks::heap_bitmap_lock_) {
3651 for (size_t i = 0; i < count; i++) {
3652 mirror::Object* obj = roots[i]->AsMirrorPtr();
3653 if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
3654 Push(obj);
3655 }
3656 }
3657 }
3658
3659 private:
Flush()3660 void Flush() REQUIRES_SHARED(Locks::mutator_lock_)
3661 REQUIRES(Locks::heap_bitmap_lock_) {
3662 StackReference<mirror::Object>* start;
3663 StackReference<mirror::Object>* end;
3664 {
3665 MutexLock mu(self_, mark_compact_->lock_);
3666       // Loop here because even after expanding once it may not be sufficient to
3667       // accommodate all references. That is almost impossible in practice, but
3668       // there is no harm in handling it this way.
3669 while (!mark_compact_->mark_stack_->BumpBack(idx_, &start, &end)) {
3670 mark_compact_->ExpandMarkStack();
3671 }
3672 }
3673 while (idx_ > 0) {
3674 *start++ = roots_[--idx_];
3675 }
3676 DCHECK_EQ(start, end);
3677 }
3678
Push(mirror::Object * obj)3679 void Push(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
3680 REQUIRES(Locks::heap_bitmap_lock_) {
3681 if (UNLIKELY(idx_ >= kBufferSize)) {
3682 Flush();
3683 }
3684 roots_[idx_++].Assign(obj);
3685 }
3686
3687 StackReference<mirror::Object> roots_[kBufferSize];
3688 size_t idx_ = 0;
3689 MarkCompact* const mark_compact_;
3690 Thread* const self_;
3691 };
3692
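// Checkpoint closure executed on (or on behalf of) each thread to mark its
// roots and clear its thread-local compaction buffer.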
3693 class MarkCompact::CheckpointMarkThreadRoots : public Closure {
3694 public:
CheckpointMarkThreadRoots(MarkCompact * mark_compact)3695 explicit CheckpointMarkThreadRoots(MarkCompact* mark_compact) : mark_compact_(mark_compact) {}
3696
Run(Thread * thread)3697 void Run(Thread* thread) override NO_THREAD_SAFETY_ANALYSIS {
3698 ScopedTrace trace("Marking thread roots");
3699 // Note: self is not necessarily equal to thread since thread may be
3700 // suspended.
3701 Thread* const self = Thread::Current();
3702 CHECK(thread == self
3703 || thread->IsSuspended()
3704 || thread->GetState() == ThreadState::kWaitingPerformingGc)
3705 << thread->GetState() << " thread " << thread << " self " << self;
3706 {
3707 ThreadRootsVisitor</*kBufferSize*/ 20> visitor(mark_compact_, self);
3708 thread->VisitRoots(&visitor, kVisitRootFlagAllRoots);
3709 }
3710 // Clear page-buffer to prepare for compaction phase.
3711 thread->SetThreadLocalGcBuffer(nullptr);
3712
3713 // If thread is a running mutator, then act on behalf of the garbage
3714 // collector. See the code in ThreadList::RunCheckpoint.
3715 mark_compact_->GetBarrier().Pass(self);
3716 }
3717
3718 private:
3719 MarkCompact* const mark_compact_;
3720 };
3721
MarkRootsCheckpoint(Thread * self,Runtime * runtime)3722 void MarkCompact::MarkRootsCheckpoint(Thread* self, Runtime* runtime) {
3723   // We revoke TLABs later during the paused round of marking.
3724 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3725 CheckpointMarkThreadRoots check_point(this);
3726 ThreadList* thread_list = runtime->GetThreadList();
3727 gc_barrier_.Init(self, 0);
3728   // Request that the checkpoint be run on all threads, returning a count of the threads
3729   // that must run through the barrier, including self.
3730 size_t barrier_count = thread_list->RunCheckpoint(&check_point);
3731 // Release locks then wait for all mutator threads to pass the barrier.
3732   // If there are no threads to wait for, which implies that all the checkpoint functions
3733   // have already finished, then there is no need to release the locks.
3734 if (barrier_count == 0) {
3735 return;
3736 }
3737 Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
3738 Locks::mutator_lock_->SharedUnlock(self);
3739 {
3740 ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
3741 gc_barrier_.Increment(self, barrier_count);
3742 }
3743 Locks::mutator_lock_->SharedLock(self);
3744 Locks::heap_bitmap_lock_->ExclusiveLock(self);
3745 }
3746
MarkNonThreadRoots(Runtime * runtime)3747 void MarkCompact::MarkNonThreadRoots(Runtime* runtime) {
3748 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3749 runtime->VisitNonThreadRoots(this);
3750 }
3751
MarkConcurrentRoots(VisitRootFlags flags,Runtime * runtime)3752 void MarkCompact::MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime) {
3753 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3754 runtime->VisitConcurrentRoots(this, flags);
3755 }
3756
RevokeAllThreadLocalBuffers()3757 void MarkCompact::RevokeAllThreadLocalBuffers() {
3758 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3759 bump_pointer_space_->RevokeAllThreadLocalBuffers();
3760 }
3761
3762 class MarkCompact::ScanObjectVisitor {
3763 public:
ScanObjectVisitor(MarkCompact * const mark_compact)3764 explicit ScanObjectVisitor(MarkCompact* const mark_compact) ALWAYS_INLINE
3765 : mark_compact_(mark_compact) {}
3766
operator ()(ObjPtr<mirror::Object> obj) const3767 void operator()(ObjPtr<mirror::Object> obj) const
3768 ALWAYS_INLINE
3769 REQUIRES(Locks::heap_bitmap_lock_)
3770 REQUIRES_SHARED(Locks::mutator_lock_) {
3771 mark_compact_->ScanObject</*kUpdateLiveWords*/ false>(obj.Ptr());
3772 }
3773
3774 private:
3775 MarkCompact* const mark_compact_;
3776 };
3777
UpdateAndMarkModUnion()3778 void MarkCompact::UpdateAndMarkModUnion() {
3779 accounting::CardTable* const card_table = heap_->GetCardTable();
3780 for (const auto& space : immune_spaces_.GetSpaces()) {
3781 const char* name = space->IsZygoteSpace()
3782 ? "UpdateAndMarkZygoteModUnionTable"
3783 : "UpdateAndMarkImageModUnionTable";
3784 DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space;
3785 TimingLogger::ScopedTiming t(name, GetTimings());
3786 accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
3787 if (table != nullptr) {
3788 // UpdateAndMarkReferences() doesn't visit Reference-type objects. But
3789 // that's fine because these objects are immutable enough (referent can
3790 // only be cleared) and hence the only referents they can have are intra-space.
3791 table->UpdateAndMarkReferences(this);
3792 } else {
3793 // No mod-union table, scan all dirty/aged cards in the corresponding
3794 // card-table. This can only occur for app images.
3795 card_table->Scan</*kClearCard*/ false>(space->GetMarkBitmap(),
3796 space->Begin(),
3797 space->End(),
3798 ScanObjectVisitor(this),
3799 gc::accounting::CardTable::kCardAged);
3800 }
3801 }
3802 }
3803
MarkReachableObjects()3804 void MarkCompact::MarkReachableObjects() {
3805 UpdateAndMarkModUnion();
3806 // Recursively mark all the non-image bits set in the mark bitmap.
3807 ProcessMarkStack();
3808 }
3809
3810 class MarkCompact::CardModifiedVisitor {
3811 public:
CardModifiedVisitor(MarkCompact * const mark_compact,accounting::ContinuousSpaceBitmap * const bitmap,accounting::CardTable * const card_table)3812 explicit CardModifiedVisitor(MarkCompact* const mark_compact,
3813 accounting::ContinuousSpaceBitmap* const bitmap,
3814 accounting::CardTable* const card_table)
3815 : visitor_(mark_compact), bitmap_(bitmap), card_table_(card_table) {}
3816
operator ()(uint8_t * card,uint8_t expected_value,uint8_t new_value ATTRIBUTE_UNUSED) const3817 void operator()(uint8_t* card,
3818 uint8_t expected_value,
3819 uint8_t new_value ATTRIBUTE_UNUSED) const {
3820 if (expected_value == accounting::CardTable::kCardDirty) {
3821 uintptr_t start = reinterpret_cast<uintptr_t>(card_table_->AddrFromCard(card));
3822 bitmap_->VisitMarkedRange(start, start + accounting::CardTable::kCardSize, visitor_);
3823 }
3824 }
3825
3826 private:
3827 ScanObjectVisitor visitor_;
3828 accounting::ContinuousSpaceBitmap* bitmap_;
3829 accounting::CardTable* const card_table_;
3830 };
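// CardModifiedVisitor is handed to CardTable::ModifyCardsAtomic() below, which calls
// it for every card whose value it changes. Objects are re-scanned only for cards
// that were dirty immediately before the change (the dirty -> aged transition);
// cards that were already aged or clean are not re-scanned by this visitor.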
3831
3832 void MarkCompact::ScanDirtyObjects(bool paused, uint8_t minimum_age) {
3833 accounting::CardTable* card_table = heap_->GetCardTable();
3834 for (const auto& space : heap_->GetContinuousSpaces()) {
3835 const char* name = nullptr;
3836 switch (space->GetGcRetentionPolicy()) {
3837 case space::kGcRetentionPolicyNeverCollect:
3838 name = paused ? "(Paused)ScanGrayImmuneSpaceObjects" : "ScanGrayImmuneSpaceObjects";
3839 break;
3840 case space::kGcRetentionPolicyFullCollect:
3841 name = paused ? "(Paused)ScanGrayZygoteSpaceObjects" : "ScanGrayZygoteSpaceObjects";
3842 break;
3843 case space::kGcRetentionPolicyAlwaysCollect:
3844 name = paused ? "(Paused)ScanGrayAllocSpaceObjects" : "ScanGrayAllocSpaceObjects";
3845 break;
3846 default:
3847 LOG(FATAL) << "Unreachable";
3848 UNREACHABLE();
3849 }
3850 TimingLogger::ScopedTiming t(name, GetTimings());
3851 ScanObjectVisitor visitor(this);
3852 const bool is_immune_space = space->IsZygoteSpace() || space->IsImageSpace();
3853 if (paused) {
3854 DCHECK_EQ(minimum_age, gc::accounting::CardTable::kCardDirty);
3855 // We can clear the card-table for any non-immune space.
3856 if (is_immune_space) {
3857 card_table->Scan</*kClearCard*/false>(space->GetMarkBitmap(),
3858 space->Begin(),
3859 space->End(),
3860 visitor,
3861 minimum_age);
3862 } else {
3863 card_table->Scan</*kClearCard*/true>(space->GetMarkBitmap(),
3864 space->Begin(),
3865 space->End(),
3866 visitor,
3867 minimum_age);
3868 }
3869 } else {
3870 DCHECK_EQ(minimum_age, gc::accounting::CardTable::kCardAged);
3871 accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
3872 if (table) {
3873 table->ProcessCards();
3874 card_table->Scan</*kClearCard*/false>(space->GetMarkBitmap(),
3875 space->Begin(),
3876 space->End(),
3877 visitor,
3878 minimum_age);
3879 } else {
3880 CardModifiedVisitor card_modified_visitor(this, space->GetMarkBitmap(), card_table);
3881 // For the alloc spaces we should age the dirty cards and clear the rest.
3882 // For image and zygote-space without mod-union-table, age the dirty
3883 // cards but keep the already aged cards unchanged.
3884 // In either case, visit the objects on the cards that were changed from
3885 // dirty to aged.
3886 if (is_immune_space) {
3887 card_table->ModifyCardsAtomic(space->Begin(),
3888 space->End(),
3889 [](uint8_t card) {
3890 return (card == gc::accounting::CardTable::kCardClean)
3891 ? card
3892 : gc::accounting::CardTable::kCardAged;
3893 },
3894 card_modified_visitor);
3895 } else {
3896 card_table->ModifyCardsAtomic(space->Begin(),
3897 space->End(),
3898 AgeCardVisitor(),
3899 card_modified_visitor);
3900 }
3901 }
3902 }
3903 }
3904 }
3905
3906 void MarkCompact::RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age) {
3907 ScanDirtyObjects(paused, minimum_age);
3908 ProcessMarkStack();
3909 }
3910
3911 void MarkCompact::MarkRoots(VisitRootFlags flags) {
3912 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3913 Runtime* runtime = Runtime::Current();
3914   // Make sure that the checkpoint which collects the stack roots is the first
3915   // one capturing GC-roots, as it establishes the address after which
3916   // everything allocated (during this marking phase) will be considered
3917   // 'marked'.
3918 MarkRootsCheckpoint(thread_running_gc_, runtime);
3919 MarkNonThreadRoots(runtime);
3920 MarkConcurrentRoots(flags, runtime);
3921 }
3922
3923 void MarkCompact::PreCleanCards() {
3924 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3925 CHECK(!Locks::mutator_lock_->IsExclusiveHeld(thread_running_gc_));
3926 MarkRoots(static_cast<VisitRootFlags>(kVisitRootFlagClearRootLog | kVisitRootFlagNewRoots));
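  // kCardDirty - 1 is the aged-card value (see the DCHECK in ScanDirtyObjects()), so
  // the pass below concurrently re-scans cards dirtied during marking, reducing the
  // amount of work left for the pause.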
3927 RecursiveMarkDirtyObjects(/*paused*/ false, accounting::CardTable::kCardDirty - 1);
3928 }
3929
3930 // In a concurrent marking algorithm, if we are not using a write/read barrier, as
3931 // in this case, then we need a stop-the-world (STW) round at the end to mark
3932 // objects which were written to while concurrent marking was in
3933 // progress.
3934 // In order to minimize the pause time, we could take one of two approaches:
3935 // 1. Keep repeating concurrent marking of dirty cards until the time spent goes
3936 //    below a threshold.
3937 // 2. Do two rounds concurrently and then attempt a paused one. If we figure
3938 //    that it's taking too long, then resume mutators and retry.
3939 //
3940 // Given the non-trivial fixed overhead of running a round (card table and root
3941 // scan), it might be better to go with approach 2.
3942 void MarkCompact::MarkingPhase() {
3943 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
3944 DCHECK_EQ(thread_running_gc_, Thread::Current());
3945 WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
3946 BindAndResetBitmaps();
3947 MarkZygoteLargeObjects();
3948 MarkRoots(
3949 static_cast<VisitRootFlags>(kVisitRootFlagAllRoots | kVisitRootFlagStartLoggingNewRoots));
3950 MarkReachableObjects();
3951 // Pre-clean dirtied cards to reduce pauses.
3952 PreCleanCards();
3953
3954   // Set up reference processing and forward soft references once before enabling
3955   // the slow path (in MarkingPause).
3956 ReferenceProcessor* rp = GetHeap()->GetReferenceProcessor();
3957 bool clear_soft_references = GetCurrentIteration()->GetClearSoftReferences();
3958 rp->Setup(thread_running_gc_, this, /*concurrent=*/ true, clear_soft_references);
3959 if (!clear_soft_references) {
3960 // Forward as many SoftReferences as possible before inhibiting reference access.
3961 rp->ForwardSoftReferences(GetTimings());
3962 }
3963 }
3964
3965 class MarkCompact::RefFieldsVisitor {
3966 public:
3967   ALWAYS_INLINE explicit RefFieldsVisitor(MarkCompact* const mark_compact)
3968 : mark_compact_(mark_compact) {}
3969
3970   ALWAYS_INLINE void operator()(mirror::Object* obj,
3971 MemberOffset offset,
3972 bool is_static ATTRIBUTE_UNUSED) const
3973 REQUIRES(Locks::heap_bitmap_lock_)
3974 REQUIRES_SHARED(Locks::mutator_lock_) {
3975 if (kCheckLocks) {
3976 Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
3977 Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
3978 }
3979 mark_compact_->MarkObject(obj->GetFieldObject<mirror::Object>(offset), obj, offset);
3980 }
3981
3982   void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const
3983 REQUIRES(Locks::heap_bitmap_lock_)
3984 REQUIRES_SHARED(Locks::mutator_lock_) {
3985 mark_compact_->DelayReferenceReferent(klass, ref);
3986 }
3987
3988   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
3989 REQUIRES(Locks::heap_bitmap_lock_)
3990 REQUIRES_SHARED(Locks::mutator_lock_) {
3991 if (!root->IsNull()) {
3992 VisitRoot(root);
3993 }
3994 }
3995
3996   void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
3997 REQUIRES(Locks::heap_bitmap_lock_)
3998 REQUIRES_SHARED(Locks::mutator_lock_) {
3999 if (kCheckLocks) {
4000 Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
4001 Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
4002 }
4003 mark_compact_->MarkObject(root->AsMirrorPtr());
4004 }
4005
4006 private:
4007 MarkCompact* const mark_compact_;
4008 };
4009
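// Returns the number of live (marked) bytes covered by one chunk-info entry by
// popcounting the corresponding bitmap words. For illustration, if
// kBitmapWordsPerVectorWord were 2 and the two words had 3 and 5 bits set, the
// result would be (3 + 5) * kAlignment bytes.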
4010 template <size_t kAlignment>
4011 size_t MarkCompact::LiveWordsBitmap<kAlignment>::LiveBytesInBitmapWord(size_t chunk_idx) const {
4012 const size_t index = chunk_idx * kBitmapWordsPerVectorWord;
4013 size_t words = 0;
4014 for (uint32_t i = 0; i < kBitmapWordsPerVectorWord; i++) {
4015 words += POPCOUNT(Bitmap::Begin()[index + i]);
4016 }
4017 return words * kAlignment;
4018 }
4019
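// Records the (aligned) size of `obj` in the live-words bitmap and attributes it to
// the chunk-info entries the object spans: the first chunk gets the portion up to the
// next chunk boundary (or the whole object if it does not cross one), fully covered
// chunks get kOffsetChunkSize each, and the last chunk gets the remainder. For
// illustration, with a hypothetical 4 KiB chunk size, a 6 KiB object starting 1 KiB
// before a chunk boundary would contribute 1 KiB, 4 KiB and 1 KiB to three
// consecutive chunks.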
4020 void MarkCompact::UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) {
4021 DCHECK(obj != nullptr);
4022 DCHECK_EQ(obj_size, obj->SizeOf<kDefaultVerifyFlags>());
4023 uintptr_t obj_begin = reinterpret_cast<uintptr_t>(obj);
4024 UpdateClassAfterObjectMap(obj);
4025 size_t size = RoundUp(obj_size, kAlignment);
4026 uintptr_t bit_index = live_words_bitmap_->SetLiveWords(obj_begin, size);
4027 size_t chunk_idx = (obj_begin - live_words_bitmap_->Begin()) / kOffsetChunkSize;
4028 // Compute the bit-index within the chunk-info vector word.
4029 bit_index %= kBitsPerVectorWord;
4030 size_t first_chunk_portion = std::min(size, (kBitsPerVectorWord - bit_index) * kAlignment);
4031
4032 chunk_info_vec_[chunk_idx++] += first_chunk_portion;
4033 DCHECK_LE(first_chunk_portion, size);
4034 for (size -= first_chunk_portion; size > kOffsetChunkSize; size -= kOffsetChunkSize) {
4035 DCHECK_EQ(chunk_info_vec_[chunk_idx], 0u);
4036 chunk_info_vec_[chunk_idx++] = kOffsetChunkSize;
4037 }
4038 chunk_info_vec_[chunk_idx] += size;
4039 freed_objects_--;
4040 }
4041
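// kUpdateLiveWords is true only when scanning objects popped off the mark stack (see
// ProcessMarkStack()); card-table re-scans go through ScanObjectVisitor with
// kUpdateLiveWords == false. Since an object is pushed on the mark stack only when it
// is first marked, its liveness info is expected to be recorded at most once.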
4042 template <bool kUpdateLiveWords>
4043 void MarkCompact::ScanObject(mirror::Object* obj) {
4044 // The size of `obj` is used both here (to update `bytes_scanned_`) and in
4045 // `UpdateLivenessInfo`. As fetching this value can be expensive, do it once
4046 // here and pass that information to `UpdateLivenessInfo`.
4047 size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>();
4048 bytes_scanned_ += obj_size;
4049
4050 RefFieldsVisitor visitor(this);
4051   DCHECK(IsMarked(obj)) << "Scanning unmarked object " << obj << "\n" << heap_->DumpSpaces();
4052 if (kUpdateLiveWords && moving_space_bitmap_->HasAddress(obj)) {
4053 UpdateLivenessInfo(obj, obj_size);
4054 }
4055 obj->VisitReferences(visitor, visitor);
4056 }
4057
4058 // Scan anything that's on the mark stack.
4059 void MarkCompact::ProcessMarkStack() {
4060 TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
4061 // TODO: try prefetch like in CMS
4062 while (!mark_stack_->IsEmpty()) {
4063 mirror::Object* obj = mark_stack_->PopBack();
4064 DCHECK(obj != nullptr);
4065 ScanObject</*kUpdateLiveWords*/ true>(obj);
4066 }
4067 }
4068
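// Doubles the mark-stack capacity. Resize() is not assumed to preserve the existing
// entries, so they are copied into a temporary vector first and re-pushed afterwards.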
4069 void MarkCompact::ExpandMarkStack() {
4070 const size_t new_size = mark_stack_->Capacity() * 2;
4071 std::vector<StackReference<mirror::Object>> temp(mark_stack_->Begin(),
4072 mark_stack_->End());
4073 mark_stack_->Resize(new_size);
4074 for (auto& ref : temp) {
4075 mark_stack_->PushBack(ref.AsMirrorPtr());
4076 }
4077 DCHECK(!mark_stack_->IsFull());
4078 }
4079
4080 inline void MarkCompact::PushOnMarkStack(mirror::Object* obj) {
4081 if (UNLIKELY(mark_stack_->IsFull())) {
4082 ExpandMarkStack();
4083 }
4084 mark_stack_->PushBack(obj);
4085 }
4086
4087 inline void MarkCompact::MarkObjectNonNull(mirror::Object* obj,
4088 mirror::Object* holder,
4089 MemberOffset offset) {
4090 DCHECK(obj != nullptr);
4091 if (MarkObjectNonNullNoPush</*kParallel*/false>(obj, holder, offset)) {
4092 PushOnMarkStack(obj);
4093 }
4094 }
4095
4096 template <bool kParallel>
4097 inline bool MarkCompact::MarkObjectNonNullNoPush(mirror::Object* obj,
4098 mirror::Object* holder,
4099 MemberOffset offset) {
4100   // We expect most of the references to be in bump-pointer space, so try that
4101 // first to keep the cost of this function minimal.
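  // Set()/AtomicTestAndSet() report whether the bit was already set, so the negation
  // below is true only for the call that marks `obj` for the first time; only that
  // caller pushes the object on the mark stack.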
4102 if (LIKELY(moving_space_bitmap_->HasAddress(obj))) {
4103 return kParallel ? !moving_space_bitmap_->AtomicTestAndSet(obj)
4104 : !moving_space_bitmap_->Set(obj);
4105 } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4106 return kParallel ? !non_moving_space_bitmap_->AtomicTestAndSet(obj)
4107 : !non_moving_space_bitmap_->Set(obj);
4108 } else if (immune_spaces_.ContainsObject(obj)) {
4109 DCHECK(IsMarked(obj) != nullptr);
4110 return false;
4111 } else {
4112 // Must be a large-object space, otherwise it's a case of heap corruption.
4113 if (!IsAligned<kPageSize>(obj)) {
4114       // Objects in the large-object space are page aligned. So if we have an
4115       // object which doesn't belong to any space and is not page-aligned either,
4116       // then it's memory corruption.
4117 // TODO: implement protect/unprotect in bump-pointer space.
4118 heap_->GetVerification()->LogHeapCorruption(holder, offset, obj, /*fatal*/ true);
4119 }
4120 DCHECK_NE(heap_->GetLargeObjectsSpace(), nullptr)
4121 << "ref=" << obj
4122 << " doesn't belong to any of the spaces and large object space doesn't exist";
4123 accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4124 DCHECK(los_bitmap->HasAddress(obj));
4125 if (kParallel) {
4126 los_bitmap->AtomicTestAndSet(obj);
4127 } else {
4128 los_bitmap->Set(obj);
4129 }
4130     // Only strings and primitive arrays are allocated in the large-object
4131     // space, so there is no reason to push them on the mark stack.
4132 DCHECK(obj->IsString() || (obj->IsArrayInstance() && !obj->IsObjectArray()));
4133 return false;
4134 }
4135 }
4136
4137 inline void MarkCompact::MarkObject(mirror::Object* obj,
4138 mirror::Object* holder,
4139 MemberOffset offset) {
4140 if (obj != nullptr) {
4141 MarkObjectNonNull(obj, holder, offset);
4142 }
4143 }
4144
4145 mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) {
4146 MarkObject(obj, nullptr, MemberOffset(0));
4147 return obj;
4148 }
4149
4150 void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
4151 bool do_atomic_update ATTRIBUTE_UNUSED) {
4152 MarkObject(obj->AsMirrorPtr(), nullptr, MemberOffset(0));
4153 }
4154
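// Root visiting is mode-dependent: while compacting_ is set, roots are updated in
// place to their post-compaction addresses; during marking they are marked instead
// (and pushed on the mark stack if newly marked).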
4155 void MarkCompact::VisitRoots(mirror::Object*** roots,
4156 size_t count,
4157 const RootInfo& info) {
4158 if (compacting_) {
4159 for (size_t i = 0; i < count; ++i) {
4160 UpdateRoot(roots[i], info);
4161 }
4162 } else {
4163 for (size_t i = 0; i < count; ++i) {
4164 MarkObjectNonNull(*roots[i]);
4165 }
4166 }
4167 }
4168
4169 void MarkCompact::VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
4170 size_t count,
4171 const RootInfo& info) {
4172 // TODO: do we need to check if the root is null or not?
4173 if (compacting_) {
4174 for (size_t i = 0; i < count; ++i) {
4175 UpdateRoot(roots[i], info);
4176 }
4177 } else {
4178 for (size_t i = 0; i < count; ++i) {
4179 MarkObjectNonNull(roots[i]->AsMirrorPtr());
4180 }
4181 }
4182 }
4183
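// Returns nullptr if `obj` is not marked. For marked moving-space objects the result
// depends on compacting_: during compaction the post-compaction address is returned
// (computed differently for black allocations, i.e. objects at or above
// black_allocations_begin_, and for older objects); otherwise `obj` itself is returned.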
4184 mirror::Object* MarkCompact::IsMarked(mirror::Object* obj) {
4185 if (moving_space_bitmap_->HasAddress(obj)) {
4186 const bool is_black = reinterpret_cast<uint8_t*>(obj) >= black_allocations_begin_;
4187 if (compacting_) {
4188 if (is_black) {
4189 return PostCompactBlackObjAddr(obj);
4190 } else if (live_words_bitmap_->Test(obj)) {
4191 return PostCompactOldObjAddr(obj);
4192 } else {
4193 return nullptr;
4194 }
4195 }
4196 return (is_black || moving_space_bitmap_->Test(obj)) ? obj : nullptr;
4197 } else if (non_moving_space_bitmap_->HasAddress(obj)) {
4198 return non_moving_space_bitmap_->Test(obj) ? obj : nullptr;
4199 } else if (immune_spaces_.ContainsObject(obj)) {
4200 return obj;
4201 } else {
4202 DCHECK(heap_->GetLargeObjectsSpace())
4203 << "ref=" << obj
4204 << " doesn't belong to any of the spaces and large object space doesn't exist";
4205 accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
4206 if (los_bitmap->HasAddress(obj)) {
4207 DCHECK(IsAligned<kPageSize>(obj));
4208 return los_bitmap->Test(obj) ? obj : nullptr;
4209 } else {
4210       // The given obj is not in any of the known spaces, so return null. This can
4211       // happen, for instance, with interpreter caches, where a concurrent update
4212       // to the cache can result in obj being a non-reference. This is
4213       // tolerable because SweepInterpreterCaches only updates an entry if the
4214       // given object has moved, which cannot be the case for a non-reference.
4215 return nullptr;
4216 }
4217 }
4218 }
4219
4220 bool MarkCompact::IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
4221 bool do_atomic_update ATTRIBUTE_UNUSED) {
4222 mirror::Object* ref = obj->AsMirrorPtr();
4223 if (ref == nullptr) {
4224 return true;
4225 }
4226 return IsMarked(ref);
4227 }
4228
4229 // Process the 'referent' field in a java.lang.ref.Reference. If the referent
4230 // has not yet been marked, put it on the appropriate list in the heap for later
4231 // processing.
4232 void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
4233 ObjPtr<mirror::Reference> ref) {
4234 heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, this);
4235 }
4236
4237 void MarkCompact::FinishPhase() {
4238 GetCurrentIteration()->SetScannedBytes(bytes_scanned_);
4239 bool is_zygote = Runtime::Current()->IsZygote();
4240 compacting_ = false;
4241 minor_fault_initialized_ = !is_zygote && uffd_minor_fault_supported_;
4242   // Madvise compaction buffers. When using the threaded implementation, skip the first
4243   // page, which is used by the gc-thread for the next iteration; otherwise we get into a
4244   // deadlock due to a userfault on it in the next iteration. That page is not consuming
4245   // any physical memory because it has already been madvised and the subsequent read
4246   // userfault maps in a special zero-page.
4247 if (use_uffd_sigbus_ || !minor_fault_initialized_ || !shadow_to_space_map_.IsValid() ||
4248 shadow_to_space_map_.Size() < (moving_first_objs_count_ + black_page_count_) * kPageSize) {
4249 size_t adjustment = use_uffd_sigbus_ ? 0 : kPageSize;
4250 ZeroAndReleasePages(compaction_buffers_map_.Begin() + adjustment,
4251 compaction_buffers_map_.Size() - adjustment);
4252 } else if (shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) {
4253     // Now that we are going to use minor-faults from the next GC cycle, we can
4254 // unmap the buffers used by worker threads.
4255 compaction_buffers_map_.SetSize(kPageSize);
4256 }
4257 info_map_.MadviseDontNeedAndZero();
4258 live_words_bitmap_->ClearBitmap();
4259 // TODO: We can clear this bitmap right before compaction pause. But in that
4260 // case we need to ensure that we don't assert on this bitmap afterwards.
4261 // Also, we would still need to clear it here again as we may have to use the
4262 // bitmap for black-allocations (see UpdateMovingSpaceBlackAllocations()).
4263 moving_space_bitmap_->Clear();
4264
4265 if (UNLIKELY(is_zygote && IsValidFd(uffd_))) {
4266 heap_->DeleteThreadPool();
4267 // This unregisters all ranges as a side-effect.
4268 close(uffd_);
4269 uffd_ = kFdUnused;
4270 uffd_initialized_ = false;
4271 }
4272 CHECK(mark_stack_->IsEmpty()); // Ensure that the mark stack is empty.
4273 mark_stack_->Reset();
4274 DCHECK_EQ(thread_running_gc_, Thread::Current());
4275 if (kIsDebugBuild) {
4276 MutexLock mu(thread_running_gc_, lock_);
4277 if (updated_roots_.get() != nullptr) {
4278 updated_roots_->clear();
4279 }
4280 }
4281 class_after_obj_ordered_map_.clear();
4282 delete[] moving_pages_status_;
4283 linear_alloc_arenas_.clear();
4284 {
4285 ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
4286 WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_);
4287 heap_->ClearMarkedObjects();
4288 }
4289 std::swap(moving_to_space_fd_, moving_from_space_fd_);
4290 if (IsValidFd(moving_to_space_fd_)) {
4291     // Confirm that the memfd to be used for the to-space in the next GC cycle is empty.
4292 struct stat buf;
4293 DCHECK_EQ(fstat(moving_to_space_fd_, &buf), 0) << "fstat failed: " << strerror(errno);
4294 DCHECK_EQ(buf.st_blocks, 0u);
4295 }
4296 }
4297
4298 } // namespace collector
4299 } // namespace gc
4300 } // namespace art
4301