/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#include <algorithm>
#include <limits>
#include <memory>
#include <string>

#include "garbage_collector.h"

#include "android-base/stringprintf.h"

25 #include "base/dumpable.h"
26 #include "base/histogram-inl.h"
27 #include "base/logging.h" // For VLOG_IS_ON.
28 #include "base/mutex-inl.h"
29 #include "base/systrace.h"
30 #include "base/time_utils.h"
31 #include "base/utils.h"
32 #include "gc/accounting/heap_bitmap.h"
33 #include "gc/gc_pause_listener.h"
34 #include "gc/heap.h"
35 #include "gc/space/large_object_space.h"
36 #include "gc/space/space-inl.h"
37 #include "runtime.h"
38 #include "thread-current-inl.h"
39 #include "thread_list.h"
40
41 namespace art HIDDEN {
42 namespace gc {
43 namespace collector {
44
45 namespace {
46
// Report a GC metric via the ATrace interface.
void TraceGCMetric(const char* name, int64_t value) {
  // ART's interface with systrace (through libartpalette) only supports
  // reporting 32-bit (signed) integer values at the moment. Upon
  // underflows/overflows, clamp metric values at `int32_t` min/max limits and
  // report these events via a corresponding underflow/overflow counter; also
  // log a warning about the first underflow/overflow occurrence.
  //
  // TODO(b/300015145): Consider extending libartpalette to allow reporting
  // this value as a 64-bit (signed) integer (instead of a 32-bit (signed)
  // integer). Note that this is likely unnecessary at the moment (November
  // 2023) for any size-related GC metric, given the maximum theoretical size
  // of a managed heap (4 GiB).
  if (UNLIKELY(value < std::numeric_limits<int32_t>::min())) {
    ATraceIntegerValue(name, std::numeric_limits<int32_t>::min());
    std::string underflow_counter_name = std::string(name) + " int32_t underflow";
    ATraceIntegerValue(underflow_counter_name.c_str(), 1);
    static bool int32_underflow_reported = false;
    if (!int32_underflow_reported) {
      LOG(WARNING) << "GC Metric \"" << name << "\" with value " << value
                   << " causing a 32-bit integer underflow";
      int32_underflow_reported = true;
    }
    return;
  }
  if (UNLIKELY(value > std::numeric_limits<int32_t>::max())) {
    ATraceIntegerValue(name, std::numeric_limits<int32_t>::max());
    std::string overflow_counter_name = std::string(name) + " int32_t overflow";
    ATraceIntegerValue(overflow_counter_name.c_str(), 1);
    static bool int32_overflow_reported = false;
    if (!int32_overflow_reported) {
      LOG(WARNING) << "GC Metric \"" << name << "\" with value " << value
                   << " causing a 32-bit integer overflow";
      int32_overflow_reported = true;
    }
    return;
  }
  ATraceIntegerValue(name, value);
}

}  // namespace

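// Constructs an iteration with placeholder values; Reset() is called with the
// real cause and flags at the start of each collection.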
Iteration::Iteration()
    : duration_ns_(0), timings_("GC iteration timing logger", true, VLOG_IS_ON(heap)) {
  Reset(kGcCauseBackground, false);  // Reset to some placeholder values.
}

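// Clears all per-iteration state (timings, pause times, freed counts) in
// preparation for a new collection with the given cause.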
void Iteration::Reset(GcCause gc_cause, bool clear_soft_references) {
  timings_.Reset();
  pause_times_.clear();
  duration_ns_ = 0;
  bytes_scanned_ = 0;
  clear_soft_references_ = clear_soft_references;
  gc_cause_ = gc_cause;
  freed_ = ObjectBytePair();
  freed_los_ = ObjectBytePair();
  freed_bytes_revoke_ = 0;
}

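// Returns the estimated throughput of this iteration, in bytes freed per
// second of wall-clock time.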
uint64_t Iteration::GetEstimatedThroughput() const {
  // Add 1ms to prevent possible division by 0.
  return (static_cast<uint64_t>(freed_.bytes) * 1000) / (NsToMs(GetDurationNs()) + 1);
}

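// The histograms are created eagerly here; the metrics::* pointers stay null
// until the runtime metrics are wired up (tracked by are_metrics_initialized_).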
GarbageCollector::GarbageCollector(Heap* heap, const std::string& name)
    : heap_(heap),
      name_(name),
      pause_histogram_((name_ + " paused").c_str(), kPauseBucketSize, kPauseBucketCount),
      rss_histogram_((name_ + " peak-rss").c_str(), kMemBucketSize, kMemBucketCount),
      freed_bytes_histogram_((name_ + " freed-bytes").c_str(), kMemBucketSize, kMemBucketCount),
      gc_time_histogram_(nullptr),
      metrics_gc_count_(nullptr),
      metrics_gc_count_delta_(nullptr),
      gc_throughput_histogram_(nullptr),
      gc_tracing_throughput_hist_(nullptr),
      gc_throughput_avg_(nullptr),
      gc_tracing_throughput_avg_(nullptr),
      gc_scanned_bytes_(nullptr),
      gc_scanned_bytes_delta_(nullptr),
      gc_freed_bytes_(nullptr),
      gc_freed_bytes_delta_(nullptr),
      gc_duration_(nullptr),
      gc_duration_delta_(nullptr),
      cumulative_timings_(name),
      pause_histogram_lock_("pause histogram lock", kDefaultMutexLevel, true),
      is_transaction_active_(false),
      are_metrics_initialized_(false) {
  ResetMeasurements();
}

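// Records a pause of the given length, in nanoseconds, in the current
// iteration.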
void GarbageCollector::RegisterPause(uint64_t nano_length) {
  GetCurrentIteration()->pause_times_.push_back(nano_length);
}

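// Computes the resident set size (RSS), in bytes, of the given memory ranges
// using mincore(2), and records it (in KB) in rss_histogram_. Sorts and
// coalesces the ranges in place. Returns 0 on non-Linux platforms.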
uint64_t GarbageCollector::ExtractRssFromMincore(
    std::list<std::pair<void*, void*>>* gc_ranges) {
  uint64_t rss = 0;
  if (gc_ranges->empty()) {
    return 0;
  }
  // mincore() is a Linux-specific syscall.
#if defined(__linux__)
  using range_t = std::pair<void*, void*>;
  // Sort the ranges by start address. std::less is used because it provides a
  // total order over pointers, which the built-in operator< does not guarantee.
  gc_ranges->sort([](const range_t& a, const range_t& b) {
    return std::less()(a.first, b.first);
  });
  // Merge adjoining ranges. This is necessary because the kernel may merge
  // contiguous regions whose properties match; it is also sufficient, as the
  // kernel does not merge adjoining ranges that differ only in name.
  size_t vec_len = 0;
  for (auto it = gc_ranges->begin(); it != gc_ranges->end(); it++) {
    auto next_it = it;
    next_it++;
    while (next_it != gc_ranges->end()) {
      if (it->second == next_it->first) {
        it->second = next_it->second;
        next_it = gc_ranges->erase(next_it);
      } else {
        break;
      }
    }
    size_t length = static_cast<uint8_t*>(it->second) - static_cast<uint8_t*>(it->first);
    // Compute max length for vector allocation later.
    vec_len = std::max(vec_len, DivideByPageSize(length));
  }
  std::unique_ptr<unsigned char[]> vec(new unsigned char[vec_len]);
  for (const auto it : *gc_ranges) {
    size_t length = static_cast<uint8_t*>(it.second) - static_cast<uint8_t*>(it.first);
    if (mincore(it.first, length, vec.get()) == 0) {
      for (size_t i = 0; i < DivideByPageSize(length); i++) {
        // The least significant bit of each byte indicates whether the
        // corresponding page is resident; the other bits are reserved.
        rss += vec[i] & 0x1;
      }
    } else {
      LOG(WARNING) << "Call to mincore() on memory range [0x" << std::hex << it.first
                   << ", 0x" << it.second << std::dec << ") failed: " << strerror(errno);
    }
  }
  rss *= gPageSize;
  rss_histogram_.AddValue(rss / KB);
#endif
  return rss;
}

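// Runs one full collection: resets the current iteration, executes the
// collector's phases, and then folds the results into cumulative statistics,
// runtime metrics, and ATrace counters.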
void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) {
  ScopedTrace trace(android::base::StringPrintf("%s %s GC", PrettyCause(gc_cause), GetName()));
  Thread* self = Thread::Current();
  Runtime* runtime = Runtime::Current();
  uint64_t start_time = NanoTime();
  uint64_t thread_cpu_start_time = ThreadCpuNanoTime();
  GetHeap()->CalculatePreGcWeightedAllocatedBytes();
  Iteration* current_iteration = GetCurrentIteration();
  current_iteration->Reset(gc_cause, clear_soft_references);
  // Note: transaction mode is single-threaded and there is no asynchronous GC,
  // so this flag cannot change in the middle of a GC.
  is_transaction_active_ = runtime->IsActiveTransaction();
  RunPhases();  // Run all the GC phases.
  GetHeap()->CalculatePostGcWeightedAllocatedBytes();
  // Add the current timings to the cumulative timings.
  cumulative_timings_.AddLogger(*GetTimings());
  // Update cumulative statistics with how many bytes the GC iteration freed.
  total_freed_objects_ += current_iteration->GetFreedObjects() +
      current_iteration->GetFreedLargeObjects();
  total_scanned_bytes_ += current_iteration->GetScannedBytes();
  int64_t freed_bytes = current_iteration->GetFreedBytes() +
      current_iteration->GetFreedLargeObjectBytes();
  total_freed_bytes_ += freed_bytes;
  // Round negative freed-bytes values up to 0, as we are not interested in
  // such corner cases.
  freed_bytes_histogram_.AddValue(std::max<int64_t>(freed_bytes / KB, 0));
  uint64_t end_time = NanoTime();
  uint64_t thread_cpu_end_time = ThreadCpuNanoTime();
  total_thread_cpu_time_ns_ += thread_cpu_end_time - thread_cpu_start_time;
  uint64_t duration_ns = end_time - start_time;
  current_iteration->SetDurationNs(duration_ns);
  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
    // The entire GC was paused; clear any fake pauses that might be in the
    // pause times and record the whole GC duration as a single pause.
    current_iteration->pause_times_.clear();
    RegisterPause(duration_ns);
  }
  total_time_ns_ += duration_ns;
  uint64_t total_pause_time_ns = 0;
  for (uint64_t pause_time : current_iteration->GetPauseTimes()) {
    MutexLock mu(self, pause_histogram_lock_);
    pause_histogram_.AdjustAndAddValue(pause_time);
    total_pause_time_ns += pause_time;
  }
  metrics::ArtMetrics* metrics = runtime->GetMetrics();
  // Report STW pause time in microseconds.
  const uint64_t total_pause_time_us = total_pause_time_ns / 1'000;
  metrics->WorldStopTimeDuringGCAvg()->Add(total_pause_time_us);
  metrics->GcWorldStopTime()->Add(total_pause_time_us);
  metrics->GcWorldStopTimeDelta()->Add(total_pause_time_us);
  metrics->GcWorldStopCount()->AddOne();
  metrics->GcWorldStopCountDelta()->AddOne();
  // Report total collection time of all GCs put together.
  metrics->TotalGcCollectionTime()->Add(NsToMs(duration_ns));
  metrics->TotalGcCollectionTimeDelta()->Add(NsToMs(duration_ns));
  if (are_metrics_initialized_) {
    metrics_gc_count_->Add(1);
    metrics_gc_count_delta_->Add(1);
    // Report GC time in milliseconds.
    gc_time_histogram_->Add(NsToMs(duration_ns));
    // Throughput in bytes/s. Add 1us to prevent possible division by 0.
    uint64_t throughput = (current_iteration->GetScannedBytes() * 1'000'000)
        / (NsToUs(duration_ns) + 1);
    // Report in MB/s.
    throughput /= MB;
    gc_tracing_throughput_hist_->Add(throughput);
    gc_tracing_throughput_avg_->Add(throughput);

    // Report GC throughput in MB/s.
    throughput = current_iteration->GetEstimatedThroughput() / MB;
    gc_throughput_histogram_->Add(throughput);
    gc_throughput_avg_->Add(throughput);

    gc_scanned_bytes_->Add(current_iteration->GetScannedBytes());
    gc_scanned_bytes_delta_->Add(current_iteration->GetScannedBytes());
    gc_freed_bytes_->Add(current_iteration->GetFreedBytes());
    gc_freed_bytes_delta_->Add(current_iteration->GetFreedBytes());
    gc_duration_->Add(NsToMs(current_iteration->GetDurationNs()));
    gc_duration_delta_->Add(NsToMs(current_iteration->GetDurationNs()));
  }

  // Report some metrics via the ATrace interface, to surface them in Perfetto.
  TraceGCMetric("freed_normal_object_bytes", current_iteration->GetFreedBytes());
  TraceGCMetric("freed_large_object_bytes", current_iteration->GetFreedLargeObjectBytes());
  TraceGCMetric("freed_bytes", freed_bytes);

  is_transaction_active_ = false;
}

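// Swaps the live and mark bitmaps of every space collected by this GC type,
// including large-object spaces.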
void GarbageCollector::SwapBitmaps() {
  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
  // Swap the live and mark bitmaps for each alloc space. This is needed since
  // sweep re-swaps these bitmaps. The bitmap swapping is an optimization so
  // that we do not need to clear the live bits of dead objects in the live
  // bitmap.
  const GcType gc_type = GetGcType();
  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
    // We never allocate into zygote spaces.
    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect ||
        (gc_type == kGcTypeFull &&
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect)) {
      if (space->GetLiveBitmap() != nullptr && !space->HasBoundBitmaps()) {
        CHECK(space->IsContinuousMemMapAllocSpace());
        space->AsContinuousMemMapAllocSpace()->SwapBitmaps();
      }
    }
  }
  for (const auto& disc_space : GetHeap()->GetDiscontinuousSpaces()) {
    disc_space->AsLargeObjectSpace()->SwapBitmaps();
  }
}

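// Returns the mean throughput over all completed iterations, in bytes freed
// per second of cumulative GC time.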
uint64_t GarbageCollector::GetEstimatedMeanThroughput() const {
  // Add 1ms to prevent possible division by 0.
  return (total_freed_bytes_ * 1000) / (NsToMs(GetCumulativeTimings().GetTotalNs()) + 1);
}

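// Clears all cumulative timings, histograms, and counters.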
void GarbageCollector::ResetMeasurements() {
  {
    MutexLock mu(Thread::Current(), pause_histogram_lock_);
    pause_histogram_.Reset();
  }
  cumulative_timings_.Reset();
  rss_histogram_.Reset();
  freed_bytes_histogram_.Reset();
  total_thread_cpu_time_ns_ = 0u;
  total_time_ns_ = 0u;
  total_freed_objects_ = 0u;
  total_freed_bytes_ = 0;
  total_scanned_bytes_ = 0u;
}

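// Suspends all mutator threads for the lifetime of this object. If
// with_reporting is true, the heap's GcPauseListener (if any) is notified at
// the start and end of the pause. The destructor records the pause duration
// via RegisterPause() before resuming all threads.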
GarbageCollector::ScopedPause::ScopedPause(GarbageCollector* collector, bool with_reporting)
    : start_time_(NanoTime()), collector_(collector), with_reporting_(with_reporting) {
  Runtime* runtime = Runtime::Current();
  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
  if (with_reporting) {
    GcPauseListener* pause_listener = runtime->GetHeap()->GetGcPauseListener();
    if (pause_listener != nullptr) {
      pause_listener->StartPause();
    }
  }
}

GarbageCollector::ScopedPause::~ScopedPause() {
  collector_->RegisterPause(NanoTime() - start_time_);
  Runtime* runtime = Runtime::Current();
  if (with_reporting_) {
    GcPauseListener* pause_listener = runtime->GetHeap()->GetGcPauseListener();
    if (pause_listener != nullptr) {
      pause_listener->EndPause();
    }
  }
  runtime->GetThreadList()->ResumeAll();
}

// Returns the current GC iteration and associated info.
Iteration* GarbageCollector::GetCurrentIteration() {
  return heap_->GetCurrentGcIteration();
}
const Iteration* GarbageCollector::GetCurrentIteration() const {
  return heap_->GetCurrentGcIteration();
}

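// Returns true if memory freed by this GC should be returned to the OS
// immediately rather than retained for future allocations.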
bool GarbageCollector::ShouldEagerlyReleaseMemoryToOS() const {
  Runtime* runtime = Runtime::Current();
  // The zygote isn't a memory-heavy process; we should always release memory
  // to the OS promptly.
  if (runtime->IsZygote()) {
    return true;
  }
  if (GetCurrentIteration()->GetGcCause() == kGcCauseExplicit &&
      !runtime->IsEagerlyReleaseExplicitGcDisabled()) {
    // Our behavior with explicit GCs is to always release any available memory.
    return true;
  }
  // Hold on to the memory if the app is in the foreground. If it is in the
  // background, or is transitioning to the background (see invocations with
  // cause kGcCauseCollectorTransition), release the memory.
  return !runtime->InJankPerceptibleProcessState();
}

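// Records objects/bytes freed in regular (respectively, large-object) space
// in both the current iteration and the heap's global accounting.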
void GarbageCollector::RecordFree(const ObjectBytePair& freed) {
  GetCurrentIteration()->freed_.Add(freed);
  heap_->RecordFree(freed.objects, freed.bytes);
}
void GarbageCollector::RecordFreeLOS(const ObjectBytePair& freed) {
  GetCurrentIteration()->freed_los_.Add(freed);
  heap_->RecordFree(freed.objects, freed.bytes);
}

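// Returns the (histogram-adjusted) sum of all recorded pause times, in
// nanoseconds.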
uint64_t GarbageCollector::GetTotalPausedTimeNs() {
  MutexLock mu(Thread::Current(), pause_histogram_lock_);
  return pause_histogram_.AdjustedSum();
}

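// Dumps cumulative timing, pause, RSS, freed-bytes, and throughput statistics
// to the given stream. A no-op if no iterations have completed.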
void GarbageCollector::DumpPerformanceInfo(std::ostream& os) {
  const CumulativeLogger& logger = GetCumulativeTimings();
  const size_t iterations = logger.GetIterations();
  if (iterations == 0) {
    return;
  }
  os << Dumpable<CumulativeLogger>(logger);
  const uint64_t total_ns = logger.GetTotalNs();
  const double seconds = NsToMs(total_ns) / 1000.0;
  const uint64_t freed_bytes = GetTotalFreedBytes();
  const uint64_t scanned_bytes = GetTotalScannedBytes();
  {
    MutexLock mu(Thread::Current(), pause_histogram_lock_);
    if (pause_histogram_.SampleSize() > 0) {
      Histogram<uint64_t>::CumulativeData cumulative_data;
      pause_histogram_.CreateHistogram(&cumulative_data);
      pause_histogram_.PrintConfidenceIntervals(os, 0.99, cumulative_data);
    }
  }
#if defined(__linux__)
  if (rss_histogram_.SampleSize() > 0) {
    os << rss_histogram_.Name()
       << ": Avg: " << PrettySize(rss_histogram_.Mean() * KB)
       << " Max: " << PrettySize(rss_histogram_.Max() * KB)
       << " Min: " << PrettySize(rss_histogram_.Min() * KB) << "\n";
    os << "Peak-rss Histogram: ";
    rss_histogram_.DumpBins(os);
    os << "\n";
  }
#endif
  if (freed_bytes_histogram_.SampleSize() > 0) {
    os << freed_bytes_histogram_.Name()
       << ": Avg: " << PrettySize(freed_bytes_histogram_.Mean() * KB)
       << " Max: " << PrettySize(freed_bytes_histogram_.Max() * KB)
       << " Min: " << PrettySize(freed_bytes_histogram_.Min() * KB) << "\n";
    os << "Freed-bytes histogram: ";
    freed_bytes_histogram_.DumpBins(os);
    os << "\n";
  }
  const double cpu_seconds = NsToMs(GetTotalCpuTime()) / 1000.0;
  os << GetName() << " total time: " << PrettyDuration(total_ns)
     << " mean time: " << PrettyDuration(total_ns / iterations) << "\n"
     << GetName() << " freed: " << PrettySize(freed_bytes) << "\n"
     << GetName() << " throughput: " << PrettySize(freed_bytes / seconds) << "/s"
     << " per cpu-time: "
     << static_cast<uint64_t>(freed_bytes / cpu_seconds) << "/s / "
     << PrettySize(freed_bytes / cpu_seconds) << "/s\n"
     << GetName() << " tracing throughput: "
     << PrettySize(scanned_bytes / seconds) << "/s "
     << " per cpu-time: "
     << PrettySize(scanned_bytes / cpu_seconds) << "/s\n";
}

}  // namespace collector
}  // namespace gc
}  // namespace art