/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

#include "garbage_collector.h"

#include "android-base/stringprintf.h"

#include "base/dumpable.h"
#include "base/histogram-inl.h"
#include "base/logging.h"  // For VLOG_IS_ON.
#include "base/mutex-inl.h"
#include "base/systrace.h"
#include "base/time_utils.h"
#include "base/utils.h"
#include "gc/accounting/heap_bitmap.h"
#include "gc/gc_pause_listener.h"
#include "gc/heap.h"
#include "gc/space/large_object_space.h"
#include "gc/space/space-inl.h"
#include "runtime.h"
#include "thread-current-inl.h"
#include "thread_list.h"

namespace art HIDDEN {
namespace gc {
namespace collector {

namespace {

// Report a GC metric via the ATrace interface.
void TraceGCMetric(const char* name, int64_t value) {
  // ART's interface with systrace (through libartpalette) only supports
  // reporting 32-bit (signed) integer values at the moment. Upon
  // underflows/overflows, clamp metric values at `int32_t` min/max limits and
  // report these events via a corresponding underflow/overflow counter; also
  // log a warning about the first underflow/overflow occurrence.
  //
  // TODO(b/300015145): Consider extending libartpalette to allow reporting this
  // value as a 64-bit (signed) integer (instead of a 32-bit (signed) integer).
  // Note that this is likely unnecessary at the moment (November 2023) for any
  // size-related GC metric, given the maximum theoretical size of a managed
  // heap (4 GiB).
  if (UNLIKELY(value < std::numeric_limits<int32_t>::min())) {
    ATraceIntegerValue(name, std::numeric_limits<int32_t>::min());
    std::string underflow_counter_name = std::string(name) + " int32_t underflow";
    ATraceIntegerValue(underflow_counter_name.c_str(), 1);
    static bool int32_underflow_reported = false;
    if (!int32_underflow_reported) {
      LOG(WARNING) << "GC Metric \"" << name << "\" with value " << value
                   << " causing a 32-bit integer underflow";
      int32_underflow_reported = true;
    }
    return;
  }
  if (UNLIKELY(value > std::numeric_limits<int32_t>::max())) {
    ATraceIntegerValue(name, std::numeric_limits<int32_t>::max());
    std::string overflow_counter_name = std::string(name) + " int32_t overflow";
    ATraceIntegerValue(overflow_counter_name.c_str(), 1);
    static bool int32_overflow_reported = false;
    if (!int32_overflow_reported) {
      LOG(WARNING) << "GC Metric \"" << name << "\" with value " << value
                   << " causing a 32-bit integer overflow";
      int32_overflow_reported = true;
    }
    return;
  }
  ATraceIntegerValue(name, value);
}

}  // namespace

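// A freshly constructed Iteration holds placeholder values; Reset() is called again at the
// start of each collection in GarbageCollector::Run().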
Iteration::Iteration()
    : duration_ns_(0), timings_("GC iteration timing logger", true, VLOG_IS_ON(heap)) {
  Reset(kGcCauseBackground, false);  // Reset to some placeholder values.
}

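// Clear all per-iteration state (timings, pause times, freed/scanned counters) in preparation
// for a new collection with the given cause.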
void Iteration::Reset(GcCause gc_cause, bool clear_soft_references) {
  timings_.Reset();
  pause_times_.clear();
  duration_ns_ = 0;
  bytes_scanned_ = 0;
  clear_soft_references_ = clear_soft_references;
  gc_cause_ = gc_cause;
  freed_ = ObjectBytePair();
  freed_los_ = ObjectBytePair();
  freed_bytes_revoke_ = 0;
}

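// Estimated throughput of this iteration, in bytes freed per second.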
uint64_t Iteration::GetEstimatedThroughput() const {
  // Add 1ms to prevent possible division by 0.
  return (static_cast<uint64_t>(freed_.bytes) * 1000) / (NsToMs(GetDurationNs()) + 1);
}

GarbageCollector::GarbageCollector(Heap* heap, const std::string& name)
    : heap_(heap),
      name_(name),
      pause_histogram_((name_ + " paused").c_str(), kPauseBucketSize, kPauseBucketCount),
      rss_histogram_((name_ + " peak-rss").c_str(), kMemBucketSize, kMemBucketCount),
      freed_bytes_histogram_((name_ + " freed-bytes").c_str(), kMemBucketSize, kMemBucketCount),
      gc_time_histogram_(nullptr),
      metrics_gc_count_(nullptr),
      metrics_gc_count_delta_(nullptr),
      gc_throughput_histogram_(nullptr),
      gc_tracing_throughput_hist_(nullptr),
      gc_throughput_avg_(nullptr),
      gc_tracing_throughput_avg_(nullptr),
      gc_scanned_bytes_(nullptr),
      gc_scanned_bytes_delta_(nullptr),
      gc_freed_bytes_(nullptr),
      gc_freed_bytes_delta_(nullptr),
      gc_duration_(nullptr),
      gc_duration_delta_(nullptr),
      cumulative_timings_(name),
      pause_histogram_lock_("pause histogram lock", kDefaultMutexLevel, true),
      is_transaction_active_(false),
      are_metrics_initialized_(false) {
  ResetMeasurements();
}

void GarbageCollector::RegisterPause(uint64_t nano_length) {
  GetCurrentIteration()->pause_times_.push_back(nano_length);
}

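// Compute the resident set size (in bytes) of the given address ranges via mincore(2) and
// record it (in KB) in the peak-RSS histogram. Returns 0 on non-Linux systems or for an empty
// range list.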
uint64_t GarbageCollector::ExtractRssFromMincore(
    std::list<std::pair<void*, void*>>* gc_ranges) {
  uint64_t rss = 0;
  if (gc_ranges->empty()) {
    return 0;
  }
  // mincore() is a Linux-specific syscall.
#if defined(__linux__)
  using range_t = std::pair<void*, void*>;
  // Sort gc_ranges.
  gc_ranges->sort([](const range_t& a, const range_t& b) {
    return std::less()(a.first, b.first);
  });
  // Merge gc_ranges. This is necessary because the kernel may merge contiguous
  // regions if their properties match. It is sufficient because the kernel does
  // not merge adjoining ranges that differ only in name.
  size_t vec_len = 0;
  for (auto it = gc_ranges->begin(); it != gc_ranges->end(); it++) {
    auto next_it = it;
    next_it++;
    while (next_it != gc_ranges->end()) {
      if (it->second == next_it->first) {
        it->second = next_it->second;
        next_it = gc_ranges->erase(next_it);
      } else {
        break;
      }
    }
    size_t length = static_cast<uint8_t*>(it->second) - static_cast<uint8_t*>(it->first);
    // Compute max length for vector allocation later.
    vec_len = std::max(vec_len, DivideByPageSize(length));
  }
  std::unique_ptr<unsigned char[]> vec(new unsigned char[vec_len]);
  for (const auto it : *gc_ranges) {
    size_t length = static_cast<uint8_t*>(it.second) - static_cast<uint8_t*>(it.first);
    if (mincore(it.first, length, vec.get()) == 0) {
      for (size_t i = 0; i < DivideByPageSize(length); i++) {
        // Least significant bit represents residency of a page. Other bits are
        // reserved.
        rss += vec[i] & 0x1;
      }
    } else {
      LOG(WARNING) << "Call to mincore() on memory range [0x" << std::hex << it.first
                   << ", 0x" << it.second << std::dec << ") failed: " << strerror(errno);
    }
  }
  rss *= gPageSize;
  rss_histogram_.AddValue(rss / KB);
#endif
  return rss;
}

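// Drive one full collection: reset the current iteration, run the collector-specific phases,
// then update cumulative statistics, histograms, metrics, and ATrace counters.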
void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) {
  ScopedTrace trace(android::base::StringPrintf("%s %s GC", PrettyCause(gc_cause), GetName()));
  Thread* self = Thread::Current();
  Runtime* runtime = Runtime::Current();
  uint64_t start_time = NanoTime();
  uint64_t thread_cpu_start_time = ThreadCpuNanoTime();
  GetHeap()->CalculatePreGcWeightedAllocatedBytes();
  Iteration* current_iteration = GetCurrentIteration();
  current_iteration->Reset(gc_cause, clear_soft_references);
  // Note that transaction mode is single-threaded and there is no asynchronous GC, so this flag
  // doesn't change in the middle of a GC.
  is_transaction_active_ = runtime->IsActiveTransaction();
  RunPhases();  // Run all the GC phases.
  GetHeap()->CalculatePostGcWeightedAllocatedBytes();
  // Add the current timings to the cumulative timings.
  cumulative_timings_.AddLogger(*GetTimings());
  // Update cumulative statistics with how many bytes the GC iteration freed.
  total_freed_objects_ += current_iteration->GetFreedObjects() +
      current_iteration->GetFreedLargeObjects();
  total_scanned_bytes_ += current_iteration->GetScannedBytes();
  int64_t freed_bytes = current_iteration->GetFreedBytes() +
      current_iteration->GetFreedLargeObjectBytes();
  total_freed_bytes_ += freed_bytes;
  // Round negative freed bytes to 0, as we are not interested in such corner cases.
  freed_bytes_histogram_.AddValue(std::max<int64_t>(freed_bytes / KB, 0));
  uint64_t end_time = NanoTime();
  uint64_t thread_cpu_end_time = ThreadCpuNanoTime();
  total_thread_cpu_time_ns_ += thread_cpu_end_time - thread_cpu_start_time;
  uint64_t duration_ns = end_time - start_time;
  current_iteration->SetDurationNs(duration_ns);
  if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
    // The entire GC was paused: clear the fake pauses which might be in the pause times and add
    // the whole GC duration.
    current_iteration->pause_times_.clear();
    RegisterPause(duration_ns);
  }
  total_time_ns_ += duration_ns;
  uint64_t total_pause_time_ns = 0;
  for (uint64_t pause_time : current_iteration->GetPauseTimes()) {
    MutexLock mu(self, pause_histogram_lock_);
    pause_histogram_.AdjustAndAddValue(pause_time);
    total_pause_time_ns += pause_time;
  }
  metrics::ArtMetrics* metrics = runtime->GetMetrics();
  // Report STW pause time in microseconds.
  const uint64_t total_pause_time_us = total_pause_time_ns / 1'000;
  metrics->WorldStopTimeDuringGCAvg()->Add(total_pause_time_us);
  metrics->GcWorldStopTime()->Add(total_pause_time_us);
  metrics->GcWorldStopTimeDelta()->Add(total_pause_time_us);
  metrics->GcWorldStopCount()->AddOne();
  metrics->GcWorldStopCountDelta()->AddOne();
  // Report total collection time of all GCs put together.
  metrics->TotalGcCollectionTime()->Add(NsToMs(duration_ns));
  metrics->TotalGcCollectionTimeDelta()->Add(NsToMs(duration_ns));
  if (are_metrics_initialized_) {
    metrics_gc_count_->Add(1);
    metrics_gc_count_delta_->Add(1);
    // Report GC time in milliseconds.
    gc_time_histogram_->Add(NsToMs(duration_ns));
    // Throughput in bytes/s. Add 1us to prevent possible division by 0.
    uint64_t throughput = (current_iteration->GetScannedBytes() * 1'000'000)
            / (NsToUs(duration_ns) + 1);
    // Report in MB/s.
    throughput /= MB;
    gc_tracing_throughput_hist_->Add(throughput);
    gc_tracing_throughput_avg_->Add(throughput);

    // Report GC throughput in MB/s.
    throughput = current_iteration->GetEstimatedThroughput() / MB;
    gc_throughput_histogram_->Add(throughput);
    gc_throughput_avg_->Add(throughput);

    gc_scanned_bytes_->Add(current_iteration->GetScannedBytes());
    gc_scanned_bytes_delta_->Add(current_iteration->GetScannedBytes());
    gc_freed_bytes_->Add(current_iteration->GetFreedBytes());
    gc_freed_bytes_delta_->Add(current_iteration->GetFreedBytes());
    gc_duration_->Add(NsToMs(current_iteration->GetDurationNs()));
    gc_duration_delta_->Add(NsToMs(current_iteration->GetDurationNs()));
  }

  // Report some metrics via the ATrace interface, to surface them in Perfetto.
  TraceGCMetric("freed_normal_object_bytes", current_iteration->GetFreedBytes());
  TraceGCMetric("freed_large_object_bytes", current_iteration->GetFreedLargeObjectBytes());
  TraceGCMetric("freed_bytes", freed_bytes);

  is_transaction_active_ = false;
}

void GarbageCollector::SwapBitmaps() {
  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
  // Swap the live and mark bitmaps for each alloc space. This is needed since sweep re-swaps
  // these bitmaps. The bitmap swapping is an optimization so that we do not need to clear the live
  // bits of dead objects in the live bitmap.
  const GcType gc_type = GetGcType();
  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
    // We never allocate into zygote spaces.
    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect ||
        (gc_type == kGcTypeFull &&
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect)) {
      if (space->GetLiveBitmap() != nullptr && !space->HasBoundBitmaps()) {
        CHECK(space->IsContinuousMemMapAllocSpace());
        space->AsContinuousMemMapAllocSpace()->SwapBitmaps();
      }
    }
  }
  for (const auto& disc_space : GetHeap()->GetDiscontinuousSpaces()) {
    disc_space->AsLargeObjectSpace()->SwapBitmaps();
  }
}

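// Mean throughput across all iterations since the last ResetMeasurements(), in bytes freed per
// second.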
uint64_t GarbageCollector::GetEstimatedMeanThroughput() const {
  // Add 1ms to prevent possible division by 0.
  return (total_freed_bytes_ * 1000) / (NsToMs(GetCumulativeTimings().GetTotalNs()) + 1);
}

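// Reset all cumulative statistics and histograms collected so far.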
void GarbageCollector::ResetMeasurements() {
  {
    MutexLock mu(Thread::Current(), pause_histogram_lock_);
    pause_histogram_.Reset();
  }
  cumulative_timings_.Reset();
  rss_histogram_.Reset();
  freed_bytes_histogram_.Reset();
  total_thread_cpu_time_ns_ = 0u;
  total_time_ns_ = 0u;
  total_freed_objects_ = 0u;
  total_freed_bytes_ = 0;
  total_scanned_bytes_ = 0u;
}

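// Suspend all mutator threads for the lifetime of this object, optionally notifying the heap's
// GcPauseListener when the pause starts and ends.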
GarbageCollector::ScopedPause::ScopedPause(GarbageCollector* collector, bool with_reporting)
    : start_time_(NanoTime()), collector_(collector), with_reporting_(with_reporting) {
  Runtime* runtime = Runtime::Current();
  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
  if (with_reporting) {
    GcPauseListener* pause_listener = runtime->GetHeap()->GetGcPauseListener();
    if (pause_listener != nullptr) {
      pause_listener->StartPause();
    }
  }
}

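// Record the pause duration, notify the listener if requested, and resume all threads.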
GarbageCollector::ScopedPause::~ScopedPause() {
  collector_->RegisterPause(NanoTime() - start_time_);
  Runtime* runtime = Runtime::Current();
  if (with_reporting_) {
    GcPauseListener* pause_listener = runtime->GetHeap()->GetGcPauseListener();
    if (pause_listener != nullptr) {
      pause_listener->EndPause();
    }
  }
  runtime->GetThreadList()->ResumeAll();
}

// Returns the current GC iteration and associated info.
Iteration* GarbageCollector::GetCurrentIteration() {
  return heap_->GetCurrentGcIteration();
}
const Iteration* GarbageCollector::GetCurrentIteration() const {
  return heap_->GetCurrentGcIteration();
}

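// Whether freed pages should be returned to the OS right away rather than kept for reuse.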
bool GarbageCollector::ShouldEagerlyReleaseMemoryToOS() const {
  Runtime* runtime = Runtime::Current();
  // Zygote isn't a memory-heavy process; we should always release memory to the OS immediately.
  if (runtime->IsZygote()) {
    return true;
  }
  if (GetCurrentIteration()->GetGcCause() == kGcCauseExplicit &&
      !runtime->IsEagerlyReleaseExplicitGcDisabled()) {
    // Our behavior with explicit GCs is to always release any available memory.
    return true;
  }
  // Keep the memory if the app is in the foreground. If it is in the background or
  // goes into the background (see invocation with cause kGcCauseCollectorTransition),
  // release the memory.
  return !runtime->InJankPerceptibleProcessState();
}

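// Record objects/bytes freed in regular spaces (RecordFree) and in the large object space
// (RecordFreeLOS), both in the current iteration and in the heap's global counters.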
void GarbageCollector::RecordFree(const ObjectBytePair& freed) {
  GetCurrentIteration()->freed_.Add(freed);
  heap_->RecordFree(freed.objects, freed.bytes);
}
void GarbageCollector::RecordFreeLOS(const ObjectBytePair& freed) {
  GetCurrentIteration()->freed_los_.Add(freed);
  heap_->RecordFree(freed.objects, freed.bytes);
}

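// Total (histogram-adjusted) pause time across all recorded pauses, in nanoseconds.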
uint64_t GarbageCollector::GetTotalPausedTimeNs() {
  MutexLock mu(Thread::Current(), pause_histogram_lock_);
  return pause_histogram_.AdjustedSum();
}

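// Dump cumulative timing, pause, peak-RSS, and freed-bytes statistics for this collector.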
void GarbageCollector::DumpPerformanceInfo(std::ostream& os) {
  const CumulativeLogger& logger = GetCumulativeTimings();
  const size_t iterations = logger.GetIterations();
  if (iterations == 0) {
    return;
  }
  os << Dumpable<CumulativeLogger>(logger);
  const uint64_t total_ns = logger.GetTotalNs();
  const double seconds = NsToMs(total_ns) / 1000.0;
  const uint64_t freed_bytes = GetTotalFreedBytes();
  const uint64_t scanned_bytes = GetTotalScannedBytes();
  {
    MutexLock mu(Thread::Current(), pause_histogram_lock_);
    if (pause_histogram_.SampleSize() > 0) {
      Histogram<uint64_t>::CumulativeData cumulative_data;
      pause_histogram_.CreateHistogram(&cumulative_data);
      pause_histogram_.PrintConfidenceIntervals(os, 0.99, cumulative_data);
    }
  }
#if defined(__linux__)
  if (rss_histogram_.SampleSize() > 0) {
    os << rss_histogram_.Name()
       << ": Avg: " << PrettySize(rss_histogram_.Mean() * KB)
       << " Max: " << PrettySize(rss_histogram_.Max() * KB)
       << " Min: " << PrettySize(rss_histogram_.Min() * KB) << "\n";
    os << "Peak-rss Histogram: ";
    rss_histogram_.DumpBins(os);
    os << "\n";
  }
#endif
  if (freed_bytes_histogram_.SampleSize() > 0) {
    os << freed_bytes_histogram_.Name()
       << ": Avg: " << PrettySize(freed_bytes_histogram_.Mean() * KB)
       << " Max: " << PrettySize(freed_bytes_histogram_.Max() * KB)
       << " Min: " << PrettySize(freed_bytes_histogram_.Min() * KB) << "\n";
    os << "Freed-bytes histogram: ";
    freed_bytes_histogram_.DumpBins(os);
    os << "\n";
  }
  const double cpu_seconds = NsToMs(GetTotalCpuTime()) / 1000.0;
  os << GetName() << " total time: " << PrettyDuration(total_ns)
     << " mean time: " << PrettyDuration(total_ns / iterations) << "\n"
     << GetName() << " freed: " << PrettySize(freed_bytes) << "\n"
     << GetName() << " throughput: " << PrettySize(freed_bytes / seconds) << "/s"
     << "  per cpu-time: "
     << static_cast<uint64_t>(freed_bytes / cpu_seconds) << "/s / "
     << PrettySize(freed_bytes / cpu_seconds) << "/s\n"
     << GetName() << " tracing throughput: "
     << PrettySize(scanned_bytes / seconds) << "/s "
     << " per cpu-time: "
     << PrettySize(scanned_bytes / cpu_seconds) << "/s\n";
}

}  // namespace collector
}  // namespace gc
}  // namespace art