1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "perfetto_hprof"
18
19 #include "perfetto_hprof.h"
20
21 #include <fcntl.h>
22 #include <fnmatch.h>
23 #include <inttypes.h>
24 #include <sched.h>
25 #include <signal.h>
26 #include <sys/socket.h>
27 #include <sys/stat.h>
28 #include <sys/types.h>
29 #include <sys/un.h>
30 #include <sys/wait.h>
31 #include <thread>
32 #include <time.h>
33
34 #include <limits>
35 #include <optional>
36 #include <type_traits>
37
38 #include "android-base/file.h"
39 #include "android-base/logging.h"
40 #include "android-base/properties.h"
41 #include "base/fast_exit.h"
42 #include "base/systrace.h"
43 #include "gc/heap-visit-objects-inl.h"
44 #include "gc/heap.h"
45 #include "gc/scoped_gc_critical_section.h"
46 #include "mirror/object-refvisitor-inl.h"
47 #include "nativehelper/scoped_local_ref.h"
48 #include "perfetto/profiling/parse_smaps.h"
49 #include "perfetto/trace/interned_data/interned_data.pbzero.h"
50 #include "perfetto/trace/profiling/heap_graph.pbzero.h"
51 #include "perfetto/trace/profiling/profile_common.pbzero.h"
52 #include "perfetto/trace/profiling/smaps.pbzero.h"
53 #include "perfetto/config/profiling/java_hprof_config.pbzero.h"
54 #include "perfetto/protozero/packed_repeated_fields.h"
55 #include "perfetto/tracing.h"
56 #include "runtime-inl.h"
57 #include "runtime_callbacks.h"
58 #include "scoped_thread_state_change-inl.h"
59 #include "thread_list.h"
60 #include "well_known_classes.h"
61 #include "dex/descriptors_names.h"
62
63 // There are three threads involved in this:
64 // * listener thread: this is idle in the background when this plugin gets loaded, and waits
// for data on g_signal_pipe_fds.
66 // * signal thread: an arbitrary thread that handles the signal and writes data to
67 // g_signal_pipe_fds.
68 // * perfetto producer thread: once the signal is received, the app forks. In the newly forked
69 // child, the Perfetto Client API spawns a thread to communicate with traced.
70
71 namespace perfetto_hprof {
72
73 constexpr int kJavaHeapprofdSignal = __SIGRTMIN + 6;
74 constexpr time_t kWatchdogTimeoutSec = 120;
75 // This needs to be lower than the maximum acceptable chunk size, because this
76 // is checked *before* writing another submessage. We conservatively assume
77 // submessages can be up to 100k here for a 500k chunk size.
78 // DropBox has a 500k chunk limit, and each chunk needs to parse as a proto.
79 constexpr uint32_t kPacketSizeThreshold = 400000;
80 constexpr char kByte[1] = {'x'};
GetStateMutex()81 static art::Mutex& GetStateMutex() {
82 static art::Mutex state_mutex("perfetto_hprof_state_mutex", art::LockLevel::kGenericBottomLock);
83 return state_mutex;
84 }
85
GetStateCV()86 static art::ConditionVariable& GetStateCV() {
87 static art::ConditionVariable state_cv("perfetto_hprof_state_cv", GetStateMutex());
88 return state_cv;
89 }
90
91 static int requested_tracing_session_id = 0;
92 static State g_state = State::kUninitialized;
93 static bool g_oome_triggered = false;
94 static uint32_t g_oome_sessions_pending = 0;
95
96 // Pipe to signal from the signal handler into a worker thread that handles the
97 // dump requests.
98 int g_signal_pipe_fds[2];
99 static struct sigaction g_orig_act = {};
100
// Interns `s` in `*m`: returns its existing id, or assigns it the next free
// id (the current number of entries) and returns that.
template <typename T>
uint64_t FindOrAppend(std::map<T, uint64_t>* m, const T& s) {
  // try_emplace evaluates m->size() before any insertion takes place, so a
  // newly inserted key receives the pre-insertion entry count as its id.
  return m->try_emplace(s, m->size()).first->second;
}
109
// Arms a one-shot CLOCK_MONOTONIC timer that delivers SIGKILL to this process
// after kWatchdogTimeoutSec, so a wedged dump cannot linger forever. There is
// deliberately no disarm path; the dumping child is expected to exit first.
void ArmWatchdogOrDie() {
  timer_t timerid{};
  struct sigevent sev {};
  sev.sigev_notify = SIGEV_SIGNAL;
  // SIGKILL cannot be caught or ignored, so firing guarantees death.
  sev.sigev_signo = SIGKILL;

  if (timer_create(CLOCK_MONOTONIC, &sev, &timerid) == -1) {
    // This only gets called in the child, so we can fatal without impacting
    // the app.
    PLOG(FATAL) << "failed to create watchdog timer";
  }

  struct itimerspec its {};
  its.it_value.tv_sec = kWatchdogTimeoutSec;

  if (timer_settime(timerid, 0, &its, nullptr) == -1) {
    // This only gets called in the child, so we can fatal without impacting
    // the app.
    PLOG(FATAL) << "failed to arm watchdog timer";
  }
}
131
132 // Sample entries that match one of the following
133 // start with /system/
134 // start with /vendor/
135 // start with /data/app/
136 // contains "extracted in memory from Y", where Y matches any of the above
ShouldSampleSmapsEntry(const perfetto::profiling::SmapsEntry & e)137 bool ShouldSampleSmapsEntry(const perfetto::profiling::SmapsEntry& e) {
138 if (e.pathname.starts_with("/system/") ||
139 e.pathname.starts_with("/vendor/") ||
140 e.pathname.starts_with("/data/app/")) {
141 return true;
142 }
143 if (e.pathname.starts_with("[anon:")) {
144 if (e.pathname.find("extracted in memory from /system/") != std::string::npos) {
145 return true;
146 }
147 if (e.pathname.find("extracted in memory from /vendor/") != std::string::npos) {
148 return true;
149 }
150 if (e.pathname.find("extracted in memory from /data/app/") != std::string::npos) {
151 return true;
152 }
153 }
154 return false;
155 }
156
GetCurrentBootClockNs()157 uint64_t GetCurrentBootClockNs() {
158 struct timespec ts = {};
159 if (clock_gettime(CLOCK_BOOTTIME, &ts) != 0) {
160 LOG(FATAL) << "Failed to get boottime.";
161 }
162 return ts.tv_sec * 1000000000LL + ts.tv_nsec;
163 }
164
IsDebugBuild()165 bool IsDebugBuild() {
166 std::string build_type = android::base::GetProperty("ro.build.type", "");
167 return !build_type.empty() && build_type != "user";
168 }
169
170 // Verifies the manifest restrictions are respected.
171 // For regular heap dumps this is already handled by heapprofd.
IsOomeHeapDumpAllowed(const perfetto::DataSourceConfig & ds_config)172 bool IsOomeHeapDumpAllowed(const perfetto::DataSourceConfig& ds_config) {
173 if (art::Runtime::Current()->IsJavaDebuggable() || IsDebugBuild()) {
174 return true;
175 }
176
177 if (ds_config.session_initiator() ==
178 perfetto::DataSourceConfig::SESSION_INITIATOR_TRUSTED_SYSTEM) {
179 return art::Runtime::Current()->IsProfileable() || art::Runtime::Current()->IsSystemServer();
180 } else {
181 return art::Runtime::Current()->IsProfileableFromShell();
182 }
183 }
184
// Perfetto data source emitting the Java heap graph. One instance exists per
// tracing session that targets this process.
class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> {
 public:
  // Stall (rather than drop) when the shared memory buffer is exhausted, so
  // heap dumps are not silently truncated.
  constexpr static perfetto::BufferExhaustedPolicy kBufferExhaustedPolicy =
      perfetto::BufferExhaustedPolicy::kStall;

  explicit JavaHprofDataSource(bool is_oome_heap) : is_oome_heap_(is_oome_heap) {}

  // Parses the JavaHprofConfig and decides whether this instance is enabled.
  // For signal-triggered (non-OOME) dumps, only the session whose id matches
  // the requested one is accepted.
  void OnSetup(const SetupArgs& args) override {
    if (!is_oome_heap_) {
      uint64_t normalized_tracing_session_id =
          args.config->tracing_session_id() % std::numeric_limits<int32_t>::max();
      if (requested_tracing_session_id < 0) {
        LOG(ERROR) << "invalid requested tracing session id " << requested_tracing_session_id;
        return;
      }
      if (static_cast<uint64_t>(requested_tracing_session_id) != normalized_tracing_session_id) {
        return;
      }
    }

    // This is on the heap as it triggers -Wframe-larger-than.
    std::unique_ptr<perfetto::protos::pbzero::JavaHprofConfig::Decoder> cfg(
        new perfetto::protos::pbzero::JavaHprofConfig::Decoder(
            args.config->java_hprof_config_raw()));

    dump_smaps_ = cfg->dump_smaps();
    // Ignored type names are converted back from pretty form to descriptor
    // form for later comparison against runtime classes.
    for (auto it = cfg->ignored_types(); it; ++it) {
      std::string name = (*it).ToStdString();
      ignored_types_.emplace_back(art::InversePrettyDescriptor(name));
    }
    // This tracing session ID matches the requesting tracing session ID, so we know heapprofd
    // has verified it targets this process.
    enabled_ =
        !is_oome_heap_ || (IsOomeHeapDumpAllowed(*args.config) && IsOomeDumpEnabled(*cfg.get()));
  }

  bool dump_smaps() { return dump_smaps_; }

  // Per-DataSource enable bit. Invoked by the ::Trace method.
  bool enabled() { return enabled_; }

  void OnStart(const StartArgs&) override {
    art::MutexLock lk(art_thread(), GetStateMutex());
    // In case there are multiple tracing sessions waiting for an OOME error,
    // there will be a data source instance for each of them. Before the
    // transition to kStart and signaling the dumping thread, we need to make
    // sure all the data sources are ready.
    if (is_oome_heap_ && g_oome_sessions_pending > 0) {
      --g_oome_sessions_pending;
    }
    if (g_state == State::kWaitForStart) {
      // WriteHeapPackets is responsible for checking whether the DataSource is
      // actually enabled.
      if (!is_oome_heap_ || g_oome_sessions_pending == 0) {
        g_state = State::kStart;
        GetStateCV().Broadcast(art_thread());
      }
    }
  }

  // This datasource can be used with a trace config with a short duration_ms
  // but a long datasource_stop_timeout_ms. In that case, OnStop is called (in
  // general) before the dump is done. In that case, we handle the stop
  // asynchronously, and notify the tracing service once we are done.
  // In case OnStop is called after the dump is done (but before the process
  // has exited), we just acknowledge the request.
  void OnStop(const StopArgs& a) override {
    art::MutexLock lk(art_thread(), finish_mutex_);
    if (is_finished_) {
      return;
    }
    is_stopped_ = true;
    async_stop_ = a.HandleStopAsynchronously();
  }

  static art::Thread* art_thread() {
    // TODO(fmayer): Attach the Perfetto producer thread to ART and give it a name. This is
    // not trivial, we cannot just attach the first time this method is called, because
    // AttachCurrentThread deadlocks with the ConditionVariable::Wait in WaitForDataSource.
    //
    // We should attach the thread as soon as the Client API spawns it, but that needs more
    // complicated plumbing.
    return nullptr;
  }

  std::vector<std::string> ignored_types() { return ignored_types_; }

  // Called by the dumping code once the dump is complete: either acknowledges
  // a pending asynchronous stop, or records completion so that a later OnStop
  // can return immediately.
  void Finish() {
    art::MutexLock lk(art_thread(), finish_mutex_);
    if (is_stopped_) {
      async_stop_();
    } else {
      is_finished_ = true;
    }
  }

 private:
  // Returns true if this process's argv[0] matches any fnmatch pattern in the
  // config's process_cmdline list.
  static bool IsOomeDumpEnabled(const perfetto::protos::pbzero::JavaHprofConfig::Decoder& cfg) {
    std::string cmdline;
    if (!android::base::ReadFileToString("/proc/self/cmdline", &cmdline)) {
      return false;
    }
    // /proc/self/cmdline is NUL-separated, so c_str() yields just argv[0].
    const char* argv0 = cmdline.c_str();

    for (auto it = cfg.process_cmdline(); it; ++it) {
      std::string pattern = (*it).ToStdString();
      if (fnmatch(pattern.c_str(), argv0, FNM_NOESCAPE) == 0) {
        return true;
      }
    }
    return false;
  }

  bool is_oome_heap_ = false;
  bool enabled_ = false;
  bool dump_smaps_ = false;
  std::vector<std::string> ignored_types_;

  // Guards the stop/finish handshake between OnStop and Finish.
  art::Mutex finish_mutex_{"perfetto_hprof_ds_mutex", art::LockLevel::kGenericBottomLock};
  bool is_finished_ = false;
  bool is_stopped_ = false;
  std::function<void()> async_stop_;
};
308
// Registers the data source named `ds_name` with the system Perfetto backend
// (traced). `is_oome_heap` is forwarded to the JavaHprofDataSource
// constructor.
void SetupDataSource(const std::string& ds_name, bool is_oome_heap) {
  perfetto::TracingInitArgs args;
  args.backends = perfetto::BackendType::kSystemBackend;
  perfetto::Tracing::Initialize(args);

  perfetto::DataSourceDescriptor dsd;
  dsd.set_name(ds_name);
  // The dump may finish after the session stop request; tell the service to
  // wait for our explicit stop notification (OnStop/Finish above).
  dsd.set_will_notify_on_stop(true);
  JavaHprofDataSource::Register(dsd, is_oome_heap);
}
319
320 // Waits for the data source OnStart
// Blocks indefinitely until JavaHprofDataSource::OnStart moves g_state to
// kStart.
void WaitForDataSource(art::Thread* self) {
  art::MutexLock lk(self, GetStateMutex());
  while (g_state != State::kStart) {
    GetStateCV().Wait(self);
  }
}
327
328 // Waits for the data source OnStart with a timeout. Returns false on timeout.
TimedWaitForDataSource(art::Thread * self,int64_t timeout_ms)329 bool TimedWaitForDataSource(art::Thread* self, int64_t timeout_ms) {
330 const uint64_t cutoff_ns = GetCurrentBootClockNs() + timeout_ms * 1000000;
331 art::MutexLock lk(self, GetStateMutex());
332 while (g_state != State::kStart) {
333 const uint64_t current_ns = GetCurrentBootClockNs();
334 if (current_ns >= cutoff_ns) {
335 return false;
336 }
337 GetStateCV().TimedWait(self, (cutoff_ns - current_ns) / 1000000, 0);
338 }
339 return true;
340 }
341
342 // Helper class to write Java heap dumps to `ctx`. The whole heap dump can be
343 // split into more perfetto.protos.HeapGraph messages, to avoid making each
344 // message too big.
345 class Writer {
346 public:
Writer(pid_t pid,JavaHprofDataSource::TraceContext * ctx,uint64_t timestamp)347 Writer(pid_t pid, JavaHprofDataSource::TraceContext* ctx, uint64_t timestamp)
348 : pid_(pid), ctx_(ctx), timestamp_(timestamp),
349 last_written_(ctx_->written()) {}
350
351 // Return whether the next call to GetHeapGraph will create a new TracePacket.
will_create_new_packet() const352 bool will_create_new_packet() const {
353 return !heap_graph_ || ctx_->written() - last_written_ > kPacketSizeThreshold;
354 }
355
GetHeapGraph()356 perfetto::protos::pbzero::HeapGraph* GetHeapGraph() {
357 if (will_create_new_packet()) {
358 CreateNewHeapGraph();
359 }
360 return heap_graph_;
361 }
362
Finalize()363 void Finalize() {
364 if (trace_packet_) {
365 trace_packet_->Finalize();
366 }
367 heap_graph_ = nullptr;
368 }
369
~Writer()370 ~Writer() { Finalize(); }
371
372 private:
373 Writer(const Writer&) = delete;
374 Writer& operator=(const Writer&) = delete;
375 Writer(Writer&&) = delete;
376 Writer& operator=(Writer&&) = delete;
377
CreateNewHeapGraph()378 void CreateNewHeapGraph() {
379 if (heap_graph_) {
380 heap_graph_->set_continued(true);
381 }
382 Finalize();
383
384 uint64_t written = ctx_->written();
385
386 trace_packet_ = ctx_->NewTracePacket();
387 trace_packet_->set_timestamp(timestamp_);
388 heap_graph_ = trace_packet_->set_heap_graph();
389 heap_graph_->set_pid(pid_);
390 heap_graph_->set_index(index_++);
391
392 last_written_ = written;
393 }
394
395 const pid_t pid_;
396 JavaHprofDataSource::TraceContext* const ctx_;
397 const uint64_t timestamp_;
398
399 uint64_t last_written_ = 0;
400
401 perfetto::DataSource<JavaHprofDataSource>::TraceContext::TracePacketHandle
402 trace_packet_;
403 perfetto::protos::pbzero::HeapGraph* heap_graph_ = nullptr;
404
405 uint64_t index_ = 0;
406 };
407
// Visitor that records a (field name, referred object) pair for every
// reference field of a visited object. Meant to be passed to
// art::mirror::Object::VisitReferences.
class ReferredObjectsFinder {
 public:
  explicit ReferredObjectsFinder(
      std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects,
      bool emit_field_ids)
      : referred_objects_(referred_objects), emit_field_ids_(emit_field_ids) {}

  // For art::mirror::Object::VisitReferences.
  void operator()(art::ObjPtr<art::mirror::Object> obj, art::MemberOffset offset,
                  bool is_static) const
      REQUIRES_SHARED(art::Locks::mutator_lock_) {
    if (offset.Uint32Value() == art::mirror::Object::ClassOffset().Uint32Value()) {
      // Skip shadow$klass pointer.
      return;
    }
    art::mirror::Object* ref = obj->GetFieldObject<art::mirror::Object>(offset);
    art::ArtField* field;
    if (is_static) {
      field = art::ArtField::FindStaticFieldWithOffset(obj->AsClass(), offset.Uint32Value());
    } else {
      field = art::ArtField::FindInstanceFieldWithOffset(obj->GetClass(), offset.Uint32Value());
    }
    // Falls back to an empty name when the field cannot be resolved from the
    // offset, or when field ids are not wanted.
    std::string field_name = "";
    if (field != nullptr && emit_field_ids_) {
      field_name = field->PrettyField(/*with_type=*/true);
    }
    referred_objects_->emplace_back(std::move(field_name), ref);
  }

  // No-op root visitors: this finder only cares about field references, but
  // the visitor interface requires these members.
  void VisitRootIfNonNull(
      [[maybe_unused]] art::mirror::CompressedReference<art::mirror::Object>* root) const {}
  void VisitRoot(
      [[maybe_unused]] art::mirror::CompressedReference<art::mirror::Object>* root) const {}

 private:
  // We can use a raw Object* pointer here, because there are no concurrent GC threads after the
  // fork.
  std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects_;
  // Prettifying field names is expensive; avoid if field name will not be used.
  bool emit_field_ids_;
};
449
// art::SingleRootVisitor that buckets every visited GC root object by its
// art::RootType.
class RootFinder : public art::SingleRootVisitor {
 public:
  explicit RootFinder(
      std::map<art::RootType, std::vector<art::mirror::Object*>>* root_objects)
      : root_objects_(root_objects) {}

  void VisitRoot(art::mirror::Object* root, const art::RootInfo& info) override {
    (*root_objects_)[info.GetType()].emplace_back(root);
  }

 private:
  // We can use a raw Object* pointer here, because there are no concurrent GC threads after the
  // fork.
  std::map<art::RootType, std::vector<art::mirror::Object*>>* root_objects_;
};
465
// Maps an ART GC root type to the corresponding HeapGraphRoot proto enum.
// Deliberately no default case: the compiler can then warn if a new
// art::RootType value is added without a mapping here.
perfetto::protos::pbzero::HeapGraphRoot::Type ToProtoType(art::RootType art_type) {
  using perfetto::protos::pbzero::HeapGraphRoot;
  switch (art_type) {
    case art::kRootUnknown:
      return HeapGraphRoot::ROOT_UNKNOWN;
    case art::kRootJNIGlobal:
      return HeapGraphRoot::ROOT_JNI_GLOBAL;
    case art::kRootJNILocal:
      return HeapGraphRoot::ROOT_JNI_LOCAL;
    case art::kRootJavaFrame:
      return HeapGraphRoot::ROOT_JAVA_FRAME;
    case art::kRootNativeStack:
      return HeapGraphRoot::ROOT_NATIVE_STACK;
    case art::kRootStickyClass:
      return HeapGraphRoot::ROOT_STICKY_CLASS;
    case art::kRootThreadBlock:
      return HeapGraphRoot::ROOT_THREAD_BLOCK;
    case art::kRootMonitorUsed:
      return HeapGraphRoot::ROOT_MONITOR_USED;
    case art::kRootThreadObject:
      return HeapGraphRoot::ROOT_THREAD_OBJECT;
    case art::kRootInternedString:
      return HeapGraphRoot::ROOT_INTERNED_STRING;
    case art::kRootFinalizing:
      return HeapGraphRoot::ROOT_FINALIZING;
    case art::kRootDebugger:
      return HeapGraphRoot::ROOT_DEBUGGER;
    case art::kRootReferenceCleanup:
      return HeapGraphRoot::ROOT_REFERENCE_CLEANUP;
    case art::kRootVMInternal:
      return HeapGraphRoot::ROOT_VM_INTERNAL;
    case art::kRootJNIMonitor:
      return HeapGraphRoot::ROOT_JNI_MONITOR;
  }
}
501
// Maps an exact combination of ART class flags to a HeapGraphType kind.
// Matching is on the full flag value, not individual bits; any combination
// not listed falls through to KIND_UNKNOWN.
perfetto::protos::pbzero::HeapGraphType::Kind ProtoClassKind(uint32_t class_flags) {
  using perfetto::protos::pbzero::HeapGraphType;
  switch (class_flags) {
    case art::mirror::kClassFlagNormal:
    case art::mirror::kClassFlagRecord:
      return HeapGraphType::KIND_NORMAL;
    case art::mirror::kClassFlagNoReferenceFields:
    case art::mirror::kClassFlagNoReferenceFields | art::mirror::kClassFlagRecord:
      return HeapGraphType::KIND_NOREFERENCES;
    case art::mirror::kClassFlagString | art::mirror::kClassFlagNoReferenceFields:
      return HeapGraphType::KIND_STRING;
    case art::mirror::kClassFlagObjectArray:
      return HeapGraphType::KIND_ARRAY;
    case art::mirror::kClassFlagClass:
      return HeapGraphType::KIND_CLASS;
    case art::mirror::kClassFlagClassLoader:
      return HeapGraphType::KIND_CLASSLOADER;
    case art::mirror::kClassFlagDexCache:
      return HeapGraphType::KIND_DEXCACHE;
    case art::mirror::kClassFlagSoftReference:
      return HeapGraphType::KIND_SOFT_REFERENCE;
    case art::mirror::kClassFlagWeakReference:
      return HeapGraphType::KIND_WEAK_REFERENCE;
    case art::mirror::kClassFlagFinalizerReference:
      return HeapGraphType::KIND_FINALIZER_REFERENCE;
    case art::mirror::kClassFlagPhantomReference:
      return HeapGraphType::KIND_PHANTOM_REFERENCE;
    default:
      return HeapGraphType::KIND_UNKNOWN;
  }
}
533
PrettyType(art::mirror::Class * klass)534 std::string PrettyType(art::mirror::Class* klass) NO_THREAD_SAFETY_ANALYSIS {
535 if (klass == nullptr) {
536 return "(raw)";
537 }
538 std::string temp;
539 std::string result(art::PrettyDescriptor(klass->GetDescriptor(&temp)));
540 return result;
541 }
542
DumpSmaps(JavaHprofDataSource::TraceContext * ctx)543 void DumpSmaps(JavaHprofDataSource::TraceContext* ctx) {
544 FILE* smaps = fopen("/proc/self/smaps", "re");
545 if (smaps != nullptr) {
546 auto trace_packet = ctx->NewTracePacket();
547 auto* smaps_packet = trace_packet->set_smaps_packet();
548 smaps_packet->set_pid(getpid());
549 perfetto::profiling::ParseSmaps(smaps,
550 [&smaps_packet](const perfetto::profiling::SmapsEntry& e) {
551 if (ShouldSampleSmapsEntry(e)) {
552 auto* smaps_entry = smaps_packet->add_entries();
553 smaps_entry->set_path(e.pathname);
554 smaps_entry->set_size_kb(e.size_kb);
555 smaps_entry->set_private_dirty_kb(e.private_dirty_kb);
556 smaps_entry->set_swap_kb(e.swap_kb);
557 }
558 });
559 fclose(smaps);
560 } else {
561 PLOG(ERROR) << "failed to open smaps";
562 }
563 }
564
GetObjectId(const art::mirror::Object * obj)565 uint64_t GetObjectId(const art::mirror::Object* obj) {
566 return reinterpret_cast<uint64_t>(obj) / std::alignment_of<art::mirror::Object>::value;
567 }
568
// Invokes `fn` with the MemberOffset of every non-static, non-primitive
// (i.e. reference) field of `*klass`, skipping the shadow$klass pointer.
// Callers that need the whole hierarchy iterate the superclass chain
// themselves (see GetReferences below).
template <typename F>
void ForInstanceReferenceField(art::mirror::Class* klass, F fn) NO_THREAD_SAFETY_ANALYSIS {
  for (art::ArtField& af : klass->GetFields()) {
    if (af.IsStatic() ||
        af.IsPrimitiveType() ||
        af.GetOffset().Uint32Value() == art::mirror::Object::ClassOffset().Uint32Value()) {
      continue;
    }
    fn(af.GetOffset());
  }
}
580
EncodedSize(uint64_t n)581 size_t EncodedSize(uint64_t n) {
582 if (n == 0) return 1;
583 return 1 + static_cast<size_t>(art::MostSignificantBit(n)) / 7;
584 }
585
586 // Returns all the references that `*obj` (an object of type `*klass`) is holding.
std::vector<std::pair<std::string, art::mirror::Object*>> GetReferences(art::mirror::Object* obj,
                                                                        art::mirror::Class* klass,
                                                                        bool emit_field_ids)
    REQUIRES_SHARED(art::Locks::mutator_lock_) {
  std::vector<std::pair<std::string, art::mirror::Object*>> referred_objects;
  ReferredObjectsFinder objf(&referred_objects, emit_field_ids);

  uint32_t klass_flags = klass->GetClassFlags();
  if (klass_flags != art::mirror::kClassFlagNormal &&
      klass_flags != art::mirror::kClassFlagSoftReference &&
      klass_flags != art::mirror::kClassFlagWeakReference &&
      klass_flags != art::mirror::kClassFlagFinalizerReference &&
      klass_flags != art::mirror::kClassFlagPhantomReference) {
    // Other class kinds (arrays, classes, dex caches, ...): let the runtime's
    // own visitor enumerate the references.
    obj->VisitReferences(objf, art::VoidFunctor());
  } else {
    // Normal instances and java.lang.ref.Reference subclasses: walk the
    // declared instance reference fields of each class in the hierarchy
    // directly.
    for (art::mirror::Class* cls = klass; cls != nullptr; cls = cls->GetSuperClass().Ptr()) {
      ForInstanceReferenceField(cls,
                                [obj, objf](art::MemberOffset offset) NO_THREAD_SAFETY_ANALYSIS {
                                  objf(art::ObjPtr<art::mirror::Object>(obj),
                                       offset,
                                       /*is_static=*/false);
                                });
    }
  }
  return referred_objects;
}
613
614 // Returns the base for delta encoding all the `referred_objects`. If delta
615 // encoding would waste space, returns 0.
EncodeBaseObjId(const std::vector<std::pair<std::string,art::mirror::Object * >> & referred_objects,const art::mirror::Object * min_nonnull_ptr)616 uint64_t EncodeBaseObjId(
617 const std::vector<std::pair<std::string, art::mirror::Object*>>& referred_objects,
618 const art::mirror::Object* min_nonnull_ptr) REQUIRES_SHARED(art::Locks::mutator_lock_) {
619 uint64_t base_obj_id = GetObjectId(min_nonnull_ptr);
620 if (base_obj_id <= 1) {
621 return 0;
622 }
623
624 // We need to decrement the base for object ids so that we can tell apart
625 // null references.
626 base_obj_id--;
627 uint64_t bytes_saved = 0;
628 for (const auto& p : referred_objects) {
629 art::mirror::Object* referred_obj = p.second;
630 if (!referred_obj) {
631 continue;
632 }
633 uint64_t referred_obj_id = GetObjectId(referred_obj);
634 bytes_saved += EncodedSize(referred_obj_id) - EncodedSize(referred_obj_id - base_obj_id);
635 }
636
637 // +1 for storing the field id.
638 if (bytes_saved <= EncodedSize(base_obj_id) + 1) {
639 // Subtracting the base ptr gains fewer bytes than it takes to store it.
640 return 0;
641 }
642 return base_obj_id;
643 }
644
645 // Helper to keep intermediate state while dumping objects and classes from ART into
646 // perfetto.protos.HeapGraph.
647 class HeapGraphDumper {
648 public:
649 // Instances of classes whose name is in `ignored_types` will be ignored.
  explicit HeapGraphDumper(const std::vector<std::string>& ignored_types)
      : ignored_types_(ignored_types),
        // Scratch buffers reused across classes/objects for packed proto
        // fields; heap-allocated to keep this object's frame small.
        reference_field_ids_(std::make_unique<protozero::PackedVarInt>()),
        reference_object_ids_(std::make_unique<protozero::PackedVarInt>()) {}
654
655 // Dumps a heap graph from `*runtime` and writes it to `writer`.
  void Dump(art::Runtime* runtime, Writer& writer) REQUIRES(art::Locks::mutator_lock_) {
    // Roots first, then every object, then the interned-string tables that
    // the first two passes populated.
    DumpRootObjects(runtime, writer);

    DumpObjects(runtime, writer);

    WriteInternedData(writer);
  }
663
664 private:
665 // Dumps the root objects from `*runtime` to `writer`.
  void DumpRootObjects(art::Runtime* runtime, Writer& writer)
      REQUIRES_SHARED(art::Locks::mutator_lock_) {
    std::map<art::RootType, std::vector<art::mirror::Object*>> root_objects;
    RootFinder rcf(&root_objects);
    runtime->VisitRoots(&rcf);
    // Heap-allocated scratch buffer for the packed object-id field.
    std::unique_ptr<protozero::PackedVarInt> object_ids(new protozero::PackedVarInt);
    for (const auto& p : root_objects) {
      const art::RootType root_type = p.first;
      const std::vector<art::mirror::Object*>& children = p.second;
      perfetto::protos::pbzero::HeapGraphRoot* root_proto = writer.GetHeapGraph()->add_roots();
      root_proto->set_root_type(ToProtoType(root_type));
      for (art::mirror::Object* obj : children) {
        if (writer.will_create_new_packet()) {
          // Flush the ids accumulated so far into the current packet before
          // the writer rolls over, then open a fresh root message of the same
          // type in the new packet.
          root_proto->set_object_ids(*object_ids);
          object_ids->Reset();
          root_proto = writer.GetHeapGraph()->add_roots();
          root_proto->set_root_type(ToProtoType(root_type));
        }
        object_ids->Append(GetObjectId(obj));
      }
      // Flush the remaining ids for this root type.
      root_proto->set_object_ids(*object_ids);
      object_ids->Reset();
    }
  }
690
691 // Dumps all the objects from `*runtime` to `writer`.
  void DumpObjects(art::Runtime* runtime, Writer& writer) REQUIRES(art::Locks::mutator_lock_) {
    // Visits every object on the heap and serializes each one in turn.
    runtime->GetHeap()->VisitObjectsPaused(
        [this, &writer](art::mirror::Object* obj)
            REQUIRES_SHARED(art::Locks::mutator_lock_) { WriteOneObject(obj, writer); });
  }
697
698 // Writes all the previously accumulated (while dumping objects and roots) interned data to
699 // `writer`.
WriteInternedData(Writer & writer)700 void WriteInternedData(Writer& writer) {
701 for (const auto& p : interned_locations_) {
702 const std::string& str = p.first;
703 uint64_t id = p.second;
704
705 perfetto::protos::pbzero::InternedString* location_proto =
706 writer.GetHeapGraph()->add_location_names();
707 location_proto->set_iid(id);
708 location_proto->set_str(reinterpret_cast<const uint8_t*>(str.c_str()), str.size());
709 }
710 for (const auto& p : interned_fields_) {
711 const std::string& str = p.first;
712 uint64_t id = p.second;
713
714 perfetto::protos::pbzero::InternedString* field_proto =
715 writer.GetHeapGraph()->add_field_names();
716 field_proto->set_iid(id);
717 field_proto->set_str(reinterpret_cast<const uint8_t*>(str.c_str()), str.size());
718 }
719 }
720
721 // Writes `*obj` into `writer`.
  void WriteOneObject(art::mirror::Object* obj, Writer& writer)
      REQUIRES_SHARED(art::Locks::mutator_lock_) {
    // Class objects additionally get a HeapGraphType entry.
    if (obj->IsClass()) {
      WriteClass(obj->AsClass().Ptr(), writer);
    }

    art::mirror::Class* klass = obj->GetClass();
    uintptr_t class_ptr = reinterpret_cast<uintptr_t>(klass);
    // We need to synthesize a new type for Class<Foo>, which does not exist
    // in the runtime. Otherwise, all the static members of all classes would be
    // attributed to java.lang.Class.
    if (klass->IsClassClass()) {
      class_ptr = WriteSyntheticClassFromObj(obj, writer);
    }

    if (IsIgnored(obj)) {
      return;
    }

    auto class_id = FindOrAppend(&interned_classes_, class_ptr);

    uint64_t object_id = GetObjectId(obj);
    perfetto::protos::pbzero::HeapGraphObject* object_proto = writer.GetHeapGraph()->add_objects();
    // Object ids are delta-encoded against the previously written object when
    // that yields a positive delta; otherwise the absolute id is emitted.
    if (prev_object_id_ && prev_object_id_ < object_id) {
      object_proto->set_id_delta(object_id - prev_object_id_);
    } else {
      object_proto->set_id(object_id);
    }
    prev_object_id_ = object_id;
    object_proto->set_type_id(class_id);

    // Arrays / strings are magic and have an instance dependent size.
    if (obj->SizeOf() != klass->GetObjectSize()) {
      object_proto->set_self_size(obj->SizeOf());
    }

    // Classify which heap the object lives in: app (default), zygote, or
    // boot image.
    const art::gc::Heap* heap = art::Runtime::Current()->GetHeap();
    const auto* space = heap->FindContinuousSpaceFromObject(obj, /*fail_ok=*/true);
    auto heap_type = perfetto::protos::pbzero::HeapGraphObject::HEAP_TYPE_APP;
    if (space != nullptr) {
      if (space->IsZygoteSpace()) {
        heap_type = perfetto::protos::pbzero::HeapGraphObject::HEAP_TYPE_ZYGOTE;
      } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) {
        heap_type = perfetto::protos::pbzero::HeapGraphObject::HEAP_TYPE_BOOT_IMAGE;
      }
    } else {
      // Not in any continuous space: check the large object space.
      const auto* los = heap->GetLargeObjectsSpace();
      if (los->Contains(obj) && los->IsZygoteLargeObject(art::Thread::Current(), obj)) {
        heap_type = perfetto::protos::pbzero::HeapGraphObject::HEAP_TYPE_ZYGOTE;
      }
    }
    // The heap type is also delta-encoded: only written when it changes from
    // the previous object's.
    if (heap_type != prev_heap_type_) {
      object_proto->set_heap_type_delta(heap_type);
      prev_heap_type_ = heap_type;
    }

    FillReferences(obj, klass, object_proto);

    FillFieldValues(obj, klass, object_proto);
  }
782
783 // Writes `*klass` into `writer`.
  void WriteClass(art::mirror::Class* klass, Writer& writer)
      REQUIRES_SHARED(art::Locks::mutator_lock_) {
    perfetto::protos::pbzero::HeapGraphType* type_proto = writer.GetHeapGraph()->add_types();
    // The type id is the interned class pointer; strings are interned too.
    type_proto->set_id(FindOrAppend(&interned_classes_, reinterpret_cast<uintptr_t>(klass)));
    type_proto->set_class_name(PrettyType(klass));
    type_proto->set_location_id(FindOrAppend(&interned_locations_, klass->GetLocation()));
    type_proto->set_object_size(klass->GetObjectSize());
    type_proto->set_kind(ProtoClassKind(klass->GetClassFlags()));
    type_proto->set_classloader_id(GetObjectId(klass->GetClassLoader().Ptr()));
    if (klass->GetSuperClass().Ptr()) {
      type_proto->set_superclass_id(FindOrAppend(
          &interned_classes_, reinterpret_cast<uintptr_t>(klass->GetSuperClass().Ptr())));
    }
    // Record the interned field-name id of every instance reference field
    // declared by this class as a packed list.
    ForInstanceReferenceField(
        klass, [klass, this](art::MemberOffset offset) NO_THREAD_SAFETY_ANALYSIS {
          auto art_field = art::ArtField::FindInstanceFieldWithOffset(klass, offset.Uint32Value());
          reference_field_ids_->Append(
              FindOrAppend(&interned_fields_, art_field->PrettyField(true)));
        });
    type_proto->set_reference_field_id(*reference_field_ids_);
    // The packed buffer is shared scratch state; reset it for the next class.
    reference_field_ids_->Reset();
  }
806
807 // Creates a fake class that represents a type only used by `*obj` into `writer`.
WriteSyntheticClassFromObj(art::mirror::Object * obj,Writer & writer)808 uintptr_t WriteSyntheticClassFromObj(art::mirror::Object* obj, Writer& writer)
809 REQUIRES_SHARED(art::Locks::mutator_lock_) {
810 CHECK(obj->IsClass());
811 perfetto::protos::pbzero::HeapGraphType* type_proto = writer.GetHeapGraph()->add_types();
812 // All pointers are at least multiples of two, so this way we can make sure
813 // we are not colliding with a real class.
814 uintptr_t class_ptr = reinterpret_cast<uintptr_t>(obj) | 1;
815 auto class_id = FindOrAppend(&interned_classes_, class_ptr);
816 type_proto->set_id(class_id);
817 type_proto->set_class_name(obj->PrettyTypeOf());
818 type_proto->set_location_id(FindOrAppend(&interned_locations_, obj->AsClass()->GetLocation()));
819 return class_ptr;
820 }
821
// Fills `*object_proto` with all the references held by `*obj` (an object of type `*klass`).
void FillReferences(art::mirror::Object* obj,
                    art::mirror::Class* klass,
                    perfetto::protos::pbzero::HeapGraphObject* object_proto)
    REQUIRES_SHARED(art::Locks::mutator_lock_) {
  const uint32_t klass_flags = klass->GetClassFlags();
  // Per-object reference field names are skipped for object arrays, "normal"
  // objects and the java.lang.ref.Reference family; presumably their field
  // names are recoverable from the type alone (see GetReferences) — the flag
  // is forwarded there.
  const bool emit_field_ids = klass_flags != art::mirror::kClassFlagObjectArray &&
                              klass_flags != art::mirror::kClassFlagNormal &&
                              klass_flags != art::mirror::kClassFlagSoftReference &&
                              klass_flags != art::mirror::kClassFlagWeakReference &&
                              klass_flags != art::mirror::kClassFlagFinalizerReference &&
                              klass_flags != art::mirror::kClassFlagPhantomReference;
  std::vector<std::pair<std::string, art::mirror::Object*>> referred_objects =
      GetReferences(obj, klass, emit_field_ids);

  // Nulls out references to ignored types in place and returns the referred
  // object with the lowest address (or nullptr).
  art::mirror::Object* min_nonnull_ptr = FilterIgnoredReferencesAndFindMin(referred_objects);

  // Base used below to delta-encode the referred object ids.
  uint64_t base_obj_id = EncodeBaseObjId(referred_objects, min_nonnull_ptr);

  for (const auto& p : referred_objects) {
    const std::string& field_name = p.first;
    art::mirror::Object* referred_obj = p.second;
    if (emit_field_ids) {
      reference_field_ids_->Append(FindOrAppend(&interned_fields_, field_name));
    }
    uint64_t referred_obj_id = GetObjectId(referred_obj);
    // Id 0 (null reference) is written as-is; non-zero ids are stored
    // relative to the base to keep the varints small.
    if (referred_obj_id) {
      referred_obj_id -= base_obj_id;
    }
    reference_object_ids_->Append(referred_obj_id);
  }
  if (emit_field_ids) {
    object_proto->set_reference_field_id(*reference_field_ids_);
    reference_field_ids_->Reset();
  }
  if (base_obj_id) {
    // The field is called `reference_field_id_base`, but it has always been used as a base for
    // `reference_object_id`. It should be called `reference_object_id_base`.
    object_proto->set_reference_field_id_base(base_obj_id);
  }
  object_proto->set_reference_object_id(*reference_object_ids_);
  reference_object_ids_->Reset();
}
865
866 // Iterates all the `referred_objects` and sets all the objects that are supposed to be ignored
867 // to nullptr. Returns the object with the smallest address (ignoring nullptr).
FilterIgnoredReferencesAndFindMin(std::vector<std::pair<std::string,art::mirror::Object * >> & referred_objects) const868 art::mirror::Object* FilterIgnoredReferencesAndFindMin(
869 std::vector<std::pair<std::string, art::mirror::Object*>>& referred_objects) const
870 REQUIRES_SHARED(art::Locks::mutator_lock_) {
871 art::mirror::Object* min_nonnull_ptr = nullptr;
872 for (auto& p : referred_objects) {
873 art::mirror::Object*& referred_obj = p.second;
874 if (referred_obj == nullptr)
875 continue;
876 if (IsIgnored(referred_obj)) {
877 referred_obj = nullptr;
878 continue;
879 }
880 if (min_nonnull_ptr == nullptr || min_nonnull_ptr > referred_obj) {
881 min_nonnull_ptr = referred_obj;
882 }
883 }
884 return min_nonnull_ptr;
885 }
886
// Fills `*object_proto` with the value of a subset of potentially interesting fields of `*obj`
// (an object of type `*klass`).
void FillFieldValues(art::mirror::Object* obj,
                     art::mirror::Class* klass,
                     perfetto::protos::pbzero::HeapGraphObject* object_proto) const
    REQUIRES_SHARED(art::Locks::mutator_lock_) {
  // Only instances carry field values we care about; skip class objects.
  if (obj->IsClass() || klass->IsClassClass()) {
    return;
  }

  // Walk the whole superclass chain looking for fields of interest.
  for (art::mirror::Class* cls = klass; cls != nullptr; cls = cls->GetSuperClass().Ptr()) {
    if (cls->IsArrayClass()) {
      continue;
    }

    // Export libcore's NativeAllocationRegistry.size (a long), so the heap
    // graph can report the native allocation size tracked by the registry.
    if (cls->DescriptorEquals("Llibcore/util/NativeAllocationRegistry;")) {
      art::ArtField* af = cls->FindDeclaredInstanceField(
          "size", art::Primitive::Descriptor(art::Primitive::kPrimLong));
      if (af) {
        object_proto->set_native_allocation_registry_size_field(af->GetLong(obj));
      }
    }
  }
}
911
912 // Returns true if `*obj` has a type that's supposed to be ignored.
IsIgnored(art::mirror::Object * obj) const913 bool IsIgnored(art::mirror::Object* obj) const REQUIRES_SHARED(art::Locks::mutator_lock_) {
914 if (obj->IsClass()) {
915 return false;
916 }
917 art::mirror::Class* klass = obj->GetClass();
918 std::string temp;
919 std::string_view name(klass->GetDescriptor(&temp));
920 return std::find(ignored_types_.begin(), ignored_types_.end(), name) != ignored_types_.end();
921 }
922
923 // Name of classes whose instances should be ignored.
924 const std::vector<std::string> ignored_types_;
925
926 // Make sure that intern ID 0 (default proto value for a uint64_t) always maps to ""
927 // (default proto value for a string) or to 0 (default proto value for a uint64).
928
929 // Map from string (the field name) to its index in perfetto.protos.HeapGraph.field_names
930 std::map<std::string, uint64_t> interned_fields_{{"", 0}};
931 // Map from string (the location name) to its index in perfetto.protos.HeapGraph.location_names
932 std::map<std::string, uint64_t> interned_locations_{{"", 0}};
933 // Map from addr (the class pointer) to its id in perfetto.protos.HeapGraph.types
934 std::map<uintptr_t, uint64_t> interned_classes_{{0, 0}};
935
936 // Temporary buffers: used locally in some methods and then cleared.
937 std::unique_ptr<protozero::PackedVarInt> reference_field_ids_;
938 std::unique_ptr<protozero::PackedVarInt> reference_object_ids_;
939
940 // Id of the previous object that was dumped. Used for delta encoding.
941 uint64_t prev_object_id_ = 0;
942 // Heap type of the previous object that was dumped. Used for delta encoding.
943 perfetto::protos::pbzero::HeapGraphObject::HeapType prev_heap_type_ =
944 perfetto::protos::pbzero::HeapGraphObject::HEAP_TYPE_UNKNOWN;
945 };
946
947 // waitpid with a timeout implemented by ~busy-waiting
948 // See b/181031512 for rationale.
BusyWaitpid(pid_t pid,uint32_t timeout_ms)949 void BusyWaitpid(pid_t pid, uint32_t timeout_ms) {
950 for (size_t i = 0;; ++i) {
951 if (i == timeout_ms) {
952 // The child hasn't exited.
953 // Give up and SIGKILL it. The next waitpid should succeed.
954 LOG(ERROR) << "perfetto_hprof child timed out. Sending SIGKILL.";
955 kill(pid, SIGKILL);
956 }
957 int stat_loc;
958 pid_t wait_result = waitpid(pid, &stat_loc, WNOHANG);
959 if (wait_result == -1 && errno != EINTR) {
960 if (errno != ECHILD) {
961 // This hopefully never happens (should only be EINVAL).
962 PLOG(FATAL_WITHOUT_ABORT) << "waitpid";
963 }
964 // If we get ECHILD, the parent process was handling SIGCHLD, or did a wildcard wait.
965 // The child is no longer here either way, so that's good enough for us.
966 break;
967 } else if (wait_result > 0) {
968 break;
969 } else { // wait_result == 0 || errno == EINTR.
970 usleep(1000);
971 }
972 }
973 }
974
// Controls when ForkAndRun lifts the parent's GC critical section and thread
// suspension relative to running `parent_runnable`.
enum class ResumeParentPolicy {
  // Resume the parent runtime as soon as the fork returns, before
  // parent_runnable executes.
  IMMEDIATELY,
  // Keep the parent suspended until parent_runnable has finished.
  DEFERRED
};
979
// fork() while holding thread_list_lock_. fork only clones the calling
// thread, so a lock held by any other thread would never be released in the
// child; holding it across the fork guarantees it is ours (and therefore
// released) when the child starts.
pid_t ForkUnderThreadListLock(art::Thread* self) {
  art::MutexLock lk(self, *art::Locks::thread_list_lock_);
  return fork();
}
984
// Suspends the runtime, forks, and then runs `parent_runnable` in the parent
// and `child_runnable` in the child. The child never returns from here: it
// exits via art::FastExit. `resume_parent_policy` decides whether the parent
// is resumed before or after `parent_runnable` runs.
void ForkAndRun(art::Thread* self,
                ResumeParentPolicy resume_parent_policy,
                const std::function<void(pid_t child)>& parent_runnable,
                const std::function<void(pid_t parent, uint64_t timestamp)>& child_runnable) {
  pid_t parent_pid = getpid();
  LOG(INFO) << "forking for " << parent_pid;
  // Need to take a heap dump while GC isn't running. See the comment in
  // Heap::VisitObjects(). Also we need the critical section to avoid visiting
  // the same object twice. See b/34967844.
  //
  // We need to do this before the fork, because otherwise it can deadlock
  // waiting for the GC, as all other threads get terminated by the clone, but
  // their locks are not released.
  // We must also avoid any logd logging actions on the forked process; art LogdLoggerLocked
  // serializes logging from different threads via a mutex.
  // This does not perfectly solve all fork-related issues, as there could still be threads that
  // are unaffected by ScopedSuspendAll and in a non-fork-friendly situation
  // (e.g. inside a malloc holding a lock). This situation is quite rare, and in that case we will
  // hit the watchdog in the grand-child process if it gets stuck.
  // std::optional so the parent can tear these down early (reset) under
  // ResumeParentPolicy::IMMEDIATELY.
  std::optional<art::gc::ScopedGCCriticalSection> gcs(std::in_place, self, art::gc::kGcCauseHprof,
                                                      art::gc::kCollectorTypeHprof);

  std::optional<art::ScopedSuspendAll> ssa(std::in_place, __FUNCTION__, /* long_suspend=*/ true);

  // Optimistically get the thread_list_lock_ to avoid the child process deadlocking
  pid_t pid = ForkUnderThreadListLock(self);
  if (pid == -1) {
    // Fork error.
    PLOG(ERROR) << "fork";
    return;
  }
  if (pid != 0) {
    // Parent
    if (resume_parent_policy == ResumeParentPolicy::IMMEDIATELY) {
      // Stop the thread suspension as soon as possible to allow the rest of the application to
      // continue while we waitpid here.
      ssa.reset();
      gcs.reset();
    }
    parent_runnable(pid);
    if (resume_parent_policy != ResumeParentPolicy::IMMEDIATELY) {
      // DEFERRED: only resume once the parent-side work is done.
      ssa.reset();
      gcs.reset();
    }
    return;
  }
  // The following code is only executed by the child of the original process.
  // Uninstall signal handler, so we don't trigger a profile on it.
  if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) {
    close(g_signal_pipe_fds[0]);
    close(g_signal_pipe_fds[1]);
    PLOG(FATAL) << "Failed to sigaction";
    return;
  }

  // Timestamp recorded in the child; passed through so packets carry the
  // dump time of the (now-resumed) parent's heap snapshot.
  uint64_t ts = GetCurrentBootClockNs();
  child_runnable(parent_pid, ts);
  // Prevent the `atexit` handlers from running. We do not want to call cleanup
  // functions the parent process has registered.
  art::FastExit(0);
}
1046
// Runs inside the (grand-)child: writes the heap graph (and optionally smaps)
// of `parent_pid` into the active JavaHprofDataSource trace, then flushes and
// waits for the flush to complete before finishing the data source.
void WriteHeapPackets(pid_t parent_pid, uint64_t timestamp) {
  JavaHprofDataSource::Trace(
      [parent_pid, timestamp](JavaHprofDataSource::TraceContext ctx)
          NO_THREAD_SAFETY_ANALYSIS {
            bool dump_smaps;
            std::vector<std::string> ignored_types;
            {
              // Copy the config out under the data-source lock, then release
              // it before doing the (long) dump.
              auto ds = ctx.GetDataSourceLocked();
              if (!ds || !ds->enabled()) {
                if (ds) ds->Finish();
                LOG(INFO) << "skipping irrelevant data source.";
                return;
              }
              dump_smaps = ds->dump_smaps();
              ignored_types = ds->ignored_types();
            }
            art::ScopedTrace trace("ART heap dump for " + std::to_string(parent_pid));
            if (dump_smaps) {
              DumpSmaps(&ctx);
            }
            Writer writer(parent_pid, &ctx, timestamp);
            HeapGraphDumper dumper(ignored_types);

            dumper.Dump(art::Runtime::Current(), writer);

            writer.Finalize();
            // Signal completion from the Perfetto thread once the flush lands.
            ctx.Flush([] {
              art::MutexLock lk(JavaHprofDataSource::art_thread(), GetStateMutex());
              g_state = State::kEnd;
              GetStateCV().Broadcast(JavaHprofDataSource::art_thread());
            });
            // Wait for the Flush that will happen on the Perfetto thread.
            {
              art::MutexLock lk(JavaHprofDataSource::art_thread(), GetStateMutex());
              while (g_state != State::kEnd) {
                GetStateCV().Wait(JavaHprofDataSource::art_thread());
              }
            }
            {
              auto ds = ctx.GetDataSourceLocked();
              if (ds) {
                ds->Finish();
              } else {
                LOG(ERROR) << "datasource timed out (duration_ms + datasource_stop_timeout_ms) "
                              "before dump finished";
              }
            }
          });
}
1096
// Signal-triggered heap dump. Forks; the parent resumes immediately and just
// reaps the child, while the child daemonizes so the actual dump runs in a
// grand-child process under a watchdog.
void DumpPerfetto(art::Thread* self) {
  ForkAndRun(
      self,
      ResumeParentPolicy::IMMEDIATELY,
      // parent thread
      [](pid_t child) {
        // Busy waiting here will introduce some extra latency, but that is okay because we have
        // already unsuspended all other threads. This runs on the perfetto_hprof_listener, which
        // is not needed for progress of the app itself.
        // We daemonize the child process, so effectively we only need to wait
        // for it to fork and exit.
        BusyWaitpid(child, 1000);
      },
      // child thread
      [self](pid_t dumped_pid, uint64_t timestamp) {
        // Daemon creates a new process that is the grand-child of the original process, and exits.
        if (daemon(0, 0) == -1) {
          PLOG(FATAL) << "daemon";
        }
        // The following code is only executed by the grand-child of the original process.

        // Make sure that this is the first thing we do after forking, so if anything
        // below hangs, the fork will go away from the watchdog.
        ArmWatchdogOrDie();
        SetupDataSource("android.java_hprof", false);
        WaitForDataSource(self);
        WriteHeapPackets(dumped_pid, timestamp);
        LOG(INFO) << "finished dumping heap for " << dumped_pid;
      });
}
1127
// OutOfMemoryError hook (registered in ArtPlugin_Initialize). Forks and dumps
// the heap into pre-armed OOME tracing sessions, keeping the dying parent
// suspended until the dump completes.
void DumpPerfettoOutOfMemory() REQUIRES_SHARED(art::Locks::mutator_lock_) {
  art::Thread* self = art::Thread::Current();
  if (!self) {
    LOG(FATAL_WITHOUT_ABORT) << "no thread in DumpPerfettoOutOfMemory";
    return;
  }

  // Ensure that there is an active, armed tracing session
  uint32_t session_cnt =
      android::base::GetUintProperty<uint32_t>("traced.oome_heap_session.count", 0);
  if (session_cnt == 0) {
    return;
  }
  {
    // OutOfMemoryErrors are reentrant, make sure we do not fork and process
    // more than once.
    art::MutexLock lk(self, GetStateMutex());
    if (g_oome_triggered) {
      return;
    }
    g_oome_triggered = true;
    g_oome_sessions_pending = session_cnt;
  }

  // Drop to a suspended state so ForkAndRun can take the GC critical section.
  art::ScopedThreadSuspension sts(self, art::ThreadState::kSuspended);
  // If we fork & resume the original process execution it will most likely exit
  // ~immediately due to the OOME error thrown. When the system detects that
  // that, it will cleanup by killing all processes in the cgroup (including
  // the process we just forked).
  // We need to avoid the race between the heap dump and the process group
  // cleanup, and the only way to do this is to avoid resuming the original
  // process until the heap dump is complete.
  // Given we are already about to crash anyway, the diagnostic data we get
  // outweighs the cost of introducing some latency.
  ForkAndRun(
      self,
      ResumeParentPolicy::DEFERRED,
      // parent process
      [](pid_t child) {
        // waitpid to reap the zombie
        // we are explicitly waiting for the child to exit
        // The reason for the timeout on top of the watchdog is that it is
        // possible (albeit unlikely) that even the watchdog will fail to be
        // activated in the case of an atfork handler.
        BusyWaitpid(child, kWatchdogTimeoutSec * 1000);
      },
      // child process
      [self](pid_t dumped_pid, uint64_t timestamp) {
        ArmWatchdogOrDie();
        art::SetThreadName("perfetto_oome_hprof");
        art::ScopedTrace trace("perfetto_hprof oome");
        SetupDataSource("android.java_hprof.oom", true);
        // Wake up any sessions waiting on the OOME trigger.
        perfetto::Tracing::ActivateTriggers({"com.android.telemetry.art-outofmemory"}, 500);

        // A pre-armed tracing session might not exist, so we should wait for a
        // limited amount of time before we decide to let the execution continue.
        if (!TimedWaitForDataSource(self, 1000)) {
          LOG(INFO) << "OOME hprof timeout (state " << g_state << ")";
          return;
        }
        WriteHeapPackets(dumped_pid, timestamp);
        LOG(INFO) << "OOME hprof complete for " << dumped_pid;
      });
}
1192
// The plugin initialization function. Installs the heap-dump signal handler,
// a self-pipe, and a detached listener thread that performs a dump each time
// the signal fires; also registers the OOME dump hook. Returns false if any
// step fails (state is left/reset to kUninitialized where possible).
extern "C" bool ArtPlugin_Initialize() {
  if (art::Runtime::Current() == nullptr) {
    return false;
  }
  art::Thread* self = art::Thread::Current();
  {
    // Guard against double initialization.
    art::MutexLock lk(self, GetStateMutex());
    if (g_state != State::kUninitialized) {
      LOG(ERROR) << "perfetto_hprof already initialized. state: " << g_state;
      return false;
    }
    g_state = State::kWaitForListener;
  }

  // Self-pipe: the signal handler writes a byte, the listener thread reads it.
  if (pipe2(g_signal_pipe_fds, O_CLOEXEC) == -1) {
    PLOG(ERROR) << "Failed to pipe";
    return false;
  }

  struct sigaction act = {};
  act.sa_flags = SA_SIGINFO | SA_RESTART;
  act.sa_sigaction = [](int, siginfo_t* si, void*) {
    // The requesting session id arrives as the signal's payload.
    // NOTE(review): PLOG from a signal handler is not async-signal-safe;
    // presumably accepted here as a best-effort error path — confirm.
    requested_tracing_session_id = si->si_value.sival_int;
    if (write(g_signal_pipe_fds[1], kByte, sizeof(kByte)) == -1) {
      PLOG(ERROR) << "Failed to trigger heap dump";
    }
  };

  // TODO(fmayer): We can probably use the SignalCatcher thread here to not
  // have an idle thread.
  if (sigaction(kJavaHeapprofdSignal, &act, &g_orig_act) != 0) {
    close(g_signal_pipe_fds[0]);
    close(g_signal_pipe_fds[1]);
    PLOG(ERROR) << "Failed to sigaction";
    return false;
  }

  std::thread th([] {
    art::Runtime* runtime = art::Runtime::Current();
    if (!runtime) {
      LOG(FATAL_WITHOUT_ABORT) << "no runtime in perfetto_hprof_listener";
      return;
    }
    if (!runtime->AttachCurrentThread("perfetto_hprof_listener", /*as_daemon=*/ true,
                                      runtime->GetSystemThreadGroup(), /*create_peer=*/ false)) {
      LOG(ERROR) << "failed to attach thread.";
      {
        // Roll back so ArtPlugin_Deinitialize (which waits on
        // kWaitForListener) does not block forever.
        art::MutexLock lk(nullptr, GetStateMutex());
        g_state = State::kUninitialized;
        GetStateCV().Broadcast(nullptr);
      }

      return;
    }
    art::Thread* self = art::Thread::Current();
    if (!self) {
      LOG(FATAL_WITHOUT_ABORT) << "no thread in perfetto_hprof_listener";
      return;
    }
    {
      // Announce that the listener is up and running.
      art::MutexLock lk(self, GetStateMutex());
      if (g_state == State::kWaitForListener) {
        g_state = State::kWaitForStart;
        GetStateCV().Broadcast(self);
      }
    }
    char buf[1];
    // Block on the pipe; each byte written by the signal handler triggers one dump.
    for (;;) {
      int res;
      do {
        res = read(g_signal_pipe_fds[0], buf, sizeof(buf));
      } while (res == -1 && errno == EINTR);

      if (res <= 0) {
        // EOF (write end closed in Deinitialize) or a real error: shut down.
        if (res == -1) {
          PLOG(ERROR) << "failed to read";
        }
        close(g_signal_pipe_fds[0]);
        return;
      }

      perfetto_hprof::DumpPerfetto(self);
    }
  });
  th.detach();

  // Register the OOM error handler.
  art::Runtime::Current()->SetOutOfMemoryErrorHook(perfetto_hprof::DumpPerfettoOutOfMemory);

  return true;
}
1285
ArtPlugin_Deinitialize()1286 extern "C" bool ArtPlugin_Deinitialize() {
1287 art::Runtime::Current()->SetOutOfMemoryErrorHook(nullptr);
1288
1289 if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) {
1290 PLOG(ERROR) << "failed to reset signal handler";
1291 // We cannot close the pipe if the signal handler wasn't unregistered,
1292 // to avoid receiving SIGPIPE.
1293 return false;
1294 }
1295 close(g_signal_pipe_fds[1]);
1296
1297 art::Thread* self = art::Thread::Current();
1298 art::MutexLock lk(self, GetStateMutex());
1299 // Wait until after the thread was registered to the runtime. This is so
1300 // we do not attempt to register it with the runtime after it had been torn
1301 // down (ArtPlugin_Deinitialize gets called in the Runtime dtor).
1302 while (g_state == State::kWaitForListener) {
1303 GetStateCV().Wait(art::Thread::Current());
1304 }
1305 g_state = State::kUninitialized;
1306 GetStateCV().Broadcast(self);
1307 return true;
1308 }
1309
1310 } // namespace perfetto_hprof
1311
namespace perfetto {

// Instantiates the static members required by the Perfetto SDK for the
// JavaHprofDataSource data-source type.
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(perfetto_hprof::JavaHprofDataSource);

}
1317