• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/profiling/perf/unwinding.h"
18 
19 #include <mutex>
20 
21 #include <inttypes.h>
22 
23 #include "perfetto/ext/base/metatrace.h"
24 #include "perfetto/ext/base/thread_utils.h"
25 
26 namespace {
27 constexpr size_t kUnwindingMaxFrames = 1000;
28 constexpr uint32_t kDataSourceShutdownRetryDelayMs = 400;
29 
MaybeReleaseAllocatorMemToOS()30 void MaybeReleaseAllocatorMemToOS() {
31 #if defined(__BIONIC__)
32   // TODO(b/152414415): libunwindstack's volume of small allocations is
33   // adverarial to scudo, which doesn't automatically release small
34   // allocation regions back to the OS. Forceful purge does reclaim all size
35   // classes.
36   mallopt(M_PURGE, 0);
37 #endif
38 }
39 
40 }  // namespace
41 
42 namespace perfetto {
43 namespace profiling {
44 
45 Unwinder::Delegate::~Delegate() = default;
46 
Unwinder(Delegate * delegate,base::UnixTaskRunner * task_runner)47 Unwinder::Unwinder(Delegate* delegate, base::UnixTaskRunner* task_runner)
48     : task_runner_(task_runner), delegate_(delegate) {
49   ResetAndEnableUnwindstackCache();
50   base::MaybeSetThreadName("stack-unwinding");
51 }
52 
PostStartDataSource(DataSourceInstanceID ds_id)53 void Unwinder::PostStartDataSource(DataSourceInstanceID ds_id) {
54   // No need for a weak pointer as the associated task runner quits (stops
55   // running tasks) strictly before the Unwinder's destruction.
56   task_runner_->PostTask([this, ds_id] { StartDataSource(ds_id); });
57 }
58 
StartDataSource(DataSourceInstanceID ds_id)59 void Unwinder::StartDataSource(DataSourceInstanceID ds_id) {
60   PERFETTO_DCHECK_THREAD(thread_checker_);
61   PERFETTO_DLOG("Unwinder::StartDataSource(%zu)", static_cast<size_t>(ds_id));
62 
63   auto it_and_inserted = data_sources_.emplace(ds_id, DataSourceState{});
64   PERFETTO_DCHECK(it_and_inserted.second);
65 }
66 
67 // c++11: use shared_ptr to transfer resource handles, so that the resources get
68 // released even if the task runner is destroyed with pending tasks.
69 // "Cleverness" warning:
70 // the task will be executed on a different thread, and will mutate the
71 // pointed-to memory. It may be the case that this posting thread will not
72 // decrement its shared_ptr refcount until *after* the task has executed. In
73 // that scenario, the destruction of the pointed-to memory will be happening on
74 // the posting thread. This implies a data race between the mutation on the task
75 // thread, and the destruction on the posting thread. *However*, we assume that
76 // there is no race in practice due to refcount decrements having
77 // release-acquire semantics. The refcount decrements pair with each other, and
78 // therefore also serve as a memory barrier between the destructor, and any
79 // previous modifications of the pointed-to memory.
80 // TODO(rsavitski): present a more convincing argument, or reimplement
81 // without relying on shared_ptr implementation details.
PostAdoptProcDescriptors(DataSourceInstanceID ds_id,pid_t pid,base::ScopedFile maps_fd,base::ScopedFile mem_fd)82 void Unwinder::PostAdoptProcDescriptors(DataSourceInstanceID ds_id,
83                                         pid_t pid,
84                                         base::ScopedFile maps_fd,
85                                         base::ScopedFile mem_fd) {
86   auto shared_maps = std::make_shared<base::ScopedFile>(std::move(maps_fd));
87   auto shared_mem = std::make_shared<base::ScopedFile>(std::move(mem_fd));
88   task_runner_->PostTask([this, ds_id, pid, shared_maps, shared_mem] {
89     base::ScopedFile maps = std::move(*shared_maps.get());
90     base::ScopedFile mem = std::move(*shared_mem.get());
91     AdoptProcDescriptors(ds_id, pid, std::move(maps), std::move(mem));
92   });
93 }
94 
AdoptProcDescriptors(DataSourceInstanceID ds_id,pid_t pid,base::ScopedFile maps_fd,base::ScopedFile mem_fd)95 void Unwinder::AdoptProcDescriptors(DataSourceInstanceID ds_id,
96                                     pid_t pid,
97                                     base::ScopedFile maps_fd,
98                                     base::ScopedFile mem_fd) {
99   PERFETTO_DCHECK_THREAD(thread_checker_);
100   PERFETTO_DLOG("Unwinder::AdoptProcDescriptors(%zu, %d, %d, %d)",
101                 static_cast<size_t>(ds_id), static_cast<int>(pid),
102                 maps_fd.get(), mem_fd.get());
103 
104   auto it = data_sources_.find(ds_id);
105   PERFETTO_CHECK(it != data_sources_.end());
106   DataSourceState& ds = it->second;
107 
108   ProcessState& proc_state = ds.process_states[pid];  // insert if new
109   PERFETTO_DCHECK(proc_state.status != ProcessState::Status::kResolved);
110   PERFETTO_DCHECK(!proc_state.unwind_state.has_value());
111 
112   PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, PROFILER_MAPS_PARSE);
113 
114   proc_state.status = ProcessState::Status::kResolved;
115   proc_state.unwind_state =
116       UnwindingMetadata{std::move(maps_fd), std::move(mem_fd)};
117 }
118 
PostRecordTimedOutProcDescriptors(DataSourceInstanceID ds_id,pid_t pid)119 void Unwinder::PostRecordTimedOutProcDescriptors(DataSourceInstanceID ds_id,
120                                                  pid_t pid) {
121   task_runner_->PostTask(
122       [this, ds_id, pid] { RecordTimedOutProcDescriptors(ds_id, pid); });
123 }
124 
RecordTimedOutProcDescriptors(DataSourceInstanceID ds_id,pid_t pid)125 void Unwinder::RecordTimedOutProcDescriptors(DataSourceInstanceID ds_id,
126                                              pid_t pid) {
127   PERFETTO_DCHECK_THREAD(thread_checker_);
128   PERFETTO_DLOG("Unwinder::RecordTimedOutProcDescriptors(%zu, %d)",
129                 static_cast<size_t>(ds_id), static_cast<int>(pid));
130 
131   auto it = data_sources_.find(ds_id);
132   PERFETTO_CHECK(it != data_sources_.end());
133   DataSourceState& ds = it->second;
134 
135   ProcessState& proc_state = ds.process_states[pid];  // insert if new
136   PERFETTO_DCHECK(proc_state.status == ProcessState::Status::kResolving);
137   PERFETTO_DCHECK(!proc_state.unwind_state.has_value());
138 
139   proc_state.status = ProcessState::Status::kExpired;
140 }
141 
PostProcessQueue()142 void Unwinder::PostProcessQueue() {
143   task_runner_->PostTask([this] { ProcessQueue(); });
144 }
145 
146 // Note: we always walk the queue in order. So if there are multiple data
147 // sources, one of which is shutting down, its shutdown can be delayed by
148 // unwinding of other sources' samples. Instead, we could scan the queue
149 // multiple times, prioritizing the samples for shutting-down sources. At the
150 // time of writing, the earlier is considered to be fair enough.
ProcessQueue()151 void Unwinder::ProcessQueue() {
152   PERFETTO_DCHECK_THREAD(thread_checker_);
153   PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, PROFILER_UNWIND_TICK);
154   PERFETTO_DLOG("Unwinder::ProcessQueue");
155 
156   base::FlatSet<DataSourceInstanceID> pending_sample_sources =
157       ConsumeAndUnwindReadySamples();
158 
159   // Deal with the possiblity of data sources that are shutting down.
160   bool post_delayed_reprocess = false;
161   base::FlatSet<DataSourceInstanceID> sources_to_stop;
162   for (auto& id_and_ds : data_sources_) {
163     DataSourceInstanceID ds_id = id_and_ds.first;
164     const DataSourceState& ds = id_and_ds.second;
165 
166     if (ds.status == DataSourceState::Status::kActive)
167       continue;
168 
169     // Data source that is shutting down. If we're still waiting on proc-fds (or
170     // the lookup to time out) for samples in the queue - repost a later
171     // attempt (as there is no guarantee that there are any readers waking up
172     // the unwinder anymore).
173     if (pending_sample_sources.count(ds_id)) {
174       PERFETTO_DLOG(
175           "Unwinder delaying DS(%zu) stop: waiting on a pending sample",
176           static_cast<size_t>(ds_id));
177       post_delayed_reprocess = true;
178     } else {
179       // Otherwise, proceed with tearing down data source state (after
180       // completing the loop, to avoid invalidating the iterator).
181       sources_to_stop.insert(ds_id);
182     }
183   }
184 
185   for (auto ds_id : sources_to_stop)
186     FinishDataSourceStop(ds_id);
187 
188   if (post_delayed_reprocess)
189     task_runner_->PostDelayedTask([this] { ProcessQueue(); },
190                                   kDataSourceShutdownRetryDelayMs);
191 }
192 
ConsumeAndUnwindReadySamples()193 base::FlatSet<DataSourceInstanceID> Unwinder::ConsumeAndUnwindReadySamples() {
194   PERFETTO_DCHECK_THREAD(thread_checker_);
195   base::FlatSet<DataSourceInstanceID> pending_sample_sources;
196 
197   // Use a single snapshot of the ring buffer pointers.
198   ReadView read_view = unwind_queue_.BeginRead();
199 
200   PERFETTO_METATRACE_COUNTER(
201       TAG_PRODUCER, PROFILER_UNWIND_QUEUE_SZ,
202       static_cast<int32_t>(read_view.write_pos - read_view.read_pos));
203 
204   if (read_view.read_pos == read_view.write_pos)
205     return pending_sample_sources;
206 
207   // Walk the queue.
208   for (auto read_pos = read_view.read_pos; read_pos < read_view.write_pos;
209        read_pos++) {
210     UnwindEntry& entry = unwind_queue_.at(read_pos);
211 
212     if (!entry.valid)
213       continue;  // already processed
214 
215     auto it = data_sources_.find(entry.data_source_id);
216     PERFETTO_CHECK(it != data_sources_.end());
217     DataSourceState& ds = it->second;
218 
219     pid_t pid = entry.sample.pid;
220     ProcessState& proc_state = ds.process_states[pid];  // insert if new
221 
222     // Giving up on the sample (proc-fd lookup timed out).
223     if (proc_state.status == ProcessState::Status::kExpired) {
224       PERFETTO_DLOG("Unwinder skipping sample for pid [%d]",
225                     static_cast<int>(pid));
226 
227       delegate_->PostEmitUnwinderSkippedSample(entry.data_source_id,
228                                                std::move(entry.sample));
229       entry = UnwindEntry::Invalid();
230       continue;
231     }
232 
233     // Still waiting on the proc-fds.
234     if (proc_state.status == ProcessState::Status::kResolving) {
235       PERFETTO_DLOG("Unwinder deferring sample for pid [%d]",
236                     static_cast<int>(pid));
237 
238       pending_sample_sources.insert(entry.data_source_id);
239       continue;
240     }
241 
242     // Sample ready - process it.
243     if (proc_state.status == ProcessState::Status::kResolved) {
244       // Metatrace: emit both a scoped slice, as well as a "counter"
245       // representing the pid being unwound.
246       PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, PROFILER_UNWIND_SAMPLE);
247       PERFETTO_METATRACE_COUNTER(TAG_PRODUCER, PROFILER_UNWIND_CURRENT_PID,
248                                  static_cast<int32_t>(pid));
249 
250       PERFETTO_CHECK(proc_state.unwind_state.has_value());
251       CompletedSample unwound_sample =
252           UnwindSample(entry.sample, &proc_state.unwind_state.value(),
253                        proc_state.attempted_unwinding);
254       proc_state.attempted_unwinding = true;
255 
256       PERFETTO_METATRACE_COUNTER(TAG_PRODUCER, PROFILER_UNWIND_CURRENT_PID, 0);
257 
258       delegate_->PostEmitSample(entry.data_source_id,
259                                 std::move(unwound_sample));
260       entry = UnwindEntry::Invalid();
261       continue;
262     }
263   }
264 
265   // Consume all leading processed entries in the queue.
266   auto new_read_pos = read_view.read_pos;
267   for (; new_read_pos < read_view.write_pos; new_read_pos++) {
268     UnwindEntry& entry = unwind_queue_.at(new_read_pos);
269     if (entry.valid)
270       break;
271   }
272   if (new_read_pos != read_view.read_pos)
273     unwind_queue_.CommitNewReadPosition(new_read_pos);
274 
275   PERFETTO_METATRACE_COUNTER(
276       TAG_PRODUCER, PROFILER_UNWIND_QUEUE_SZ,
277       static_cast<int32_t>(read_view.write_pos - new_read_pos));
278 
279   PERFETTO_DLOG("Unwind queue drain: [%" PRIu64 "]->[%" PRIu64 "]",
280                 read_view.write_pos - read_view.read_pos,
281                 read_view.write_pos - new_read_pos);
282 
283   return pending_sample_sources;
284 }
285 
UnwindSample(const ParsedSample & sample,UnwindingMetadata * unwind_state,bool pid_unwound_before)286 CompletedSample Unwinder::UnwindSample(const ParsedSample& sample,
287                                        UnwindingMetadata* unwind_state,
288                                        bool pid_unwound_before) {
289   PERFETTO_DCHECK_THREAD(thread_checker_);
290   PERFETTO_DCHECK(unwind_state);
291 
292   CompletedSample ret;
293   ret.cpu = sample.cpu;
294   ret.pid = sample.pid;
295   ret.tid = sample.tid;
296   ret.timestamp = sample.timestamp;
297   ret.cpu_mode = sample.cpu_mode;
298 
299   // Overlay the stack bytes over /proc/<pid>/mem.
300   std::shared_ptr<unwindstack::Memory> overlay_memory =
301       std::make_shared<StackOverlayMemory>(
302           unwind_state->fd_mem, sample.regs->sp(),
303           reinterpret_cast<const uint8_t*>(sample.stack.data()),
304           sample.stack.size());
305 
306   // Unwindstack clobbers registers, so make a copy in case we need to retry.
307   auto regs_copy = std::unique_ptr<unwindstack::Regs>{sample.regs->Clone()};
308 
309   unwindstack::ErrorCode error_code = unwindstack::ERROR_NONE;
310   unwindstack::Unwinder unwinder(kUnwindingMaxFrames, &unwind_state->fd_maps,
311                                  regs_copy.get(), overlay_memory);
312 
313   // TODO(rsavitski): consider rate-limiting unwind retries.
314   for (int attempt = 0; attempt < 2; attempt++) {
315     metatrace::ScopedEvent m(metatrace::TAG_PRODUCER,
316                              pid_unwound_before
317                                  ? metatrace::PROFILER_UNWIND_ATTEMPT
318                                  : metatrace::PROFILER_UNWIND_INITIAL_ATTEMPT);
319 
320 #if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
321     unwinder.SetJitDebug(unwind_state->jit_debug.get(), regs_copy->Arch());
322     unwinder.SetDexFiles(unwind_state->dex_files.get(), regs_copy->Arch());
323 #endif
324     unwinder.Unwind(/*initial_map_names_to_skip=*/nullptr,
325                     /*map_suffixes_to_ignore=*/nullptr);
326     error_code = unwinder.LastErrorCode();
327     if (error_code != unwindstack::ERROR_INVALID_MAP)
328       break;
329 
330     // Otherwise, reparse the maps, and possibly retry the unwind.
331     PERFETTO_DLOG("Reparsing maps for pid [%d]", static_cast<int>(sample.pid));
332     PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, PROFILER_MAPS_REPARSE);
333     unwind_state->ReparseMaps();
334   }
335 
336   PERFETTO_DLOG("Frames from unwindstack for pid [%d]:",
337                 static_cast<int>(sample.pid));
338   std::vector<unwindstack::FrameData> frames = unwinder.ConsumeFrames();
339   ret.frames.reserve(frames.size());
340   for (unwindstack::FrameData& frame : frames) {
341     if (PERFETTO_DLOG_IS_ON())
342       PERFETTO_DLOG("%s", unwinder.FormatFrame(frame).c_str());
343 
344     ret.frames.emplace_back(unwind_state->AnnotateFrame(std::move(frame)));
345   }
346 
347   // In case of an unwinding error, add a synthetic error frame (which will
348   // appear as a caller of the partially-unwound fragment), for easier
349   // visualization of errors.
350   if (error_code != unwindstack::ERROR_NONE) {
351     PERFETTO_DLOG("Unwinding error %" PRIu8, error_code);
352     unwindstack::FrameData frame_data{};
353     frame_data.function_name =
354         "ERROR " + StringifyLibUnwindstackError(error_code);
355     frame_data.map_name = "ERROR";
356     ret.frames.emplace_back(std::move(frame_data), /*build_id=*/"");
357     ret.unwind_error = error_code;
358   }
359 
360   return ret;
361 }
362 
PostInitiateDataSourceStop(DataSourceInstanceID ds_id)363 void Unwinder::PostInitiateDataSourceStop(DataSourceInstanceID ds_id) {
364   task_runner_->PostTask([this, ds_id] { InitiateDataSourceStop(ds_id); });
365 }
366 
InitiateDataSourceStop(DataSourceInstanceID ds_id)367 void Unwinder::InitiateDataSourceStop(DataSourceInstanceID ds_id) {
368   PERFETTO_DCHECK_THREAD(thread_checker_);
369   PERFETTO_DLOG("Unwinder::InitiateDataSourceStop(%zu)",
370                 static_cast<size_t>(ds_id));
371 
372   auto it = data_sources_.find(ds_id);
373   PERFETTO_CHECK(it != data_sources_.end());
374   DataSourceState& ds = it->second;
375 
376   PERFETTO_CHECK(ds.status == DataSourceState::Status::kActive);
377   ds.status = DataSourceState::Status::kShuttingDown;
378 
379   // Make sure that there's an outstanding task to process the unwinding queue,
380   // as it is the point that evaluates the stop condition.
381   PostProcessQueue();
382 }
383 
FinishDataSourceStop(DataSourceInstanceID ds_id)384 void Unwinder::FinishDataSourceStop(DataSourceInstanceID ds_id) {
385   PERFETTO_DCHECK_THREAD(thread_checker_);
386   PERFETTO_DLOG("Unwinder::FinishDataSourceStop(%zu)",
387                 static_cast<size_t>(ds_id));
388 
389   auto it = data_sources_.find(ds_id);
390   PERFETTO_CHECK(it != data_sources_.end());
391   DataSourceState& ds = it->second;
392 
393   // Drop unwinder's state tied to the source.
394   PERFETTO_CHECK(ds.status == DataSourceState::Status::kShuttingDown);
395   data_sources_.erase(it);
396 
397   // Clean up state if there are no more active sources.
398   if (data_sources_.empty())
399     ResetAndEnableUnwindstackCache();
400 
401   // Inform service thread that the unwinder is done with the source.
402   delegate_->PostFinishDataSourceStop(ds_id);
403 }
404 
PostClearCachedStatePeriodic(DataSourceInstanceID ds_id,uint32_t period_ms)405 void Unwinder::PostClearCachedStatePeriodic(DataSourceInstanceID ds_id,
406                                             uint32_t period_ms) {
407   task_runner_->PostDelayedTask(
408       [this, ds_id, period_ms] { ClearCachedStatePeriodic(ds_id, period_ms); },
409       period_ms);
410 }
411 
412 // See header for rationale.
ClearCachedStatePeriodic(DataSourceInstanceID ds_id,uint32_t period_ms)413 void Unwinder::ClearCachedStatePeriodic(DataSourceInstanceID ds_id,
414                                         uint32_t period_ms) {
415   auto it = data_sources_.find(ds_id);
416   if (it == data_sources_.end())
417     return;  // stop the periodic task
418 
419   DataSourceState& ds = it->second;
420   if (ds.status != DataSourceState::Status::kActive)
421     return;
422 
423   PERFETTO_METATRACE_SCOPED(TAG_PRODUCER, PROFILER_UNWIND_CACHE_CLEAR);
424   PERFETTO_DLOG("Clearing unwinder's cached state.");
425 
426   for (auto& pid_and_process : ds.process_states) {
427     pid_and_process.second.unwind_state->fd_maps.Reset();
428   }
429   ResetAndEnableUnwindstackCache();
430   MaybeReleaseAllocatorMemToOS();
431 
432   PostClearCachedStatePeriodic(ds_id, period_ms);  // repost
433 }
434 
ResetAndEnableUnwindstackCache()435 void Unwinder::ResetAndEnableUnwindstackCache() {
436   PERFETTO_DLOG("Resetting unwindstack cache");
437   // Libunwindstack uses an unsynchronized variable for setting/checking whether
438   // the cache is enabled. Therefore unwinding and cache toggling should stay on
439   // the same thread, but we might be moving unwinding across threads if we're
440   // recreating |Unwinder| instances (during a reconnect to traced). Therefore,
441   // use our own static lock to synchronize the cache toggling.
442   // TODO(rsavitski): consider fixing this in libunwindstack itself.
443   static std::mutex* lock = new std::mutex{};
444   std::lock_guard<std::mutex> guard{*lock};
445   unwindstack::Elf::SetCachingEnabled(false);  // free any existing state
446   unwindstack::Elf::SetCachingEnabled(true);   // reallocate a fresh cache
447 }
448 
449 }  // namespace profiling
450 }  // namespace perfetto
451