/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/traced/probes/ftrace/ftrace_controller.h"

#include <fcntl.h>
#include <poll.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cstdint>

#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <utility>

#include "perfetto/base/build_config.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/metatrace.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/string_utils.h"
#include "perfetto/ext/tracing/core/trace_writer.h"
#include "src/kallsyms/kernel_symbol_map.h"
#include "src/kallsyms/lazy_kernel_symbolizer.h"
#include "src/traced/probes/ftrace/atrace_hal_wrapper.h"
#include "src/traced/probes/ftrace/cpu_reader.h"
#include "src/traced/probes/ftrace/cpu_stats_parser.h"
#include "src/traced/probes/ftrace/event_info.h"
#include "src/traced/probes/ftrace/event_info_constants.h"
#include "src/traced/probes/ftrace/ftrace_config_muxer.h"
#include "src/traced/probes/ftrace/ftrace_config_utils.h"
#include "src/traced/probes/ftrace/ftrace_data_source.h"
#include "src/traced/probes/ftrace/ftrace_metadata.h"
#include "src/traced/probes/ftrace/ftrace_procfs.h"
#include "src/traced/probes/ftrace/ftrace_stats.h"
#include "src/traced/probes/ftrace/proto_translation_table.h"
#include "src/traced/probes/ftrace/vendor_tracepoints.h"

namespace perfetto {
namespace {

constexpr uint32_t kDefaultTickPeriodMs = 100;
constexpr uint32_t kPollBackingTickPeriodMs = 1000;
constexpr uint32_t kMinTickPeriodMs = 1;
constexpr uint32_t kMaxTickPeriodMs = 1000 * 60;
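// Minimum kernel version for poll(2) support on the per-cpu trace pipes, used
// by the buffer watermark watches (see PollSupportedOnKernelVersion() below).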
constexpr int kPollRequiredMajorVersion = 6;
constexpr int kPollRequiredMinorVersion = 1;

// Read at most this many pages of data per cpu per read task. If we hit this
// limit on at least one cpu, we stop and repost the read task, letting other
// tasks get some cpu time before continuing reading.
constexpr size_t kMaxPagesPerCpuPerReadTick = 256;  // 1 MB per cpu

bool WriteToFile(const char* path, const char* str) {
  auto fd = base::OpenFile(path, O_WRONLY);
  if (!fd)
    return false;
  const size_t str_len = strlen(str);
  return base::WriteAll(*fd, str, str_len) == static_cast<ssize_t>(str_len);
}

bool ClearFile(const char* path) {
  auto fd = base::OpenFile(path, O_WRONLY | O_TRUNC);
  return !!fd;
}

std::optional<int64_t> ReadFtraceNowTs(const base::ScopedFile& cpu_stats_fd) {
  PERFETTO_CHECK(cpu_stats_fd);

  char buf[512];
  ssize_t res = PERFETTO_EINTR(pread(*cpu_stats_fd, buf, sizeof(buf) - 1, 0));
  if (res <= 0)
    return std::nullopt;
  buf[res] = '\0';

  FtraceCpuStats stats{};
  DumpCpuStats(buf, &stats);
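  // |now_ts| is in seconds (as a double); convert to nanoseconds.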
  return static_cast<int64_t>(stats.now_ts * 1000 * 1000 * 1000);
}

std::map<std::string, std::vector<GroupAndName>> GetAtraceVendorEvents(
    FtraceProcfs* tracefs) {
#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
  if (base::FileExists(vendor_tracepoints::kCategoriesFile)) {
    std::map<std::string, std::vector<GroupAndName>> vendor_evts;
    base::Status status =
        vendor_tracepoints::DiscoverAccessibleVendorTracepointsWithFile(
            vendor_tracepoints::kCategoriesFile, &vendor_evts, tracefs);
    if (!status.ok()) {
      PERFETTO_ELOG("Cannot load vendor categories: %s", status.c_message());
    }
    return vendor_evts;
  } else {
    AtraceHalWrapper hal;
    return vendor_tracepoints::DiscoverVendorTracepointsWithHal(&hal, tracefs);
  }
#else
  base::ignore_result(tracefs);
  return {};
#endif
}

struct AndroidGkiVersion {
  uint64_t version = 0;
  uint64_t patch_level = 0;
  uint64_t sub_level = 0;
  uint64_t release = 0;
  uint64_t kmi_gen = 0;
};

#define ANDROID_GKI_UNAME_FMT \
  "%" PRIu64 ".%" PRIu64 ".%" PRIu64 "-android%" PRIu64 "-%" PRIu64
std::optional<AndroidGkiVersion> ParseAndroidGkiVersion(const char* s) {
  AndroidGkiVersion v = {};
  if (sscanf(s, ANDROID_GKI_UNAME_FMT, &v.version, &v.patch_level, &v.sub_level,
             &v.release, &v.kmi_gen) != 5) {
    return std::nullopt;
  }
  return v;
}

}  // namespace

// Method of last resort to reset ftrace state.
// We don't know what state the rest of the system and process is in, so avoid
// allocations as far as possible.
bool HardResetFtraceState() {
  for (const char* const* item = FtraceProcfs::kTracingPaths; *item; ++item) {
    std::string prefix(*item);
    PERFETTO_CHECK(base::EndsWith(prefix, "/"));
    bool res = true;
    res &= WriteToFile((prefix + "tracing_on").c_str(), "0");
    res &= WriteToFile((prefix + "buffer_size_kb").c_str(), "4");
    // Not checking success because these files might not be accessible on
    // older or release builds of Android:
    WriteToFile((prefix + "events/enable").c_str(), "0");
    WriteToFile((prefix + "events/raw_syscalls/filter").c_str(), "0");
    WriteToFile((prefix + "current_tracer").c_str(), "nop");
    res &= ClearFile((prefix + "trace").c_str());
    if (res)
      return true;
  }
  return false;
}

// static
std::unique_ptr<FtraceController> FtraceController::Create(
    base::TaskRunner* runner,
    Observer* observer) {
  std::unique_ptr<FtraceProcfs> ftrace_procfs =
      FtraceProcfs::CreateGuessingMountPoint("");
  if (!ftrace_procfs)
    return nullptr;

  std::unique_ptr<ProtoTranslationTable> table = ProtoTranslationTable::Create(
      ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
  if (!table)
    return nullptr;

  auto atrace_wrapper = std::make_unique<AtraceWrapperImpl>();

  std::map<std::string, std::vector<GroupAndName>> vendor_evts =
      GetAtraceVendorEvents(ftrace_procfs.get());

  SyscallTable syscalls = SyscallTable::FromCurrentArch();

  auto muxer = std::make_unique<FtraceConfigMuxer>(
      ftrace_procfs.get(), atrace_wrapper.get(), table.get(),
      std::move(syscalls), vendor_evts);
  return std::unique_ptr<FtraceController>(new FtraceController(
      std::move(ftrace_procfs), std::move(table), std::move(atrace_wrapper),
      std::move(muxer), runner, observer));
}

FtraceController::FtraceController(
    std::unique_ptr<FtraceProcfs> ftrace_procfs,
    std::unique_ptr<ProtoTranslationTable> table,
    std::unique_ptr<AtraceWrapper> atrace_wrapper,
    std::unique_ptr<FtraceConfigMuxer> muxer,
    base::TaskRunner* task_runner,
    Observer* observer)
    : task_runner_(task_runner),
      observer_(observer),
      atrace_wrapper_(std::move(atrace_wrapper)),
      primary_(std::move(ftrace_procfs), std::move(table), std::move(muxer)),
      weak_factory_(this) {}

FtraceController::~FtraceController() {
  while (!data_sources_.empty()) {
    RemoveDataSource(*data_sources_.begin());
  }
  PERFETTO_DCHECK(data_sources_.empty());
  PERFETTO_DCHECK(primary_.started_data_sources.empty());
  PERFETTO_DCHECK(primary_.cpu_readers.empty());
  PERFETTO_DCHECK(secondary_instances_.empty());
}

uint64_t FtraceController::NowMs() const {
  return static_cast<uint64_t>(base::GetWallTimeMs().count());
}

template <typename F>
void FtraceController::ForEachInstance(F fn) {
  fn(&primary_);
  for (auto& kv : secondary_instances_) {
    fn(kv.second.get());
  }
}

void FtraceController::StartIfNeeded(FtraceInstanceState* instance,
                                     const std::string& instance_name) {
  if (buffer_watermark_support_ == PollSupport::kUntested) {
    buffer_watermark_support_ = VerifyKernelSupportForBufferWatermark();
  }

  // If instance is already active, then at most we need to update the buffer
  // poll callbacks. The periodic |ReadTick| will pick up any updates to the
  // period the next time it executes.
  if (instance->started_data_sources.size() > 1) {
    UpdateBufferWatermarkWatches(instance, instance_name);
    return;
  }

  // Lazily allocate the memory used for reading & parsing ftrace. In the case
  // of multiple ftrace instances, this might already be valid.
  parsing_mem_.AllocateIfNeeded();

  const auto ftrace_clock = instance->ftrace_config_muxer->ftrace_clock();
  size_t num_cpus = instance->ftrace_procfs->NumberOfCpus();
  PERFETTO_CHECK(instance->cpu_readers.empty());
  instance->cpu_readers.reserve(num_cpus);
  for (size_t cpu = 0; cpu < num_cpus; cpu++) {
    instance->cpu_readers.emplace_back(
        cpu, instance->ftrace_procfs->OpenPipeForCpu(cpu),
        instance->table.get(), &symbolizer_, ftrace_clock,
        &ftrace_clock_snapshot_);
  }

  // Special case for primary instance: if not using the boot clock, take
  // manual clock snapshots so that the trace parser can do a best effort
  // conversion back to boot. This is primarily for old kernels that predate
  // boot support, and therefore default to "global" clock.
  if (instance == &primary_ &&
      ftrace_clock != protos::pbzero::FtraceClock::FTRACE_CLOCK_UNSPECIFIED) {
    cpu_zero_stats_fd_ = primary_.ftrace_procfs->OpenCpuStats(0 /* cpu */);
    MaybeSnapshotFtraceClock();
  }

  // Set up poll callbacks for the buffers if requested by at least one DS.
  UpdateBufferWatermarkWatches(instance, instance_name);

  // Start a new repeating read task (even if there is already one posted due
  // to a different ftrace instance). Any old tasks will stop due to generation
  // checks.
  auto generation = ++tick_generation_;
  auto tick_period_ms = GetTickPeriodMs();
  auto weak_this = weak_factory_.GetWeakPtr();
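  // Delay the first tick so that it fires at the next multiple of
  // |tick_period_ms| in wall-clock time; ReadTick() schedules subsequent
  // ticks the same way.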
  task_runner_->PostDelayedTask(
      [weak_this, generation] {
        if (weak_this)
          weak_this->ReadTick(generation);
      },
      tick_period_ms - (NowMs() % tick_period_ms));
}

// We handle the ftrace buffers in a repeating task (ReadTick). On a given tick,
// we iterate over all per-cpu buffers, parse their contents, and then write out
// the serialized packets. This is handled by |CpuReader| instances, which
// attempt to read from their respective per-cpu buffer fd until they catch up
// to the head of the buffer, or hit a transient error.
//
// The readers work in batches of |kParsingBufferSizePages| pages for cache
// locality, and to limit memory usage.
//
// However, the reading happens on the primary thread, shared with the rest of
// the service (including ipc). If there is a lot of ftrace data to read, we
// want to yield to the event loop, re-enqueueing a continuation task at the end
// of the immediate queue (letting other enqueued tasks run before
// continuing). Therefore we introduce |kMaxPagesPerCpuPerReadTick|.
void FtraceController::ReadTick(int generation) {
  metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                             metatrace::FTRACE_READ_TICK);
  if (generation != tick_generation_ || GetStartedDataSourcesCount() == 0) {
    return;
  }
  MaybeSnapshotFtraceClock();

  // Read all per-cpu buffers. ForEachInstance already covers the primary
  // instance, so a single pass over the instances reads everything once.
  bool all_cpus_done = true;
  ForEachInstance([&](FtraceInstanceState* instance) {
    all_cpus_done &= ReadPassForInstance(instance);
  });
  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();

  auto weak_this = weak_factory_.GetWeakPtr();
  if (!all_cpus_done) {
    PERFETTO_DLOG("Reposting immediate ReadTick as there's more work.");
    task_runner_->PostTask([weak_this, generation] {
      if (weak_this)
        weak_this->ReadTick(generation);
    });
  } else {
    // Done until next period.
    auto tick_period_ms = GetTickPeriodMs();
    task_runner_->PostDelayedTask(
        [weak_this, generation] {
          if (weak_this)
            weak_this->ReadTick(generation);
        },
        tick_period_ms - (NowMs() % tick_period_ms));
  }

#if PERFETTO_DCHECK_IS_ON()
  // OnFtraceDataWrittenIntoDataSourceBuffers() is supposed to clear
  // all metadata, including the |kernel_addrs| map for symbolization.
  ForEachInstance([&](FtraceInstanceState* instance) {
    for (FtraceDataSource* ds : instance->started_data_sources) {
      FtraceMetadata* ftrace_metadata = ds->mutable_metadata();
      PERFETTO_DCHECK(ftrace_metadata->kernel_addrs.empty());
      PERFETTO_DCHECK(ftrace_metadata->last_kernel_addr_index_written == 0);
    }
  });
#endif
}

bool FtraceController::ReadPassForInstance(FtraceInstanceState* instance) {
  if (instance->started_data_sources.empty())
    return true;

  bool all_cpus_done = true;
  for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
    size_t max_pages = kMaxPagesPerCpuPerReadTick;
    size_t pages_read = instance->cpu_readers[i].ReadCycle(
        &parsing_mem_, max_pages, instance->started_data_sources);
    PERFETTO_DCHECK(pages_read <= max_pages);
    if (pages_read == max_pages) {
      all_cpus_done = false;
    }
  }
  return all_cpus_done;
}

uint32_t FtraceController::GetTickPeriodMs() {
  if (data_sources_.empty())
    return kDefaultTickPeriodMs;
  uint32_t kUnsetPeriod = std::numeric_limits<uint32_t>::max();
  uint32_t min_period_ms = kUnsetPeriod;
  bool using_poll = true;
  ForEachInstance([&](FtraceInstanceState* instance) {
    using_poll &= instance->buffer_watches_posted;
    for (FtraceDataSource* ds : instance->started_data_sources) {
      if (ds->config().has_drain_period_ms()) {
        min_period_ms = std::min(min_period_ms, ds->config().drain_period_ms());
      }
    }
  });

  // None of the active data sources requested an explicit tick period.
  // The historical default is 100ms, but if we know that all instances are also
  // using buffer watermark polling, we can raise it. We don't disable the tick
  // entirely as it spreads the read work more evenly, and ensures procfs
  // scrapes of seen TIDs are not too stale.
  if (min_period_ms == kUnsetPeriod) {
    return using_poll ? kPollBackingTickPeriodMs : kDefaultTickPeriodMs;
  }

  if (min_period_ms < kMinTickPeriodMs || min_period_ms > kMaxTickPeriodMs) {
    PERFETTO_LOG(
        "drain_period_ms was %u should be between %u and %u. "
        "Falling back onto a default.",
        min_period_ms, kMinTickPeriodMs, kMaxTickPeriodMs);
    return kDefaultTickPeriodMs;
  }
  return min_period_ms;
}

void FtraceController::UpdateBufferWatermarkWatches(
    FtraceInstanceState* instance,
    const std::string& instance_name) {
  PERFETTO_DCHECK(buffer_watermark_support_ != PollSupport::kUntested);
  if (buffer_watermark_support_ == PollSupport::kUnsupported)
    return;

  bool requested_poll = false;
  for (const FtraceDataSource* ds : instance->started_data_sources) {
    requested_poll |= ds->config().has_drain_buffer_percent();
  }

  if (!requested_poll || instance->buffer_watches_posted)
    return;

  auto weak_this = weak_factory_.GetWeakPtr();
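  // Install one fd watch per cpu. With buffer_percent configured, the kernel
  // signals POLLIN on trace_pipe_raw only once the per-cpu buffer is filled
  // past that watermark (see the kernel commit referenced in
  // PollSupportedOnKernelVersion()).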
  for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
    int fd = instance->cpu_readers[i].RawBufferFd();
    task_runner_->AddFileDescriptorWatch(fd, [weak_this, instance_name, i] {
      if (weak_this)
        weak_this->OnBufferPastWatermark(instance_name, i,
                                         /*repoll_watermark=*/true);
    });
  }
  instance->buffer_watches_posted = true;
}

void FtraceController::RemoveBufferWatermarkWatches(
    FtraceInstanceState* instance) {
  if (!instance->buffer_watches_posted)
    return;

  for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
    int fd = instance->cpu_readers[i].RawBufferFd();
    task_runner_->RemoveFileDescriptorWatch(fd);
  }
  instance->buffer_watches_posted = false;
}

// TODO(rsavitski): consider calling OnFtraceData only if we're not reposting
// a continuation. It's a tradeoff between procfs scrape freshness and urgency
// to drain ftrace kernel buffers.
void FtraceController::OnBufferPastWatermark(std::string instance_name,
                                             size_t cpu,
                                             bool repoll_watermark) {
  metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                             metatrace::FTRACE_CPU_BUFFER_WATERMARK);

  // Instance might have been stopped before this callback runs.
  FtraceInstanceState* instance = GetInstance(instance_name);
  if (!instance || cpu >= instance->cpu_readers.size())
    return;

  // Repoll all per-cpu buffers with zero timeout to confirm that at least
  // one is still past the watermark. This might not be true if a different
  // callback / readtick / flush did a read pass before this callback reached
  // the front of the task runner queue.
  if (repoll_watermark) {
    size_t num_cpus = instance->cpu_readers.size();
    std::vector<struct pollfd> pollfds(num_cpus);
    for (size_t i = 0; i < num_cpus; i++) {
      pollfds[i].fd = instance->cpu_readers[i].RawBufferFd();
      pollfds[i].events = POLLIN;
    }
    int r = PERFETTO_EINTR(poll(pollfds.data(), num_cpus, 0));
    if (r < 0) {
      PERFETTO_DPLOG("poll failed");
      return;
    } else if (r == 0) {  // no buffers past the watermark -> we're done.
      return;
    }
    // Check for at least one readable fd, as some poll results might be
    // POLLERR, as seen in cases with offlined cores. It's still fine to
    // attempt reading from those buffers as CpuReader will handle the ENODEV.
    bool has_readable_fd = false;
    for (size_t i = 0; i < num_cpus; i++) {
      has_readable_fd |= (pollfds[i].revents & POLLIN);
    }
    if (!has_readable_fd) {
      return;
    }
  }

  MaybeSnapshotFtraceClock();
  bool all_cpus_done = ReadPassForInstance(instance);
  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
  if (!all_cpus_done) {
    // More data to be read, but we want to let other task_runner tasks run.
    // Repost a continuation task.
    auto weak_this = weak_factory_.GetWeakPtr();
    task_runner_->PostTask([weak_this, instance_name, cpu] {
      if (weak_this)
        weak_this->OnBufferPastWatermark(instance_name, cpu,
                                         /*repoll_watermark=*/false);
    });
  }
}

void FtraceController::Flush(FlushRequestID flush_id) {
  metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                             metatrace::FTRACE_CPU_FLUSH);

  ForEachInstance([&](FtraceInstanceState* instance) {  // for clang-format
    FlushForInstance(instance);
  });
  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();

  ForEachInstance([&](FtraceInstanceState* instance) {
    for (FtraceDataSource* ds : instance->started_data_sources) {
      ds->OnFtraceFlushComplete(flush_id);
    }
  });
}

void FtraceController::FlushForInstance(FtraceInstanceState* instance) {
  if (instance->started_data_sources.empty())
    return;

  // Read all cpus in one go, limiting the per-cpu read amount to make sure we
  // don't get stuck chasing the writer if there's a very high bandwidth of
  // events.
  size_t max_pages = instance->ftrace_config_muxer->GetPerCpuBufferSizePages();
  for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
    instance->cpu_readers[i].ReadCycle(&parsing_mem_, max_pages,
                                       instance->started_data_sources);
  }
}

// We are not implicitly flushing on Stop. The tracing service is supposed to
// ask for an explicit flush before stopping, unless it needs to perform a
// non-graceful stop.
void FtraceController::StopIfNeeded(FtraceInstanceState* instance) {
  if (!instance->started_data_sources.empty())
    return;

  RemoveBufferWatermarkWatches(instance);
  instance->cpu_readers.clear();
  if (instance == &primary_) {
    cpu_zero_stats_fd_.reset();
  }
  // Muxer cannot change the current_tracer until we close the trace pipe fds
  // (i.e. per_cpu). Hence an explicit request here.
  instance->ftrace_config_muxer->ResetCurrentTracer();

  DestroyIfUnusedSeconaryInstance(instance);

  // Clean up global state if done with all data sources.
  if (!data_sources_.empty())
    return;

  if (!retain_ksyms_on_stop_) {
    symbolizer_.Destroy();
  }
  retain_ksyms_on_stop_ = false;

  // Note: might have never been allocated if data sources were rejected.
  parsing_mem_.Release();
}

bool FtraceController::AddDataSource(FtraceDataSource* data_source) {
  if (!ValidConfig(data_source->config()))
    return false;

  FtraceInstanceState* instance =
      GetOrCreateInstance(data_source->config().instance_name());
  if (!instance)
    return false;

  // Note: from this point onwards, take care not to leak a possibly created
  // instance if returning early.

  FtraceConfigId config_id = next_cfg_id_++;
  if (!instance->ftrace_config_muxer->SetupConfig(
          config_id, data_source->config(),
          data_source->mutable_setup_errors())) {
    DestroyIfUnusedSeconaryInstance(instance);
    return false;
  }

  const FtraceDataSourceConfig* ds_config =
      instance->ftrace_config_muxer->GetDataSourceConfig(config_id);
  auto it_and_inserted = data_sources_.insert(data_source);
  PERFETTO_DCHECK(it_and_inserted.second);
  data_source->Initialize(config_id, ds_config);
  return true;
}

bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
  PERFETTO_DCHECK(data_sources_.count(data_source) > 0);

  FtraceConfigId config_id = data_source->config_id();
  PERFETTO_CHECK(config_id);
  const std::string& instance_name = data_source->config().instance_name();
  FtraceInstanceState* instance = GetOrCreateInstance(instance_name);
  PERFETTO_CHECK(instance);

  if (!instance->ftrace_config_muxer->ActivateConfig(config_id))
    return false;
  instance->started_data_sources.insert(data_source);
  StartIfNeeded(instance, instance_name);

  // Parse kernel symbols if required by the config. This can be an expensive
  // operation (cpu-bound for 500ms+), so delay the StartDataSource
  // acknowledgement until after we're done. This lets a consumer wait for the
  // expensive work to be done by waiting on the "all data sources started"
  // fence. This helps isolate the effects of the cpu-bound work on
  // frequency scaling of cpus when recording benchmarks (b/236143653).
  // Note that we're already recording data into the kernel ftrace
  // buffers while doing the symbol parsing.
  if (data_source->config().symbolize_ksyms()) {
    symbolizer_.GetOrCreateKernelSymbolMap();
    // If at least one config sets the KSYMS_RETAIN flag, keep the ksyms map
    // around in StopIfNeeded().
    const auto KRET = FtraceConfig::KSYMS_RETAIN;
    retain_ksyms_on_stop_ |= data_source->config().ksyms_mem_policy() == KRET;
  }

  return true;
}

void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
  size_t removed = data_sources_.erase(data_source);
  if (!removed)
    return;  // can happen if AddDataSource failed

  FtraceInstanceState* instance =
      GetOrCreateInstance(data_source->config().instance_name());
  PERFETTO_CHECK(instance);

  instance->ftrace_config_muxer->RemoveConfig(data_source->config_id());
  instance->started_data_sources.erase(data_source);
  StopIfNeeded(instance);
}

void FtraceController::DumpFtraceStats(FtraceDataSource* data_source,
                                       FtraceStats* stats_out) {
  FtraceInstanceState* instance =
      GetInstance(data_source->config().instance_name());
  PERFETTO_DCHECK(instance);
  if (!instance)
    return;

  DumpAllCpuStats(instance->ftrace_procfs.get(), stats_out);
  if (symbolizer_.is_valid()) {
    auto* symbol_map = symbolizer_.GetOrCreateKernelSymbolMap();
    stats_out->kernel_symbols_parsed =
        static_cast<uint32_t>(symbol_map->num_syms());
    stats_out->kernel_symbols_mem_kb =
        static_cast<uint32_t>(symbol_map->size_bytes() / 1024);
  }
}

void FtraceController::MaybeSnapshotFtraceClock() {
  if (!cpu_zero_stats_fd_)
    return;

  auto ftrace_clock = primary_.ftrace_config_muxer->ftrace_clock();
  PERFETTO_DCHECK(ftrace_clock != protos::pbzero::FTRACE_CLOCK_UNSPECIFIED);

  // Snapshot the boot clock *before* reading CPU stats so that the two clocks
  // are as close together as possible (i.e. if it was the other way round,
  // we'd skew by the cost of string parsing).
  ftrace_clock_snapshot_.boot_clock_ts = base::GetBootTimeNs().count();

  // A value of zero will cause this snapshot to be skipped.
  ftrace_clock_snapshot_.ftrace_clock_ts =
      ReadFtraceNowTs(cpu_zero_stats_fd_).value_or(0);
}

FtraceController::PollSupport
FtraceController::VerifyKernelSupportForBufferWatermark() {
  struct utsname uts = {};
  if (uname(&uts) < 0 || strcmp(uts.sysname, "Linux") != 0)
    return PollSupport::kUnsupported;
  if (!PollSupportedOnKernelVersion(uts.release))
    return PollSupport::kUnsupported;

  // buffer_percent exists and is writable
  auto* tracefs = primary_.ftrace_procfs.get();
  uint32_t current = tracefs->ReadBufferPercent();
  if (!tracefs->SetBufferPercent(current ? current : 50)) {
    return PollSupport::kUnsupported;
  }

  // Polling on per_cpu/cpu0/trace_pipe_raw doesn't return errors.
  base::ScopedFile fd = tracefs->OpenPipeForCpu(0);
  struct pollfd pollset = {};
  pollset.fd = fd.get();
  pollset.events = POLLIN;
  int r = PERFETTO_EINTR(poll(&pollset, 1, 0));
  if (r < 0 || (r > 0 && (pollset.revents & POLLERR))) {
    return PollSupport::kUnsupported;
  }
  return PollSupport::kSupported;
}

// Check kernel version since the poll implementation has historical bugs.
// We're looking for at least 6.1 for the following:
//   42fb0a1e84ff tracing/ring-buffer: Have polling block on watermark
// Otherwise the poll will wake us up as soon as a single byte is in the
// buffer. A more conservative check would look for 6.6 for an extra fix that
// reduces excessive kernel-space wakeups:
//   1e0cb399c765 ring-buffer: Update "shortest_full" in polling
// However that doesn't break functionality, so we'll still use poll if
// requested by the config.
// static
bool FtraceController::PollSupportedOnKernelVersion(const char* uts_release) {
  int major = 0, minor = 0;
  if (sscanf(uts_release, "%d.%d", &major, &minor) != 2) {
    return false;
  }
  if (major < kPollRequiredMajorVersion ||
      (major == kPollRequiredMajorVersion &&
       minor < kPollRequiredMinorVersion)) {
    // Android: opportunistically detect a few select GKI kernels that are known
    // to have the fixes. Note: 6.1 and 6.6 GKIs are already covered by the
    // outer check.
    std::optional<AndroidGkiVersion> gki = ParseAndroidGkiVersion(uts_release);
    if (!gki.has_value())
      return false;
    // android13-5.10.197 or higher sublevel:
    //   ef47f25e98de ring-buffer: Update "shortest_full" in polling
    // android13-5.15.133 and
    // android14-5.15.133 or higher sublevel:
    //   b5d00cd7db66 ring-buffer: Update "shortest_full" in polling
    bool gki_patched =
        (gki->release == 13 && gki->version == 5 && gki->patch_level == 10 &&
         gki->sub_level >= 197) ||
        ((gki->release == 13 || gki->release == 14) && gki->version == 5 &&
         gki->patch_level == 15 && gki->sub_level >= 133);
    return gki_patched;
  }
  return true;
}

size_t FtraceController::GetStartedDataSourcesCount() {
  size_t cnt = 0;
  ForEachInstance([&](FtraceInstanceState* instance) {
    cnt += instance->started_data_sources.size();
  });
  return cnt;
}

FtraceController::FtraceInstanceState::FtraceInstanceState(
    std::unique_ptr<FtraceProcfs> ft,
    std::unique_ptr<ProtoTranslationTable> ptt,
    std::unique_ptr<FtraceConfigMuxer> fcm)
    : ftrace_procfs(std::move(ft)),
      table(std::move(ptt)),
      ftrace_config_muxer(std::move(fcm)) {}

FtraceController::FtraceInstanceState* FtraceController::GetOrCreateInstance(
    const std::string& instance_name) {
  FtraceInstanceState* maybe_existing = GetInstance(instance_name);
  if (maybe_existing)
    return maybe_existing;

  PERFETTO_DCHECK(!instance_name.empty());
  std::unique_ptr<FtraceInstanceState> instance =
      CreateSecondaryInstance(instance_name);
  if (!instance)
    return nullptr;

  auto it_and_inserted = secondary_instances_.emplace(
      std::piecewise_construct, std::forward_as_tuple(instance_name),
      std::forward_as_tuple(std::move(instance)));
  PERFETTO_CHECK(it_and_inserted.second);
  return it_and_inserted.first->second.get();
}

FtraceController::FtraceInstanceState* FtraceController::GetInstance(
    const std::string& instance_name) {
  if (instance_name.empty())
    return &primary_;

  auto it = secondary_instances_.find(instance_name);
  return it != secondary_instances_.end() ? it->second.get() : nullptr;
}

void FtraceController::DestroyIfUnusedSeconaryInstance(
    FtraceInstanceState* instance) {
  if (instance == &primary_)
    return;
  for (auto it = secondary_instances_.begin(); it != secondary_instances_.end();
       ++it) {
    if (it->second.get() == instance &&
        instance->ftrace_config_muxer->GetDataSourcesCount() == 0) {
      // no data sources left referencing this secondary instance
      secondary_instances_.erase(it);
      return;
    }
  }
  PERFETTO_FATAL("Bug in ftrace instance lifetimes");
}

std::unique_ptr<FtraceController::FtraceInstanceState>
FtraceController::CreateSecondaryInstance(const std::string& instance_name) {
  std::optional<std::string> instance_path = AbsolutePathForInstance(
      primary_.ftrace_procfs->GetRootPath(), instance_name);
  if (!instance_path.has_value()) {
    PERFETTO_ELOG("Invalid ftrace instance name: \"%s\"",
                  instance_name.c_str());
    return nullptr;
  }

  auto ftrace_procfs = FtraceProcfs::Create(*instance_path);
  if (!ftrace_procfs) {
    PERFETTO_ELOG("Failed to create ftrace procfs for \"%s\"",
                  instance_path->c_str());
    return nullptr;
  }

  auto table = ProtoTranslationTable::Create(
      ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
  if (!table) {
    PERFETTO_ELOG("Failed to create proto translation table for \"%s\"",
                  instance_path->c_str());
    return nullptr;
  }

  // secondary instances don't support atrace and vendor tracepoint HAL
  std::map<std::string, std::vector<GroupAndName>> vendor_evts;

  auto syscalls = SyscallTable::FromCurrentArch();

  auto muxer = std::make_unique<FtraceConfigMuxer>(
      ftrace_procfs.get(), atrace_wrapper_.get(), table.get(),
      std::move(syscalls), vendor_evts,
      /* secondary_instance= */ true);
  return std::make_unique<FtraceInstanceState>(
      std::move(ftrace_procfs), std::move(table), std::move(muxer));
}

// TODO(rsavitski): we want to eventually add support for the default
// (primary_) tracefs path to be an instance itself, at which point we'll need
// to be careful to distinguish the tracefs mount point from the default
// instance path.
// static
std::optional<std::string> FtraceController::AbsolutePathForInstance(
    const std::string& tracefs_root,
    const std::string& raw_cfg_name) {
  if (base::Contains(raw_cfg_name, '/') ||
      base::StartsWith(raw_cfg_name, "..")) {
    return std::nullopt;
  }

  // ARM64 pKVM hypervisor tracing emulates an instance, but it does not live
  // under instances/; we special-case that name for now.
  if (raw_cfg_name == "hyp") {
    std::string hyp_path = tracefs_root + "hyp/";
    PERFETTO_LOG(
        "Config specified reserved \"hyp\" instance name, using %s for events.",
        hyp_path.c_str());
    return std::make_optional(hyp_path);
  }

  return tracefs_root + "instances/" + raw_cfg_name + "/";
}

FtraceController::Observer::~Observer() = default;

}  // namespace perfetto