1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/traced/probes/ftrace/ftrace_controller.h"
18 
19 #include <fcntl.h>
20 #include <poll.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <sys/utsname.h>
25 #include <sys/wait.h>
26 #include <unistd.h>
27 #include <cstdint>
28 
29 #include <limits>
30 #include <memory>
31 #include <optional>
32 #include <string>
33 #include <tuple>
34 #include <utility>
35 
36 #include "perfetto/base/build_config.h"
37 #include "perfetto/base/logging.h"
38 #include "perfetto/base/time.h"
39 #include "perfetto/ext/base/file_utils.h"
40 #include "perfetto/ext/base/metatrace.h"
41 #include "perfetto/ext/base/scoped_file.h"
42 #include "perfetto/ext/base/string_splitter.h"
43 #include "perfetto/ext/base/string_utils.h"
44 #include "perfetto/ext/tracing/core/trace_writer.h"
45 #include "src/kallsyms/kernel_symbol_map.h"
46 #include "src/kallsyms/lazy_kernel_symbolizer.h"
47 #include "src/traced/probes/ftrace/atrace_hal_wrapper.h"
48 #include "src/traced/probes/ftrace/cpu_reader.h"
49 #include "src/traced/probes/ftrace/cpu_stats_parser.h"
50 #include "src/traced/probes/ftrace/event_info.h"
51 #include "src/traced/probes/ftrace/event_info_constants.h"
52 #include "src/traced/probes/ftrace/ftrace_config_muxer.h"
53 #include "src/traced/probes/ftrace/ftrace_config_utils.h"
54 #include "src/traced/probes/ftrace/ftrace_data_source.h"
55 #include "src/traced/probes/ftrace/ftrace_metadata.h"
56 #include "src/traced/probes/ftrace/ftrace_procfs.h"
57 #include "src/traced/probes/ftrace/ftrace_stats.h"
58 #include "src/traced/probes/ftrace/proto_translation_table.h"
59 #include "src/traced/probes/ftrace/vendor_tracepoints.h"
60 
61 namespace perfetto {
62 namespace {
63 
// Fallback period of the repeating read task.
constexpr uint32_t kDefaultTickPeriodMs = 100;
// Period used instead of the default when no data source asks for an explicit
// drain period and every ftrace instance has buffer watermark watches posted.
constexpr uint32_t kPollBackingTickPeriodMs = 1000;
// Inclusive bounds for a config-provided drain_period_ms.
constexpr uint32_t kMinTickPeriodMs = 1;
constexpr uint32_t kMaxTickPeriodMs = 60 * 1000;
// Lowest upstream kernel version (major.minor) accepted for buffer watermark
// polling without falling back to the Android GKI checks.
constexpr int kPollRequiredMajorVersion = 6;
constexpr int kPollRequiredMinorVersion = 9;

// Upper bound on the pages read from a single cpu within one read task. When
// any cpu reaches the bound, reading stops and the read task is reposted, so
// that other tasks get some cpu time before reading continues.
constexpr size_t kMaxPagesPerCpuPerReadTick = 256;  // 1 MB per cpu
75 
WriteToFile(const char * path,const char * str)76 bool WriteToFile(const char* path, const char* str) {
77   auto fd = base::OpenFile(path, O_WRONLY);
78   if (!fd)
79     return false;
80   const size_t str_len = strlen(str);
81   return base::WriteAll(*fd, str, str_len) == static_cast<ssize_t>(str_len);
82 }
83 
ClearFile(const char * path)84 bool ClearFile(const char* path) {
85   auto fd = base::OpenFile(path, O_WRONLY | O_TRUNC);
86   return !!fd;
87 }
88 
ReadFtraceNowTs(const base::ScopedFile & cpu_stats_fd)89 std::optional<int64_t> ReadFtraceNowTs(const base::ScopedFile& cpu_stats_fd) {
90   PERFETTO_CHECK(cpu_stats_fd);
91 
92   char buf[512];
93   ssize_t res = PERFETTO_EINTR(pread(*cpu_stats_fd, buf, sizeof(buf) - 1, 0));
94   if (res <= 0)
95     return std::nullopt;
96   buf[res] = '\0';
97 
98   FtraceCpuStats stats{};
99   DumpCpuStats(buf, &stats);
100   return static_cast<int64_t>(stats.now_ts * 1000 * 1000 * 1000);
101 }
102 
GetAtraceVendorEvents(FtraceProcfs * tracefs)103 std::map<std::string, std::vector<GroupAndName>> GetAtraceVendorEvents(
104     FtraceProcfs* tracefs) {
105 #if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
106   if (base::FileExists(vendor_tracepoints::kCategoriesFile)) {
107     std::map<std::string, std::vector<GroupAndName>> vendor_evts;
108     base::Status status =
109         vendor_tracepoints::DiscoverAccessibleVendorTracepointsWithFile(
110             vendor_tracepoints::kCategoriesFile, &vendor_evts, tracefs);
111     if (!status.ok()) {
112       PERFETTO_ELOG("Cannot load vendor categories: %s", status.c_message());
113     }
114     return vendor_evts;
115   } else {
116     AtraceHalWrapper hal;
117     return vendor_tracepoints::DiscoverVendorTracepointsWithHal(&hal, tracefs);
118   }
119 #else
120   base::ignore_result(tracefs);
121   return {};
122 #endif
123 }
124 
// Numeric components of an Android GKI kernel release string, listed in the
// order in which ANDROID_GKI_UNAME_FMT scans them, e.g.
// "6.1.86-android14-11-..." -> {6, 1, 86, 14, 11}.
struct AndroidGkiVersion {
  uint64_t version = 0;
  uint64_t patch_level = 0;
  uint64_t sub_level = 0;
  uint64_t release = 0;
  uint64_t kmi_gen = 0;
};

// sscanf() format for the prefix of a GKI release string. This is a *scan*
// format, so it has to be assembled from the SCN* macro family: the PRI*
// macros are only specified for the printf family, even though both expand
// to the same text on common platforms.
#define ANDROID_GKI_UNAME_FMT \
  "%" SCNu64 ".%" SCNu64 ".%" SCNu64 "-android%" SCNu64 "-%" SCNu64

// Parses the leading "<version>.<patch>.<sub>-android<release>-<kmi_gen>" part
// of |s|. Anything after the fifth number is ignored. Returns nullopt unless
// all five fields were matched.
std::optional<AndroidGkiVersion> ParseAndroidGkiVersion(const char* s) {
  AndroidGkiVersion v = {};
  if (sscanf(s, ANDROID_GKI_UNAME_FMT, &v.version, &v.patch_level, &v.sub_level,
             &v.release, &v.kmi_gen) != 5) {
    return std::nullopt;
  }
  return v;
}
144 
145 }  // namespace
146 
147 // Method of last resort to reset ftrace state.
148 // We don't know what state the rest of the system and process is so as far
149 // as possible avoid allocations.
HardResetFtraceState()150 bool HardResetFtraceState() {
151   for (const char* const* item = FtraceProcfs::kTracingPaths; *item; ++item) {
152     std::string prefix(*item);
153     PERFETTO_CHECK(base::EndsWith(prefix, "/"));
154     bool res = true;
155     res &= WriteToFile((prefix + "tracing_on").c_str(), "0");
156     res &= WriteToFile((prefix + "buffer_size_kb").c_str(), "4");
157     // Not checking success because these files might not be accessible on
158     // older or release builds of Android:
159     WriteToFile((prefix + "events/enable").c_str(), "0");
160     WriteToFile((prefix + "events/raw_syscalls/filter").c_str(), "0");
161     WriteToFile((prefix + "current_tracer").c_str(), "nop");
162     res &= ClearFile((prefix + "trace").c_str());
163     if (res)
164       return true;
165   }
166   return false;
167 }
168 
169 // static
Create(base::TaskRunner * runner,Observer * observer)170 std::unique_ptr<FtraceController> FtraceController::Create(
171     base::TaskRunner* runner,
172     Observer* observer) {
173   std::unique_ptr<FtraceProcfs> ftrace_procfs =
174       FtraceProcfs::CreateGuessingMountPoint("");
175   if (!ftrace_procfs)
176     return nullptr;
177 
178   std::unique_ptr<ProtoTranslationTable> table = ProtoTranslationTable::Create(
179       ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
180   if (!table)
181     return nullptr;
182 
183   auto atrace_wrapper = std::make_unique<AtraceWrapperImpl>();
184 
185   std::map<std::string, std::vector<GroupAndName>> vendor_evts =
186       GetAtraceVendorEvents(ftrace_procfs.get());
187 
188   SyscallTable syscalls = SyscallTable::FromCurrentArch();
189 
190   auto muxer = std::make_unique<FtraceConfigMuxer>(
191       ftrace_procfs.get(), atrace_wrapper.get(), table.get(),
192       std::move(syscalls), vendor_evts);
193   return std::unique_ptr<FtraceController>(new FtraceController(
194       std::move(ftrace_procfs), std::move(table), std::move(atrace_wrapper),
195       std::move(muxer), runner, observer));
196 }
197 
FtraceController(std::unique_ptr<FtraceProcfs> ftrace_procfs,std::unique_ptr<ProtoTranslationTable> table,std::unique_ptr<AtraceWrapper> atrace_wrapper,std::unique_ptr<FtraceConfigMuxer> muxer,base::TaskRunner * task_runner,Observer * observer)198 FtraceController::FtraceController(
199     std::unique_ptr<FtraceProcfs> ftrace_procfs,
200     std::unique_ptr<ProtoTranslationTable> table,
201     std::unique_ptr<AtraceWrapper> atrace_wrapper,
202     std::unique_ptr<FtraceConfigMuxer> muxer,
203     base::TaskRunner* task_runner,
204     Observer* observer)
205     : task_runner_(task_runner),
206       observer_(observer),
207       atrace_wrapper_(std::move(atrace_wrapper)),
208       primary_(std::move(ftrace_procfs), std::move(table), std::move(muxer)),
209       weak_factory_(this) {}
210 
~FtraceController()211 FtraceController::~FtraceController() {
212   while (!data_sources_.empty()) {
213     RemoveDataSource(*data_sources_.begin());
214   }
215   PERFETTO_DCHECK(data_sources_.empty());
216   PERFETTO_DCHECK(primary_.started_data_sources.empty());
217   PERFETTO_DCHECK(primary_.cpu_readers.empty());
218   PERFETTO_DCHECK(secondary_instances_.empty());
219 }
220 
NowMs() const221 uint64_t FtraceController::NowMs() const {
222   return static_cast<uint64_t>(base::GetWallTimeMs().count());
223 }
224 
225 template <typename F>
ForEachInstance(F fn)226 void FtraceController::ForEachInstance(F fn) {
227   fn(&primary_);
228   for (auto& kv : secondary_instances_) {
229     fn(kv.second.get());
230   }
231 }
232 
StartIfNeeded(FtraceInstanceState * instance,const std::string & instance_name)233 void FtraceController::StartIfNeeded(FtraceInstanceState* instance,
234                                      const std::string& instance_name) {
235   if (buffer_watermark_support_ == PollSupport::kUntested) {
236     buffer_watermark_support_ = VerifyKernelSupportForBufferWatermark();
237   }
238 
239   // If instance is already active, then at most we need to update the buffer
240   // poll callbacks. The periodic |ReadTick| will pick up any updates to the
241   // period the next time it executes.
242   if (instance->started_data_sources.size() > 1) {
243     UpdateBufferWatermarkWatches(instance, instance_name);
244     return;
245   }
246 
247   // Lazily allocate the memory used for reading & parsing ftrace. In the case
248   // of multiple ftrace instances, this might already be valid.
249   parsing_mem_.AllocateIfNeeded();
250 
251   const auto ftrace_clock = instance->ftrace_config_muxer->ftrace_clock();
252   size_t num_cpus = instance->ftrace_procfs->NumberOfCpus();
253   PERFETTO_CHECK(instance->cpu_readers.empty());
254   instance->cpu_readers.reserve(num_cpus);
255   for (size_t cpu = 0; cpu < num_cpus; cpu++) {
256     instance->cpu_readers.emplace_back(
257         cpu, instance->ftrace_procfs->OpenPipeForCpu(cpu),
258         instance->table.get(), &symbolizer_, ftrace_clock,
259         &ftrace_clock_snapshot_);
260   }
261 
262   // Special case for primary instance: if not using the boot clock, take
263   // manual clock snapshots so that the trace parser can do a best effort
264   // conversion back to boot. This is primarily for old kernels that predate
265   // boot support, and therefore default to "global" clock.
266   if (instance == &primary_ &&
267       ftrace_clock != protos::pbzero::FtraceClock::FTRACE_CLOCK_UNSPECIFIED) {
268     cpu_zero_stats_fd_ = primary_.ftrace_procfs->OpenCpuStats(0 /* cpu */);
269     MaybeSnapshotFtraceClock();
270   }
271 
272   // Set up poll callbacks for the buffers if requested by at least one DS.
273   UpdateBufferWatermarkWatches(instance, instance_name);
274 
275   // Start a new repeating read task (even if there is already one posted due
276   // to a different ftrace instance). Any old tasks will stop due to generation
277   // checks.
278   auto generation = ++tick_generation_;
279   auto tick_period_ms = GetTickPeriodMs();
280   auto weak_this = weak_factory_.GetWeakPtr();
281   task_runner_->PostDelayedTask(
282       [weak_this, generation] {
283         if (weak_this)
284           weak_this->ReadTick(generation);
285       },
286       tick_period_ms - (NowMs() % tick_period_ms));
287 }
288 
289 // We handle the ftrace buffers in a repeating task (ReadTick). On a given tick,
290 // we iterate over all per-cpu buffers, parse their contents, and then write out
291 // the serialized packets. This is handled by |CpuReader| instances, which
292 // attempt to read from their respective per-cpu buffer fd until they catch up
293 // to the head of the buffer, or hit a transient error.
294 //
295 // The readers work in batches of |kParsingBufferSizePages| pages for cache
296 // locality, and to limit memory usage.
297 //
298 // However, the reading happens on the primary thread, shared with the rest of
299 // the service (including ipc). If there is a lot of ftrace data to read, we
300 // want to yield to the event loop, re-enqueueing a continuation task at the end
301 // of the immediate queue (letting other enqueued tasks to run before
302 // continuing). Therefore we introduce |kMaxPagesPerCpuPerReadTick|.
ReadTick(int generation)303 void FtraceController::ReadTick(int generation) {
304   metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
305                              metatrace::FTRACE_READ_TICK);
306   if (generation != tick_generation_ || GetStartedDataSourcesCount() == 0) {
307     return;
308   }
309   MaybeSnapshotFtraceClock();
310 
311   // Read all per-cpu buffers.
312   bool all_cpus_done = true;
313   ForEachInstance([&](FtraceInstanceState* instance) {
314     all_cpus_done &= ReadPassForInstance(instance);
315   });
316   observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
317 
318   auto weak_this = weak_factory_.GetWeakPtr();
319   if (!all_cpus_done) {
320     PERFETTO_DLOG("Reposting immediate ReadTick as there's more work.");
321     task_runner_->PostTask([weak_this, generation] {
322       if (weak_this)
323         weak_this->ReadTick(generation);
324     });
325   } else {
326     // Done until next period.
327     auto tick_period_ms = GetTickPeriodMs();
328     task_runner_->PostDelayedTask(
329         [weak_this, generation] {
330           if (weak_this)
331             weak_this->ReadTick(generation);
332         },
333         tick_period_ms - (NowMs() % tick_period_ms));
334   }
335 
336 #if PERFETTO_DCHECK_IS_ON()
337   // OnFtraceDataWrittenIntoDataSourceBuffers() is supposed to clear
338   // all metadata, including the |kernel_addrs| map for symbolization.
339   ForEachInstance([&](FtraceInstanceState* instance) {
340     for (FtraceDataSource* ds : instance->started_data_sources) {
341       FtraceMetadata* ftrace_metadata = ds->mutable_metadata();
342       PERFETTO_DCHECK(ftrace_metadata->kernel_addrs.empty());
343       PERFETTO_DCHECK(ftrace_metadata->last_kernel_addr_index_written == 0);
344     }
345   });
346 #endif
347 }
348 
ReadPassForInstance(FtraceInstanceState * instance)349 bool FtraceController::ReadPassForInstance(FtraceInstanceState* instance) {
350   if (instance->started_data_sources.empty())
351     return true;
352 
353   bool all_cpus_done = true;
354   for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
355     size_t max_pages = kMaxPagesPerCpuPerReadTick;
356     size_t pages_read = instance->cpu_readers[i].ReadCycle(
357         &parsing_mem_, max_pages, instance->started_data_sources);
358     PERFETTO_DCHECK(pages_read <= max_pages);
359     if (pages_read == max_pages) {
360       all_cpus_done = false;
361     }
362   }
363   return all_cpus_done;
364 }
365 
GetTickPeriodMs()366 uint32_t FtraceController::GetTickPeriodMs() {
367   if (data_sources_.empty())
368     return kDefaultTickPeriodMs;
369   uint32_t kUnsetPeriod = std::numeric_limits<uint32_t>::max();
370   uint32_t min_period_ms = kUnsetPeriod;
371   bool using_poll = true;
372   ForEachInstance([&](FtraceInstanceState* instance) {
373     using_poll &= instance->buffer_watches_posted;
374     for (FtraceDataSource* ds : instance->started_data_sources) {
375       if (ds->config().has_drain_period_ms()) {
376         min_period_ms = std::min(min_period_ms, ds->config().drain_period_ms());
377       }
378     }
379   });
380 
381   // None of the active data sources requested an explicit tick period.
382   // The historical default is 100ms, but if we know that all instances are also
383   // using buffer watermark polling, we can raise it. We don't disable the tick
384   // entirely as it spreads the read work more evenly, and ensures procfs
385   // scrapes of seen TIDs are not too stale.
386   if (min_period_ms == kUnsetPeriod) {
387     return using_poll ? kPollBackingTickPeriodMs : kDefaultTickPeriodMs;
388   }
389 
390   if (min_period_ms < kMinTickPeriodMs || min_period_ms > kMaxTickPeriodMs) {
391     PERFETTO_LOG(
392         "drain_period_ms was %u should be between %u and %u. "
393         "Falling back onto a default.",
394         min_period_ms, kMinTickPeriodMs, kMaxTickPeriodMs);
395     return kDefaultTickPeriodMs;
396   }
397   return min_period_ms;
398 }
399 
UpdateBufferWatermarkWatches(FtraceInstanceState * instance,const std::string & instance_name)400 void FtraceController::UpdateBufferWatermarkWatches(
401     FtraceInstanceState* instance,
402     const std::string& instance_name) {
403   PERFETTO_DCHECK(buffer_watermark_support_ != PollSupport::kUntested);
404   if (buffer_watermark_support_ == PollSupport::kUnsupported)
405     return;
406 
407   bool requested_poll = false;
408   for (const FtraceDataSource* ds : instance->started_data_sources) {
409     requested_poll |= ds->config().has_drain_buffer_percent();
410   }
411 
412   if (!requested_poll || instance->buffer_watches_posted)
413     return;
414 
415   auto weak_this = weak_factory_.GetWeakPtr();
416   for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
417     int fd = instance->cpu_readers[i].RawBufferFd();
418     task_runner_->AddFileDescriptorWatch(fd, [weak_this, instance_name, i] {
419       if (weak_this)
420         weak_this->OnBufferPastWatermark(instance_name, i,
421                                          /*repoll_watermark=*/true);
422     });
423   }
424   instance->buffer_watches_posted = true;
425 }
426 
RemoveBufferWatermarkWatches(FtraceInstanceState * instance)427 void FtraceController::RemoveBufferWatermarkWatches(
428     FtraceInstanceState* instance) {
429   if (!instance->buffer_watches_posted)
430     return;
431 
432   for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
433     int fd = instance->cpu_readers[i].RawBufferFd();
434     task_runner_->RemoveFileDescriptorWatch(fd);
435   }
436   instance->buffer_watches_posted = false;
437 }
438 
439 // TODO(rsavitski): consider calling OnFtraceData only if we're not reposting
440 // a continuation. It's a tradeoff between procfs scrape freshness and urgency
441 // to drain ftrace kernel buffers.
OnBufferPastWatermark(std::string instance_name,size_t cpu,bool repoll_watermark)442 void FtraceController::OnBufferPastWatermark(std::string instance_name,
443                                              size_t cpu,
444                                              bool repoll_watermark) {
445   metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
446                              metatrace::FTRACE_CPU_BUFFER_WATERMARK);
447 
448   // Instance might have been stopped before this callback runs.
449   FtraceInstanceState* instance = GetInstance(instance_name);
450   if (!instance || cpu >= instance->cpu_readers.size())
451     return;
452 
453   // Repoll all per-cpu buffers with zero timeout to confirm that at least
454   // one is still past the watermark. This might not be true if a different
455   // callback / readtick / flush did a read pass before this callback reached
456   // the front of the task runner queue.
457   if (repoll_watermark) {
458     size_t num_cpus = instance->cpu_readers.size();
459     std::vector<struct pollfd> pollfds(num_cpus);
460     for (size_t i = 0; i < num_cpus; i++) {
461       pollfds[i].fd = instance->cpu_readers[i].RawBufferFd();
462       pollfds[i].events = POLLIN;
463     }
464     int r = PERFETTO_EINTR(poll(pollfds.data(), num_cpus, 0));
465     if (r < 0) {
466       PERFETTO_DPLOG("poll failed");
467       return;
468     } else if (r == 0) {  // no buffers below the watermark -> we're done.
469       return;
470     }
471     // Count the number of readable fds, as some poll results might be POLLERR,
472     // as seen in cases with offlined cores. It's still fine to attempt reading
473     // from those buffers as CpuReader will handle the ENODEV.
474     bool has_readable_fd = false;
475     for (size_t i = 0; i < num_cpus; i++) {
476       has_readable_fd |= (pollfds[i].revents & POLLIN);
477     }
478     if (!has_readable_fd) {
479       return;
480     }
481   }
482 
483   MaybeSnapshotFtraceClock();
484   bool all_cpus_done = ReadPassForInstance(instance);
485   observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
486   if (!all_cpus_done) {
487     // More data to be read, but we want to let other task_runner tasks to run.
488     // Repost a continuation task.
489     auto weak_this = weak_factory_.GetWeakPtr();
490     task_runner_->PostTask([weak_this, instance_name, cpu] {
491       if (weak_this)
492         weak_this->OnBufferPastWatermark(instance_name, cpu,
493                                          /*repoll_watermark=*/false);
494     });
495   }
496 }
497 
Flush(FlushRequestID flush_id)498 void FtraceController::Flush(FlushRequestID flush_id) {
499   metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
500                              metatrace::FTRACE_CPU_FLUSH);
501 
502   ForEachInstance([&](FtraceInstanceState* instance) {  // for clang-format
503     FlushForInstance(instance);
504   });
505   observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
506 
507   ForEachInstance([&](FtraceInstanceState* instance) {
508     for (FtraceDataSource* ds : instance->started_data_sources) {
509       ds->OnFtraceFlushComplete(flush_id);
510     }
511   });
512 }
513 
FlushForInstance(FtraceInstanceState * instance)514 void FtraceController::FlushForInstance(FtraceInstanceState* instance) {
515   if (instance->started_data_sources.empty())
516     return;
517 
518   // Read all cpus in one go, limiting the per-cpu read amount to make sure we
519   // don't get stuck chasing the writer if there's a very high bandwidth of
520   // events.
521   size_t max_pages = instance->ftrace_config_muxer->GetPerCpuBufferSizePages();
522   for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
523     instance->cpu_readers[i].ReadCycle(&parsing_mem_, max_pages,
524                                        instance->started_data_sources);
525   }
526 }
527 
528 // We are not implicitly flushing on Stop. The tracing service is supposed to
529 // ask for an explicit flush before stopping, unless it needs to perform a
530 // non-graceful stop.
StopIfNeeded(FtraceInstanceState * instance)531 void FtraceController::StopIfNeeded(FtraceInstanceState* instance) {
532   if (!instance->started_data_sources.empty())
533     return;
534 
535   RemoveBufferWatermarkWatches(instance);
536   instance->cpu_readers.clear();
537   if (instance == &primary_) {
538     cpu_zero_stats_fd_.reset();
539   }
540   // Muxer cannot change the current_tracer until we close the trace pipe fds
541   // (i.e. per_cpu). Hence an explicit request here.
542   instance->ftrace_config_muxer->ResetCurrentTracer();
543 
544   DestroyIfUnusedSeconaryInstance(instance);
545 
546   // Clean up global state if done with all data sources.
547   if (!data_sources_.empty())
548     return;
549 
550   // The kernel symbol table is discarded by default to save memory as we run as
551   // a long-lived daemon. Check if the config asked to retain the symbols (e.g.
552   // lab tests). And in either case, reset a set-but-empty table to allow trying
553   // again next time a config requests symbols.
554   if (!retain_ksyms_on_stop_ ||
555       (symbolizer_.is_valid() &&
556        symbolizer_.GetOrCreateKernelSymbolMap()->num_syms() == 0)) {
557     symbolizer_.Destroy();
558   }
559   retain_ksyms_on_stop_ = false;
560 
561   // Note: might have never been allocated if data sources were rejected.
562   parsing_mem_.Release();
563 }
564 
AddDataSource(FtraceDataSource * data_source)565 bool FtraceController::AddDataSource(FtraceDataSource* data_source) {
566   if (!ValidConfig(data_source->config()))
567     return false;
568 
569   FtraceInstanceState* instance =
570       GetOrCreateInstance(data_source->config().instance_name());
571   if (!instance)
572     return false;
573 
574   // note: from this point onwards, need to not leak a possibly created
575   // instance if returning early.
576 
577   FtraceConfigId config_id = next_cfg_id_++;
578   if (!instance->ftrace_config_muxer->SetupConfig(
579           config_id, data_source->config(),
580           data_source->mutable_setup_errors())) {
581     DestroyIfUnusedSeconaryInstance(instance);
582     return false;
583   }
584 
585   const FtraceDataSourceConfig* ds_config =
586       instance->ftrace_config_muxer->GetDataSourceConfig(config_id);
587   auto it_and_inserted = data_sources_.insert(data_source);
588   PERFETTO_DCHECK(it_and_inserted.second);
589   data_source->Initialize(config_id, ds_config);
590   return true;
591 }
592 
StartDataSource(FtraceDataSource * data_source)593 bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
594   PERFETTO_DCHECK(data_sources_.count(data_source) > 0);
595 
596   FtraceConfigId config_id = data_source->config_id();
597   PERFETTO_CHECK(config_id);
598   const std::string& instance_name = data_source->config().instance_name();
599   FtraceInstanceState* instance = GetOrCreateInstance(instance_name);
600   PERFETTO_CHECK(instance);
601 
602   if (!instance->ftrace_config_muxer->ActivateConfig(config_id))
603     return false;
604   instance->started_data_sources.insert(data_source);
605   StartIfNeeded(instance, instance_name);
606 
607   // Parse kernel symbols if required by the config. This can be an expensive
608   // operation (cpu-bound for 500ms+), so delay the StartDataSource
609   // acknowledgement until after we're done. This lets a consumer wait for the
610   // expensive work to be done by waiting on the "all data sources started"
611   // fence. This helps isolate the effects of the cpu-bound work on
612   // frequency scaling of cpus when recording benchmarks (b/236143653).
613   // Note that we're already recording data into the kernel ftrace
614   // buffers while doing the symbol parsing.
615   if (data_source->config().symbolize_ksyms()) {
616     symbolizer_.GetOrCreateKernelSymbolMap();
617     // If at least one config sets the KSYMS_RETAIN flag, keep the ksyms map
618     // around in StopIfNeeded().
619     const auto KRET = FtraceConfig::KSYMS_RETAIN;
620     retain_ksyms_on_stop_ |= data_source->config().ksyms_mem_policy() == KRET;
621   }
622 
623   return true;
624 }
625 
RemoveDataSource(FtraceDataSource * data_source)626 void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
627   size_t removed = data_sources_.erase(data_source);
628   if (!removed)
629     return;  // can happen if AddDataSource failed
630 
631   FtraceInstanceState* instance =
632       GetOrCreateInstance(data_source->config().instance_name());
633   PERFETTO_CHECK(instance);
634 
635   instance->ftrace_config_muxer->RemoveConfig(data_source->config_id());
636   instance->started_data_sources.erase(data_source);
637   StopIfNeeded(instance);
638 }
639 
DumpKprobeStats(const std::string & text,FtraceStats * ftrace_stats)640 bool DumpKprobeStats(const std::string& text, FtraceStats* ftrace_stats) {
641   int64_t hits = 0;
642   int64_t misses = 0;
643 
644   base::StringSplitter line(std::move(text), '\n');
645   while (line.Next()) {
646     base::StringSplitter tok(line.cur_token(), line.cur_token_size() + 1, ' ');
647 
648     if (!tok.Next())
649       return false;
650     // Skip the event name field
651 
652     if (!tok.Next())
653       return false;
654     hits += static_cast<int64_t>(std::strtoll(tok.cur_token(), nullptr, 10));
655 
656     if (!tok.Next())
657       return false;
658     misses += static_cast<int64_t>(std::strtoll(tok.cur_token(), nullptr, 10));
659   }
660 
661   ftrace_stats->kprobe_stats.hits = hits;
662   ftrace_stats->kprobe_stats.misses = misses;
663 
664   return true;
665 }
666 
DumpFtraceStats(FtraceDataSource * data_source,FtraceStats * stats_out)667 void FtraceController::DumpFtraceStats(FtraceDataSource* data_source,
668                                        FtraceStats* stats_out) {
669   FtraceInstanceState* instance =
670       GetInstance(data_source->config().instance_name());
671   PERFETTO_DCHECK(instance);
672   if (!instance)
673     return;
674 
675   DumpAllCpuStats(instance->ftrace_procfs.get(), stats_out);
676   if (symbolizer_.is_valid()) {
677     auto* symbol_map = symbolizer_.GetOrCreateKernelSymbolMap();
678     stats_out->kernel_symbols_parsed =
679         static_cast<uint32_t>(symbol_map->num_syms());
680     stats_out->kernel_symbols_mem_kb =
681         static_cast<uint32_t>(symbol_map->size_bytes() / 1024);
682   }
683 
684   if (data_source->parsing_config()->kprobes.size() > 0) {
685     DumpKprobeStats(instance->ftrace_procfs.get()->ReadKprobeStats(),
686                     stats_out);
687   }
688 }
689 
MaybeSnapshotFtraceClock()690 void FtraceController::MaybeSnapshotFtraceClock() {
691   if (!cpu_zero_stats_fd_)
692     return;
693 
694   auto ftrace_clock = primary_.ftrace_config_muxer->ftrace_clock();
695   PERFETTO_DCHECK(ftrace_clock != protos::pbzero::FTRACE_CLOCK_UNSPECIFIED);
696 
697   // Snapshot the boot clock *before* reading CPU stats so that
698   // two clocks are as close togher as possible (i.e. if it was the
699   // other way round, we'd skew by the const of string parsing).
700   ftrace_clock_snapshot_.boot_clock_ts = base::GetBootTimeNs().count();
701 
702   // A value of zero will cause this snapshot to be skipped.
703   ftrace_clock_snapshot_.ftrace_clock_ts =
704       ReadFtraceNowTs(cpu_zero_stats_fd_).value_or(0);
705 }
706 
707 FtraceController::PollSupport
VerifyKernelSupportForBufferWatermark()708 FtraceController::VerifyKernelSupportForBufferWatermark() {
709   struct utsname uts = {};
710   if (uname(&uts) < 0 || strcmp(uts.sysname, "Linux") != 0)
711     return PollSupport::kUnsupported;
712   if (!PollSupportedOnKernelVersion(uts.release))
713     return PollSupport::kUnsupported;
714 
715   // buffer_percent exists and is writable
716   auto* tracefs = primary_.ftrace_procfs.get();
717   uint32_t current = tracefs->ReadBufferPercent();
718   if (!tracefs->SetBufferPercent(current ? current : 50)) {
719     return PollSupport::kUnsupported;
720   }
721 
722   // Polling on per_cpu/cpu0/trace_pipe_raw doesn't return errors.
723   base::ScopedFile fd = tracefs->OpenPipeForCpu(0);
724   struct pollfd pollset = {};
725   pollset.fd = fd.get();
726   pollset.events = POLLIN;
727   int r = PERFETTO_EINTR(poll(&pollset, 1, 0));
728   if (r < 0 || (r > 0 && (pollset.revents & POLLERR))) {
729     return PollSupport::kUnsupported;
730   }
731   return PollSupport::kSupported;
732 }
733 
734 // Check kernel version since the poll implementation has historical bugs.
735 // We're looking for at least 6.9 for the following:
736 //   ffe3986fece6 ring-buffer: Only update pages_touched when a new page...
737 // static
PollSupportedOnKernelVersion(const char * uts_release)738 bool FtraceController::PollSupportedOnKernelVersion(const char* uts_release) {
739   int major = 0, minor = 0;
740   if (sscanf(uts_release, "%d.%d", &major, &minor) != 2) {
741     return false;
742   }
743   if (major < kPollRequiredMajorVersion ||
744       (major == kPollRequiredMajorVersion &&
745        minor < kPollRequiredMinorVersion)) {
746     // Android: opportunistically detect a few select GKI kernels that are known
747     // to have the fixes.
748     std::optional<AndroidGkiVersion> gki = ParseAndroidGkiVersion(uts_release);
749     if (!gki.has_value())
750       return false;
751     // android14-6.1.86 or higher sublevel:
752     //   2d5f12de4cf5 ring-buffer: Only update pages_touched when a new page...
753     // android15-6.6.27 or higher sublevel:
754     //   a9cd92bc051f ring-buffer: Only update pages_touched when a new page...
755     bool gki_patched = (gki->release == 14 && gki->version == 6 &&
756                         gki->patch_level == 1 && gki->sub_level >= 86) ||
757                        (gki->release == 15 && gki->version == 6 &&
758                         gki->patch_level == 6 && gki->sub_level >= 27);
759     return gki_patched;
760   }
761   return true;
762 }
763 
GetStartedDataSourcesCount()764 size_t FtraceController::GetStartedDataSourcesCount() {
765   size_t cnt = 0;
766   ForEachInstance([&](FtraceInstanceState* instance) {
767     cnt += instance->started_data_sources.size();
768   });
769   return cnt;
770 }
771 
FtraceInstanceState(std::unique_ptr<FtraceProcfs> ft,std::unique_ptr<ProtoTranslationTable> ptt,std::unique_ptr<FtraceConfigMuxer> fcm)772 FtraceController::FtraceInstanceState::FtraceInstanceState(
773     std::unique_ptr<FtraceProcfs> ft,
774     std::unique_ptr<ProtoTranslationTable> ptt,
775     std::unique_ptr<FtraceConfigMuxer> fcm)
776     : ftrace_procfs(std::move(ft)),
777       table(std::move(ptt)),
778       ftrace_config_muxer(std::move(fcm)) {}
779 
GetOrCreateInstance(const std::string & instance_name)780 FtraceController::FtraceInstanceState* FtraceController::GetOrCreateInstance(
781     const std::string& instance_name) {
782   FtraceInstanceState* maybe_existing = GetInstance(instance_name);
783   if (maybe_existing)
784     return maybe_existing;
785 
786   PERFETTO_DCHECK(!instance_name.empty());
787   std::unique_ptr<FtraceInstanceState> instance =
788       CreateSecondaryInstance(instance_name);
789   if (!instance)
790     return nullptr;
791 
792   auto it_and_inserted = secondary_instances_.emplace(
793       std::piecewise_construct, std::forward_as_tuple(instance_name),
794       std::forward_as_tuple(std::move(instance)));
795   PERFETTO_CHECK(it_and_inserted.second);
796   return it_and_inserted.first->second.get();
797 }
798 
GetInstance(const std::string & instance_name)799 FtraceController::FtraceInstanceState* FtraceController::GetInstance(
800     const std::string& instance_name) {
801   if (instance_name.empty())
802     return &primary_;
803 
804   auto it = secondary_instances_.find(instance_name);
805   return it != secondary_instances_.end() ? it->second.get() : nullptr;
806 }
807 
DestroyIfUnusedSeconaryInstance(FtraceInstanceState * instance)808 void FtraceController::DestroyIfUnusedSeconaryInstance(
809     FtraceInstanceState* instance) {
810   if (instance == &primary_)
811     return;
812   for (auto it = secondary_instances_.begin(); it != secondary_instances_.end();
813        ++it) {
814     if (it->second.get() == instance &&
815         instance->ftrace_config_muxer->GetDataSourcesCount() == 0) {
816       // no data sources left referencing this secondary instance
817       secondary_instances_.erase(it);
818       return;
819     }
820   }
821   PERFETTO_FATAL("Bug in ftrace instance lifetimes");
822 }
823 
824 std::unique_ptr<FtraceController::FtraceInstanceState>
CreateSecondaryInstance(const std::string & instance_name)825 FtraceController::CreateSecondaryInstance(const std::string& instance_name) {
826   std::optional<std::string> instance_path = AbsolutePathForInstance(
827       primary_.ftrace_procfs->GetRootPath(), instance_name);
828   if (!instance_path.has_value()) {
829     PERFETTO_ELOG("Invalid ftrace instance name: \"%s\"",
830                   instance_name.c_str());
831     return nullptr;
832   }
833 
834   auto ftrace_procfs = FtraceProcfs::Create(*instance_path);
835   if (!ftrace_procfs) {
836     PERFETTO_ELOG("Failed to create ftrace procfs for \"%s\"",
837                   instance_path->c_str());
838     return nullptr;
839   }
840 
841   auto table = ProtoTranslationTable::Create(
842       ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
843   if (!table) {
844     PERFETTO_ELOG("Failed to create proto translation table for \"%s\"",
845                   instance_path->c_str());
846     return nullptr;
847   }
848 
849   // secondary instances don't support atrace and vendor tracepoint HAL
850   std::map<std::string, std::vector<GroupAndName>> vendor_evts;
851 
852   auto syscalls = SyscallTable::FromCurrentArch();
853 
854   auto muxer = std::make_unique<FtraceConfigMuxer>(
855       ftrace_procfs.get(), atrace_wrapper_.get(), table.get(),
856       std::move(syscalls), vendor_evts,
857       /* secondary_instance= */ true);
858   return std::make_unique<FtraceInstanceState>(
859       std::move(ftrace_procfs), std::move(table), std::move(muxer));
860 }
861 
862 // TODO(rsavitski): we want to eventually add support for the default
863 // (primary_) tracefs path to be an instance itself, at which point we'll need
864 // to be careful to distinguish the tracefs mount point from the default
865 // instance path.
866 // static
AbsolutePathForInstance(const std::string & tracefs_root,const std::string & raw_cfg_name)867 std::optional<std::string> FtraceController::AbsolutePathForInstance(
868     const std::string& tracefs_root,
869     const std::string& raw_cfg_name) {
870   if (base::Contains(raw_cfg_name, '/') ||
871       base::StartsWith(raw_cfg_name, "..")) {
872     return std::nullopt;
873   }
874 
875   // ARM64 pKVM hypervisor tracing emulates an instance, but is not under
876   // instances/, we special-case that name for now.
877   if (raw_cfg_name == "hyp") {
878     std::string hyp_path = tracefs_root + "hyp/";
879     PERFETTO_LOG(
880         "Config specified reserved \"hyp\" instance name, using %s for events.",
881         hyp_path.c_str());
882     return std::make_optional(hyp_path);
883   }
884 
885   return tracefs_root + "instances/" + raw_cfg_name + "/";
886 }
887 
888 FtraceController::Observer::~Observer() = default;
889 
890 }  // namespace perfetto
891