/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/traced/probes/ftrace/ftrace_controller.h"

#include <fcntl.h>
#include <poll.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cstdint>

#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <tuple>
#include <utility>

#include "perfetto/base/build_config.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/metatrace.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/string_utils.h"
#include "perfetto/ext/tracing/core/trace_writer.h"
#include "src/kallsyms/kernel_symbol_map.h"
#include "src/kallsyms/lazy_kernel_symbolizer.h"
#include "src/traced/probes/ftrace/atrace_hal_wrapper.h"
#include "src/traced/probes/ftrace/cpu_reader.h"
#include "src/traced/probes/ftrace/cpu_stats_parser.h"
#include "src/traced/probes/ftrace/event_info.h"
#include "src/traced/probes/ftrace/event_info_constants.h"
#include "src/traced/probes/ftrace/ftrace_config_muxer.h"
#include "src/traced/probes/ftrace/ftrace_config_utils.h"
#include "src/traced/probes/ftrace/ftrace_data_source.h"
#include "src/traced/probes/ftrace/ftrace_metadata.h"
#include "src/traced/probes/ftrace/ftrace_procfs.h"
#include "src/traced/probes/ftrace/ftrace_stats.h"
#include "src/traced/probes/ftrace/proto_translation_table.h"
#include "src/traced/probes/ftrace/vendor_tracepoints.h"

namespace perfetto {
namespace {

constexpr uint32_t kDefaultTickPeriodMs = 100;
constexpr uint32_t kPollBackingTickPeriodMs = 1000;
constexpr uint32_t kMinTickPeriodMs = 1;
constexpr uint32_t kMaxTickPeriodMs = 1000 * 60;
constexpr int kPollRequiredMajorVersion = 6;
constexpr int kPollRequiredMinorVersion = 1;

// Read at most this many pages of data per cpu per read task. If we hit this
// limit on at least one cpu, we stop and repost the read task, letting other
// tasks get some cpu time before continuing reading.
constexpr size_t kMaxPagesPerCpuPerReadTick = 256;  // 1 MB per cpu
bool WriteToFile(const char* path, const char* str) {
  auto fd = base::OpenFile(path, O_WRONLY);
  if (!fd)
    return false;
  const size_t str_len = strlen(str);
  return base::WriteAll(*fd, str, str_len) == static_cast<ssize_t>(str_len);
}

bool ClearFile(const char* path) {
  auto fd = base::OpenFile(path, O_WRONLY | O_TRUNC);
  return !!fd;
}

std::optional<int64_t> ReadFtraceNowTs(const base::ScopedFile& cpu_stats_fd) {
  PERFETTO_CHECK(cpu_stats_fd);

  char buf[512];
  ssize_t res = PERFETTO_EINTR(pread(*cpu_stats_fd, buf, sizeof(buf) - 1, 0));
  if (res <= 0)
    return std::nullopt;
  buf[res] = '\0';

  FtraceCpuStats stats{};
  DumpCpuStats(buf, &stats);
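  // |now_ts| as parsed from the per-cpu stats file is in (fractional) seconds;
  // the multiplication below converts it to nanoseconds.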
  return static_cast<int64_t>(stats.now_ts * 1000 * 1000 * 1000);
}

std::map<std::string, std::vector<GroupAndName>> GetAtraceVendorEvents(
    FtraceProcfs* tracefs) {
#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
  if (base::FileExists(vendor_tracepoints::kCategoriesFile)) {
    std::map<std::string, std::vector<GroupAndName>> vendor_evts;
    base::Status status =
        vendor_tracepoints::DiscoverAccessibleVendorTracepointsWithFile(
            vendor_tracepoints::kCategoriesFile, &vendor_evts, tracefs);
    if (!status.ok()) {
      PERFETTO_ELOG("Cannot load vendor categories: %s", status.c_message());
    }
    return vendor_evts;
  } else {
    AtraceHalWrapper hal;
    return vendor_tracepoints::DiscoverVendorTracepointsWithHal(&hal, tracefs);
  }
#else
  base::ignore_result(tracefs);
  return {};
#endif
}

struct AndroidGkiVersion {
  uint64_t version = 0;
  uint64_t patch_level = 0;
  uint64_t sub_level = 0;
  uint64_t release = 0;
  uint64_t kmi_gen = 0;
};

#define ANDROID_GKI_UNAME_FMT \
  "%" PRIu64 ".%" PRIu64 ".%" PRIu64 "-android%" PRIu64 "-%" PRIu64
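// For illustration (hypothetical values): a GKI-style release string such as
// "5.15.137-android13-8" parses as version=5, patch_level=15, sub_level=137,
// release=13, kmi_gen=8.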

std::optional<AndroidGkiVersion> ParseAndroidGkiVersion(const char* s) {
  AndroidGkiVersion v = {};
  if (sscanf(s, ANDROID_GKI_UNAME_FMT, &v.version, &v.patch_level,
             &v.sub_level, &v.release, &v.kmi_gen) != 5) {
    return std::nullopt;
  }
  return v;
}

}  // namespace

// Method of last resort to reset ftrace state.
// We don't know what state the rest of the system and process is in, so avoid
// allocations as far as possible.
bool HardResetFtraceState() {
  for (const char* const* item = FtraceProcfs::kTracingPaths; *item; ++item) {
    std::string prefix(*item);
    PERFETTO_CHECK(base::EndsWith(prefix, "/"));
    bool res = true;
    res &= WriteToFile((prefix + "tracing_on").c_str(), "0");
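    // Shrinking buffer_size_kb releases most of the kernel memory backing the
    // per-cpu ring buffers.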
    res &= WriteToFile((prefix + "buffer_size_kb").c_str(), "4");
    // Not checking success because these files might not be accessible on
    // older or release builds of Android:
    WriteToFile((prefix + "events/enable").c_str(), "0");
    WriteToFile((prefix + "events/raw_syscalls/filter").c_str(), "0");
    WriteToFile((prefix + "current_tracer").c_str(), "nop");
    res &= ClearFile((prefix + "trace").c_str());
    if (res)
      return true;
  }
  return false;
}

// static
std::unique_ptr<FtraceController> FtraceController::Create(
    base::TaskRunner* runner,
    Observer* observer) {
  std::unique_ptr<FtraceProcfs> ftrace_procfs =
      FtraceProcfs::CreateGuessingMountPoint("");
  if (!ftrace_procfs)
    return nullptr;

  std::unique_ptr<ProtoTranslationTable> table = ProtoTranslationTable::Create(
      ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
  if (!table)
    return nullptr;

  auto atrace_wrapper = std::make_unique<AtraceWrapperImpl>();

  std::map<std::string, std::vector<GroupAndName>> vendor_evts =
      GetAtraceVendorEvents(ftrace_procfs.get());

  SyscallTable syscalls = SyscallTable::FromCurrentArch();

  auto muxer = std::make_unique<FtraceConfigMuxer>(
      ftrace_procfs.get(), atrace_wrapper.get(), table.get(),
      std::move(syscalls), vendor_evts);
  return std::unique_ptr<FtraceController>(new FtraceController(
      std::move(ftrace_procfs), std::move(table), std::move(atrace_wrapper),
      std::move(muxer), runner, observer));
}

FtraceController::FtraceController(
    std::unique_ptr<FtraceProcfs> ftrace_procfs,
    std::unique_ptr<ProtoTranslationTable> table,
    std::unique_ptr<AtraceWrapper> atrace_wrapper,
    std::unique_ptr<FtraceConfigMuxer> muxer,
    base::TaskRunner* task_runner,
    Observer* observer)
    : task_runner_(task_runner),
      observer_(observer),
      atrace_wrapper_(std::move(atrace_wrapper)),
      primary_(std::move(ftrace_procfs), std::move(table), std::move(muxer)),
      weak_factory_(this) {}

FtraceController::~FtraceController() {
  while (!data_sources_.empty()) {
    RemoveDataSource(*data_sources_.begin());
  }
  PERFETTO_DCHECK(data_sources_.empty());
  PERFETTO_DCHECK(primary_.started_data_sources.empty());
  PERFETTO_DCHECK(primary_.cpu_readers.empty());
  PERFETTO_DCHECK(secondary_instances_.empty());
}

uint64_t FtraceController::NowMs() const {
  return static_cast<uint64_t>(base::GetWallTimeMs().count());
}

template <typename F>
void FtraceController::ForEachInstance(F fn) {
  fn(&primary_);
  for (auto& kv : secondary_instances_) {
    fn(kv.second.get());
  }
}

void FtraceController::StartIfNeeded(FtraceInstanceState* instance,
                                     const std::string& instance_name) {
  if (buffer_watermark_support_ == PollSupport::kUntested) {
    buffer_watermark_support_ = VerifyKernelSupportForBufferWatermark();
  }

  // If instance is already active, then at most we need to update the buffer
  // poll callbacks. The periodic |ReadTick| will pick up any updates to the
  // period the next time it executes.
  if (instance->started_data_sources.size() > 1) {
    UpdateBufferWatermarkWatches(instance, instance_name);
    return;
  }

  // Lazily allocate the memory used for reading & parsing ftrace. In the case
  // of multiple ftrace instances, this might already be valid.
  parsing_mem_.AllocateIfNeeded();

  const auto ftrace_clock = instance->ftrace_config_muxer->ftrace_clock();
  size_t num_cpus = instance->ftrace_procfs->NumberOfCpus();
  PERFETTO_CHECK(instance->cpu_readers.empty());
  instance->cpu_readers.reserve(num_cpus);
  for (size_t cpu = 0; cpu < num_cpus; cpu++) {
    instance->cpu_readers.emplace_back(
        cpu, instance->ftrace_procfs->OpenPipeForCpu(cpu),
        instance->table.get(), &symbolizer_, ftrace_clock,
        &ftrace_clock_snapshot_);
  }

  // Special case for primary instance: if not using the boot clock, take
  // manual clock snapshots so that the trace parser can do a best effort
  // conversion back to boot. This is primarily for old kernels that predate
  // boot support, and therefore default to "global" clock.
  if (instance == &primary_ &&
      ftrace_clock != protos::pbzero::FtraceClock::FTRACE_CLOCK_UNSPECIFIED) {
    cpu_zero_stats_fd_ = primary_.ftrace_procfs->OpenCpuStats(0 /* cpu */);
    MaybeSnapshotFtraceClock();
  }

  // Set up poll callbacks for the buffers if requested by at least one DS.
  UpdateBufferWatermarkWatches(instance, instance_name);

  // Start a new repeating read task (even if there is already one posted due
  // to a different ftrace instance). Any old tasks will stop due to generation
  // checks.
  auto generation = ++tick_generation_;
  auto tick_period_ms = GetTickPeriodMs();
  auto weak_this = weak_factory_.GetWeakPtr();
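  // Note: the delay below aligns the tick to the next multiple of
  // |tick_period_ms| in wall-clock time, rather than waiting a full period
  // from now.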
  task_runner_->PostDelayedTask(
      [weak_this, generation] {
        if (weak_this)
          weak_this->ReadTick(generation);
      },
      tick_period_ms - (NowMs() % tick_period_ms));
}

// We handle the ftrace buffers in a repeating task (ReadTick). On a given
// tick, we iterate over all per-cpu buffers, parse their contents, and then
// write out the serialized packets. This is handled by |CpuReader| instances,
// which attempt to read from their respective per-cpu buffer fd until they
// catch up to the head of the buffer, or hit a transient error.
//
// The readers work in batches of |kParsingBufferSizePages| pages for cache
// locality, and to limit memory usage.
//
// However, the reading happens on the primary thread, shared with the rest of
// the service (including ipc). If there is a lot of ftrace data to read, we
// want to yield to the event loop, re-enqueueing a continuation task at the
// end of the immediate queue (letting other enqueued tasks run before
// continuing). Therefore we introduce |kMaxPagesPerCpuPerReadTick|.
void FtraceController::ReadTick(int generation) {
  metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                             metatrace::FTRACE_READ_TICK);
  if (generation != tick_generation_ || GetStartedDataSourcesCount() == 0) {
    return;
  }
  MaybeSnapshotFtraceClock();

  // Read all per-cpu buffers. Note: |ForEachInstance| already visits
  // |primary_|, so there is no separate read pass for it here.
  bool all_cpus_done = true;
  ForEachInstance([&](FtraceInstanceState* instance) {
    all_cpus_done &= ReadPassForInstance(instance);
  });
  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();

  auto weak_this = weak_factory_.GetWeakPtr();
  if (!all_cpus_done) {
    PERFETTO_DLOG("Reposting immediate ReadTick as there's more work.");
    task_runner_->PostTask([weak_this, generation] {
      if (weak_this)
        weak_this->ReadTick(generation);
    });
  } else {
    // Done until next period.
    auto tick_period_ms = GetTickPeriodMs();
    task_runner_->PostDelayedTask(
        [weak_this, generation] {
          if (weak_this)
            weak_this->ReadTick(generation);
        },
        tick_period_ms - (NowMs() % tick_period_ms));
  }

#if PERFETTO_DCHECK_IS_ON()
  // OnFtraceDataWrittenIntoDataSourceBuffers() is supposed to clear
  // all metadata, including the |kernel_addrs| map for symbolization.
  ForEachInstance([&](FtraceInstanceState* instance) {
    for (FtraceDataSource* ds : instance->started_data_sources) {
      FtraceMetadata* ftrace_metadata = ds->mutable_metadata();
      PERFETTO_DCHECK(ftrace_metadata->kernel_addrs.empty());
      PERFETTO_DCHECK(ftrace_metadata->last_kernel_addr_index_written == 0);
    }
  });
#endif
}

bool FtraceController::ReadPassForInstance(FtraceInstanceState* instance) {
  if (instance->started_data_sources.empty())
    return true;

  bool all_cpus_done = true;
  for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
    size_t max_pages = kMaxPagesPerCpuPerReadTick;
    size_t pages_read = instance->cpu_readers[i].ReadCycle(
        &parsing_mem_, max_pages, instance->started_data_sources);
    PERFETTO_DCHECK(pages_read <= max_pages);
    if (pages_read == max_pages) {
      all_cpus_done = false;
    }
  }
  return all_cpus_done;
}

uint32_t FtraceController::GetTickPeriodMs() {
  if (data_sources_.empty())
    return kDefaultTickPeriodMs;
  uint32_t kUnsetPeriod = std::numeric_limits<uint32_t>::max();
  uint32_t min_period_ms = kUnsetPeriod;
  bool using_poll = true;
  ForEachInstance([&](FtraceInstanceState* instance) {
    using_poll &= instance->buffer_watches_posted;
    for (FtraceDataSource* ds : instance->started_data_sources) {
      if (ds->config().has_drain_period_ms()) {
        min_period_ms = std::min(min_period_ms, ds->config().drain_period_ms());
      }
    }
  });

  // None of the active data sources requested an explicit tick period.
  // The historical default is 100ms, but if we know that all instances are
  // also using buffer watermark polling, we can raise it. We don't disable the
  // tick entirely as it spreads the read work more evenly, and ensures procfs
  // scrapes of seen TIDs are not too stale.
  if (min_period_ms == kUnsetPeriod) {
    return using_poll ? kPollBackingTickPeriodMs : kDefaultTickPeriodMs;
  }

  if (min_period_ms < kMinTickPeriodMs || min_period_ms > kMaxTickPeriodMs) {
    PERFETTO_LOG(
        "drain_period_ms was %u should be between %u and %u. "
        "Falling back onto a default.",
        min_period_ms, kMinTickPeriodMs, kMaxTickPeriodMs);
    return kDefaultTickPeriodMs;
  }
  return min_period_ms;
}

void FtraceController::UpdateBufferWatermarkWatches(
    FtraceInstanceState* instance,
    const std::string& instance_name) {
  PERFETTO_DCHECK(buffer_watermark_support_ != PollSupport::kUntested);
  if (buffer_watermark_support_ == PollSupport::kUnsupported)
    return;

  bool requested_poll = false;
  for (const FtraceDataSource* ds : instance->started_data_sources) {
    requested_poll |= ds->config().has_drain_buffer_percent();
  }

  if (!requested_poll || instance->buffer_watches_posted)
    return;

  auto weak_this = weak_factory_.GetWeakPtr();
  for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
    int fd = instance->cpu_readers[i].RawBufferFd();
    task_runner_->AddFileDescriptorWatch(fd, [weak_this, instance_name, i] {
      if (weak_this)
        weak_this->OnBufferPastWatermark(instance_name, i,
                                         /*repoll_watermark=*/true);
    });
  }
  instance->buffer_watches_posted = true;
}

void FtraceController::RemoveBufferWatermarkWatches(
    FtraceInstanceState* instance) {
  if (!instance->buffer_watches_posted)
    return;

  for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
    int fd = instance->cpu_readers[i].RawBufferFd();
    task_runner_->RemoveFileDescriptorWatch(fd);
  }
  instance->buffer_watches_posted = false;
}

// TODO(rsavitski): consider calling OnFtraceData only if we're not reposting
// a continuation. It's a tradeoff between procfs scrape freshness and urgency
// to drain ftrace kernel buffers.
void FtraceController::OnBufferPastWatermark(std::string instance_name,
                                             size_t cpu,
                                             bool repoll_watermark) {
  metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                             metatrace::FTRACE_CPU_BUFFER_WATERMARK);

  // Instance might have been stopped before this callback runs.
  FtraceInstanceState* instance = GetInstance(instance_name);
  if (!instance || cpu >= instance->cpu_readers.size())
    return;

  // Repoll all per-cpu buffers with zero timeout to confirm that at least
  // one is still past the watermark. This might not be true if a different
  // callback / readtick / flush did a read pass before this callback reached
  // the front of the task runner queue.
  if (repoll_watermark) {
    size_t num_cpus = instance->cpu_readers.size();
    std::vector<struct pollfd> pollfds(num_cpus);
    for (size_t i = 0; i < num_cpus; i++) {
      pollfds[i].fd = instance->cpu_readers[i].RawBufferFd();
      pollfds[i].events = POLLIN;
    }
    int r = PERFETTO_EINTR(poll(pollfds.data(), num_cpus, 0));
    if (r < 0) {
      PERFETTO_DPLOG("poll failed");
      return;
    } else if (r == 0) {  // no buffers past the watermark -> we're done.
      return;
    }
    // Check that at least one fd is readable, as some poll results might be
    // POLLERR, as seen in cases with offlined cores. It's still fine to
    // attempt reading from those buffers as CpuReader will handle the ENODEV.
    bool has_readable_fd = false;
    for (size_t i = 0; i < num_cpus; i++) {
      has_readable_fd |= (pollfds[i].revents & POLLIN);
    }
    if (!has_readable_fd) {
      return;
    }
  }

  MaybeSnapshotFtraceClock();
  bool all_cpus_done = ReadPassForInstance(instance);
  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
  if (!all_cpus_done) {
    // More data to be read, but we want to let other task_runner tasks run.
    // Repost a continuation task.
    auto weak_this = weak_factory_.GetWeakPtr();
    task_runner_->PostTask([weak_this, instance_name, cpu] {
      if (weak_this)
        weak_this->OnBufferPastWatermark(instance_name, cpu,
                                         /*repoll_watermark=*/false);
    });
  }
}

void FtraceController::Flush(FlushRequestID flush_id) {
  metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                             metatrace::FTRACE_CPU_FLUSH);

  ForEachInstance([&](FtraceInstanceState* instance) {  // for clang-format
    FlushForInstance(instance);
  });
  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();

  ForEachInstance([&](FtraceInstanceState* instance) {
    for (FtraceDataSource* ds : instance->started_data_sources) {
      ds->OnFtraceFlushComplete(flush_id);
    }
  });
}

void FtraceController::FlushForInstance(FtraceInstanceState* instance) {
  if (instance->started_data_sources.empty())
    return;

  // Read all cpus in one go, limiting the per-cpu read amount to make sure we
  // don't get stuck chasing the writer if there's a very high bandwidth of
  // events.
  size_t max_pages = instance->ftrace_config_muxer->GetPerCpuBufferSizePages();
  for (size_t i = 0; i < instance->cpu_readers.size(); i++) {
    instance->cpu_readers[i].ReadCycle(&parsing_mem_, max_pages,
                                       instance->started_data_sources);
  }
}

// We are not implicitly flushing on Stop. The tracing service is supposed to
// ask for an explicit flush before stopping, unless it needs to perform a
// non-graceful stop.
void FtraceController::StopIfNeeded(FtraceInstanceState* instance) {
  if (!instance->started_data_sources.empty())
    return;

  RemoveBufferWatermarkWatches(instance);
  instance->cpu_readers.clear();
  if (instance == &primary_) {
    cpu_zero_stats_fd_.reset();
  }
  // Muxer cannot change the current_tracer until we close the trace pipe fds
  // (i.e. per_cpu). Hence an explicit request here.
  instance->ftrace_config_muxer->ResetCurrentTracer();

  DestroyIfUnusedSeconaryInstance(instance);

  // Clean up global state if done with all data sources.
  if (!data_sources_.empty())
    return;

  if (!retain_ksyms_on_stop_) {
    symbolizer_.Destroy();
  }
  retain_ksyms_on_stop_ = false;

  // Note: might have never been allocated if data sources were rejected.
  parsing_mem_.Release();
}

bool FtraceController::AddDataSource(FtraceDataSource* data_source) {
  if (!ValidConfig(data_source->config()))
    return false;

  FtraceInstanceState* instance =
      GetOrCreateInstance(data_source->config().instance_name());
  if (!instance)
    return false;

  // Note: from this point onwards, take care not to leak a possibly-created
  // instance when returning early.

  FtraceConfigId config_id = next_cfg_id_++;
  if (!instance->ftrace_config_muxer->SetupConfig(
          config_id, data_source->config(),
          data_source->mutable_setup_errors())) {
    DestroyIfUnusedSeconaryInstance(instance);
    return false;
  }

  const FtraceDataSourceConfig* ds_config =
      instance->ftrace_config_muxer->GetDataSourceConfig(config_id);
  auto it_and_inserted = data_sources_.insert(data_source);
  PERFETTO_DCHECK(it_and_inserted.second);
  data_source->Initialize(config_id, ds_config);
  return true;
}

bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
  PERFETTO_DCHECK(data_sources_.count(data_source) > 0);

  FtraceConfigId config_id = data_source->config_id();
  PERFETTO_CHECK(config_id);
  const std::string& instance_name = data_source->config().instance_name();
  FtraceInstanceState* instance = GetOrCreateInstance(instance_name);
  PERFETTO_CHECK(instance);

  if (!instance->ftrace_config_muxer->ActivateConfig(config_id))
    return false;
  instance->started_data_sources.insert(data_source);
  StartIfNeeded(instance, instance_name);

  // Parse kernel symbols if required by the config. This can be an expensive
  // operation (cpu-bound for 500ms+), so delay the StartDataSource
  // acknowledgement until after we're done. This lets a consumer wait for the
  // expensive work to be done by waiting on the "all data sources started"
  // fence. This helps isolate the effects of the cpu-bound work on
  // frequency scaling of cpus when recording benchmarks (b/236143653).
  // Note that we're already recording data into the kernel ftrace
  // buffers while doing the symbol parsing.
  if (data_source->config().symbolize_ksyms()) {
    symbolizer_.GetOrCreateKernelSymbolMap();
    // If at least one config sets the KSYMS_RETAIN flag, keep the ksyms map
    // around in StopIfNeeded().
    const auto KRET = FtraceConfig::KSYMS_RETAIN;
    retain_ksyms_on_stop_ |= data_source->config().ksyms_mem_policy() == KRET;
  }

  return true;
}

void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
  size_t removed = data_sources_.erase(data_source);
  if (!removed)
    return;  // can happen if AddDataSource failed

  FtraceInstanceState* instance =
      GetOrCreateInstance(data_source->config().instance_name());
  PERFETTO_CHECK(instance);

  instance->ftrace_config_muxer->RemoveConfig(data_source->config_id());
  instance->started_data_sources.erase(data_source);
  StopIfNeeded(instance);
}

void FtraceController::DumpFtraceStats(FtraceDataSource* data_source,
                                       FtraceStats* stats_out) {
  FtraceInstanceState* instance =
      GetInstance(data_source->config().instance_name());
  PERFETTO_DCHECK(instance);
  if (!instance)
    return;

  DumpAllCpuStats(instance->ftrace_procfs.get(), stats_out);
  if (symbolizer_.is_valid()) {
    auto* symbol_map = symbolizer_.GetOrCreateKernelSymbolMap();
    stats_out->kernel_symbols_parsed =
        static_cast<uint32_t>(symbol_map->num_syms());
    stats_out->kernel_symbols_mem_kb =
        static_cast<uint32_t>(symbol_map->size_bytes() / 1024);
  }
}

void FtraceController::MaybeSnapshotFtraceClock() {
  if (!cpu_zero_stats_fd_)
    return;

  auto ftrace_clock = primary_.ftrace_config_muxer->ftrace_clock();
  PERFETTO_DCHECK(ftrace_clock != protos::pbzero::FTRACE_CLOCK_UNSPECIFIED);

  // Snapshot the boot clock *before* reading CPU stats so that the two clocks
  // are as close together as possible (i.e. if it was the other way round,
  // we'd skew by the cost of string parsing).
  ftrace_clock_snapshot_.boot_clock_ts = base::GetBootTimeNs().count();

  // A value of zero will cause this snapshot to be skipped.
  ftrace_clock_snapshot_.ftrace_clock_ts =
      ReadFtraceNowTs(cpu_zero_stats_fd_).value_or(0);
}

FtraceController::PollSupport
FtraceController::VerifyKernelSupportForBufferWatermark() {
  struct utsname uts = {};
  if (uname(&uts) < 0 || strcmp(uts.sysname, "Linux") != 0)
    return PollSupport::kUnsupported;
  if (!PollSupportedOnKernelVersion(uts.release))
    return PollSupport::kUnsupported;

  // buffer_percent exists and is writable
  auto* tracefs = primary_.ftrace_procfs.get();
  uint32_t current = tracefs->ReadBufferPercent();
  if (!tracefs->SetBufferPercent(current ? current : 50)) {
    return PollSupport::kUnsupported;
  }

  // Polling on per_cpu/cpu0/trace_pipe_raw doesn't return errors.
  base::ScopedFile fd = tracefs->OpenPipeForCpu(0);
  struct pollfd pollset = {};
  pollset.fd = fd.get();
  pollset.events = POLLIN;
  int r = PERFETTO_EINTR(poll(&pollset, 1, 0));
  if (r < 0 || (r > 0 && (pollset.revents & POLLERR))) {
    return PollSupport::kUnsupported;
  }
  return PollSupport::kSupported;
}

// Check kernel version since the poll implementation has historical bugs.
// We're looking for at least 6.1 for the following:
// 42fb0a1e84ff tracing/ring-buffer: Have polling block on watermark
// Otherwise the poll will wake us up as soon as a single byte is in the
// buffer. A more conservative check would look for 6.6 for an extra fix that
// reduces excessive kernel-space wakeups:
// 1e0cb399c765 ring-buffer: Update "shortest_full" in polling
// However that doesn't break functionality, so we'll still use poll if
// requested by the config.
// static
bool FtraceController::PollSupportedOnKernelVersion(const char* uts_release) {
  int major = 0, minor = 0;
  if (sscanf(uts_release, "%d.%d", &major, &minor) != 2) {
    return false;
  }
  if (major < kPollRequiredMajorVersion ||
      (major == kPollRequiredMajorVersion &&
       minor < kPollRequiredMinorVersion)) {
    // Android: opportunistically detect a few select GKI kernels that are
    // known to have the fixes. Note: 6.1 and 6.6 GKIs are already covered by
    // the outer check.
    std::optional<AndroidGkiVersion> gki = ParseAndroidGkiVersion(uts_release);
    if (!gki.has_value())
      return false;
    // android13-5.10.197 or higher sublevel:
    // ef47f25e98de ring-buffer: Update "shortest_full" in polling
    // android13-5.15.133 and
    // android14-5.15.133 or higher sublevel:
    // b5d00cd7db66 ring-buffer: Update "shortest_full" in polling
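    // For example (hypothetical strings): "5.10.198-android13-4" satisfies the
    // first clause below, while "5.15.120-android14-6" does not reach
    // sub_level 133 and is rejected.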
    bool gki_patched =
        (gki->release == 13 && gki->version == 5 && gki->patch_level == 10 &&
         gki->sub_level >= 197) ||
        ((gki->release == 13 || gki->release == 14) && gki->version == 5 &&
         gki->patch_level == 15 && gki->sub_level >= 133);
    return gki_patched;
  }
  return true;
}

size_t FtraceController::GetStartedDataSourcesCount() {
  size_t cnt = 0;
  ForEachInstance([&](FtraceInstanceState* instance) {
    cnt += instance->started_data_sources.size();
  });
  return cnt;
}

FtraceController::FtraceInstanceState::FtraceInstanceState(
    std::unique_ptr<FtraceProcfs> ft,
    std::unique_ptr<ProtoTranslationTable> ptt,
    std::unique_ptr<FtraceConfigMuxer> fcm)
    : ftrace_procfs(std::move(ft)),
      table(std::move(ptt)),
      ftrace_config_muxer(std::move(fcm)) {}

FtraceController::FtraceInstanceState* FtraceController::GetOrCreateInstance(
    const std::string& instance_name) {
  FtraceInstanceState* maybe_existing = GetInstance(instance_name);
  if (maybe_existing)
    return maybe_existing;

  PERFETTO_DCHECK(!instance_name.empty());
  std::unique_ptr<FtraceInstanceState> instance =
      CreateSecondaryInstance(instance_name);
  if (!instance)
    return nullptr;

  auto it_and_inserted = secondary_instances_.emplace(
      std::piecewise_construct, std::forward_as_tuple(instance_name),
      std::forward_as_tuple(std::move(instance)));
  PERFETTO_CHECK(it_and_inserted.second);
  return it_and_inserted.first->second.get();
}

FtraceController::FtraceInstanceState* FtraceController::GetInstance(
    const std::string& instance_name) {
  if (instance_name.empty())
    return &primary_;

  auto it = secondary_instances_.find(instance_name);
  return it != secondary_instances_.end() ? it->second.get() : nullptr;
}

void FtraceController::DestroyIfUnusedSeconaryInstance(
    FtraceInstanceState* instance) {
  if (instance == &primary_)
    return;
  for (auto it = secondary_instances_.begin();
       it != secondary_instances_.end(); ++it) {
    if (it->second.get() == instance &&
        instance->ftrace_config_muxer->GetDataSourcesCount() == 0) {
      // no data sources left referencing this secondary instance
      secondary_instances_.erase(it);
      return;
    }
  }
  PERFETTO_FATAL("Bug in ftrace instance lifetimes");
}

std::unique_ptr<FtraceController::FtraceInstanceState>
FtraceController::CreateSecondaryInstance(const std::string& instance_name) {
  std::optional<std::string> instance_path = AbsolutePathForInstance(
      primary_.ftrace_procfs->GetRootPath(), instance_name);
  if (!instance_path.has_value()) {
    PERFETTO_ELOG("Invalid ftrace instance name: \"%s\"",
                  instance_name.c_str());
    return nullptr;
  }

  auto ftrace_procfs = FtraceProcfs::Create(*instance_path);
  if (!ftrace_procfs) {
    PERFETTO_ELOG("Failed to create ftrace procfs for \"%s\"",
                  instance_path->c_str());
    return nullptr;
  }

  auto table = ProtoTranslationTable::Create(
      ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
  if (!table) {
    PERFETTO_ELOG("Failed to create proto translation table for \"%s\"",
                  instance_path->c_str());
    return nullptr;
  }

  // secondary instances don't support atrace and vendor tracepoint HAL
  std::map<std::string, std::vector<GroupAndName>> vendor_evts;

  auto syscalls = SyscallTable::FromCurrentArch();

  auto muxer = std::make_unique<FtraceConfigMuxer>(
      ftrace_procfs.get(), atrace_wrapper_.get(), table.get(),
      std::move(syscalls), vendor_evts,
      /* secondary_instance= */ true);
  return std::make_unique<FtraceInstanceState>(
      std::move(ftrace_procfs), std::move(table), std::move(muxer));
}

// TODO(rsavitski): we want to eventually add support for the default
// (primary_) tracefs path to be an instance itself, at which point we'll need
// to be careful to distinguish the tracefs mount point from the default
// instance path.
// static
std::optional<std::string> FtraceController::AbsolutePathForInstance(
    const std::string& tracefs_root,
    const std::string& raw_cfg_name) {
  if (base::Contains(raw_cfg_name, '/') ||
      base::StartsWith(raw_cfg_name, "..")) {
    return std::nullopt;
  }

  // ARM64 pKVM hypervisor tracing emulates an instance, but is not under
  // instances/; we special-case that name for now.
  if (raw_cfg_name == "hyp") {
    std::string hyp_path = tracefs_root + "hyp/";
    PERFETTO_LOG(
        "Config specified reserved \"hyp\" instance name, using %s for "
        "events.",
        hyp_path.c_str());
    return std::make_optional(hyp_path);
  }

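  // E.g. a (hypothetical) instance name "foo" with a tracefs root of
  // "/sys/kernel/tracing/" maps to "/sys/kernel/tracing/instances/foo/".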
  return tracefs_root + "instances/" + raw_cfg_name + "/";
}

FtraceController::Observer::~Observer() = default;

}  // namespace perfetto