1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/traced/probes/ftrace/ftrace_controller.h"
18
19 #include <fcntl.h>
20 #include <stdint.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <sys/wait.h>
25 #include <unistd.h>
26
27 #include <array>
28 #include <string>
29 #include <utility>
30
31 #include "perfetto/base/build_config.h"
32 #include "perfetto/base/logging.h"
33 #include "perfetto/base/time.h"
34 #include "perfetto/ext/base/file_utils.h"
35 #include "perfetto/ext/base/metatrace.h"
36 #include "perfetto/ext/base/string_utils.h"
37 #include "perfetto/ext/tracing/core/trace_writer.h"
38 #include "src/kallsyms/kernel_symbol_map.h"
39 #include "src/kallsyms/lazy_kernel_symbolizer.h"
40 #include "src/traced/probes/ftrace/atrace_hal_wrapper.h"
41 #include "src/traced/probes/ftrace/cpu_reader.h"
42 #include "src/traced/probes/ftrace/cpu_stats_parser.h"
43 #include "src/traced/probes/ftrace/discover_vendor_tracepoints.h"
44 #include "src/traced/probes/ftrace/event_info.h"
45 #include "src/traced/probes/ftrace/ftrace_config_muxer.h"
46 #include "src/traced/probes/ftrace/ftrace_data_source.h"
47 #include "src/traced/probes/ftrace/ftrace_metadata.h"
48 #include "src/traced/probes/ftrace/ftrace_procfs.h"
49 #include "src/traced/probes/ftrace/ftrace_stats.h"
50 #include "src/traced/probes/ftrace/proto_translation_table.h"
51
52 namespace perfetto {
53 namespace {
54
55 constexpr int kDefaultDrainPeriodMs = 100;
56 constexpr int kMinDrainPeriodMs = 1;
57 constexpr int kMaxDrainPeriodMs = 1000 * 60;
58
59 // Read at most this many pages of data per cpu per read task. If we hit this
60 // limit on at least one cpu, we stop and repost the read task, letting other
61 // tasks get some cpu time before continuing reading.
62 constexpr size_t kMaxPagesPerCpuPerReadTick = 256; // 1 MB per cpu
63
64 // When reading and parsing data for a particular cpu, we do it in batches of
65 // this many pages. In other words, we'll read up to
66 // |kParsingBufferSizePages| into memory, parse them, and then repeat if we
67 // still haven't caught up to the writer. A working set of 32 pages is 128k of
68 // data, which should fit in a typical L2D cache. Furthermore, the batching
69 // limits the memory usage of traced_probes.
70 //
71 // TODO(rsavitski): consider making buffering & parsing page counts independent,
72 // should be a single counter in the cpu_reader, similar to lost_events case.
73 constexpr size_t kParsingBufferSizePages = 32;
74
ClampDrainPeriodMs(uint32_t drain_period_ms)75 uint32_t ClampDrainPeriodMs(uint32_t drain_period_ms) {
76 if (drain_period_ms == 0) {
77 return kDefaultDrainPeriodMs;
78 }
79 if (drain_period_ms < kMinDrainPeriodMs ||
80 kMaxDrainPeriodMs < drain_period_ms) {
81 PERFETTO_LOG("drain_period_ms was %u should be between %u and %u",
82 drain_period_ms, kMinDrainPeriodMs, kMaxDrainPeriodMs);
83 return kDefaultDrainPeriodMs;
84 }
85 return drain_period_ms;
86 }
87
WriteToFile(const char * path,const char * str)88 bool WriteToFile(const char* path, const char* str) {
89 auto fd = base::OpenFile(path, O_WRONLY);
90 if (!fd)
91 return false;
92 const size_t str_len = strlen(str);
93 return base::WriteAll(*fd, str, str_len) == static_cast<ssize_t>(str_len);
94 }
95
ClearFile(const char * path)96 bool ClearFile(const char* path) {
97 auto fd = base::OpenFile(path, O_WRONLY | O_TRUNC);
98 return !!fd;
99 }
100
ReadFtraceNowTs(const base::ScopedFile & cpu_stats_fd)101 base::Optional<int64_t> ReadFtraceNowTs(const base::ScopedFile& cpu_stats_fd) {
102 PERFETTO_CHECK(cpu_stats_fd);
103
104 char buf[512];
105 ssize_t res = PERFETTO_EINTR(pread(*cpu_stats_fd, buf, sizeof(buf) - 1, 0));
106 if (res <= 0)
107 return base::nullopt;
108 buf[res] = '\0';
109
110 FtraceCpuStats stats{};
111 DumpCpuStats(buf, &stats);
112 return static_cast<int64_t>(stats.now_ts * 1000 * 1000 * 1000);
113 }
114
115 } // namespace
116
117 // Method of last resort to reset ftrace state.
118 // We don't know what state the rest of the system and process is so as far
119 // as possible avoid allocations.
HardResetFtraceState()120 bool HardResetFtraceState() {
121 for (const char* const* item = FtraceProcfs::kTracingPaths; *item; ++item) {
122 std::string prefix(*item);
123 PERFETTO_CHECK(base::EndsWith(prefix, "/"));
124 bool res = true;
125 res &= WriteToFile((prefix + "tracing_on").c_str(), "0");
126 res &= WriteToFile((prefix + "buffer_size_kb").c_str(), "4");
127 // We deliberately don't check for this as on some older versions of Android
128 // events/enable was not writable by the shell user.
129 WriteToFile((prefix + "events/enable").c_str(), "0");
130 res &= ClearFile((prefix + "trace").c_str());
131 if (res)
132 return true;
133 }
134 return false;
135 }
136
137 // static
Create(base::TaskRunner * runner,Observer * observer)138 std::unique_ptr<FtraceController> FtraceController::Create(
139 base::TaskRunner* runner,
140 Observer* observer) {
141 std::unique_ptr<FtraceProcfs> ftrace_procfs =
142 FtraceProcfs::CreateGuessingMountPoint();
143
144 if (!ftrace_procfs)
145 return nullptr;
146
147 auto table = ProtoTranslationTable::Create(
148 ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
149
150 if (!table)
151 return nullptr;
152
153 AtraceHalWrapper hal;
154 auto vendor_evts =
155 vendor_tracepoints::DiscoverVendorTracepoints(&hal, ftrace_procfs.get());
156
157 std::unique_ptr<FtraceConfigMuxer> model = std::unique_ptr<FtraceConfigMuxer>(
158 new FtraceConfigMuxer(ftrace_procfs.get(), table.get(), vendor_evts));
159 return std::unique_ptr<FtraceController>(
160 new FtraceController(std::move(ftrace_procfs), std::move(table),
161 std::move(model), runner, observer));
162 }
163
FtraceController(std::unique_ptr<FtraceProcfs> ftrace_procfs,std::unique_ptr<ProtoTranslationTable> table,std::unique_ptr<FtraceConfigMuxer> model,base::TaskRunner * task_runner,Observer * observer)164 FtraceController::FtraceController(std::unique_ptr<FtraceProcfs> ftrace_procfs,
165 std::unique_ptr<ProtoTranslationTable> table,
166 std::unique_ptr<FtraceConfigMuxer> model,
167 base::TaskRunner* task_runner,
168 Observer* observer)
169 : task_runner_(task_runner),
170 observer_(observer),
171 symbolizer_(new LazyKernelSymbolizer()),
172 ftrace_procfs_(std::move(ftrace_procfs)),
173 table_(std::move(table)),
174 ftrace_config_muxer_(std::move(model)),
175 ftrace_clock_snapshot_(new FtraceClockSnapshot()),
176 weak_factory_(this) {}
177
~FtraceController()178 FtraceController::~FtraceController() {
179 for (const auto* data_source : data_sources_)
180 ftrace_config_muxer_->RemoveConfig(data_source->config_id());
181 data_sources_.clear();
182 started_data_sources_.clear();
183 StopIfNeeded();
184 }
185
NowMs() const186 uint64_t FtraceController::NowMs() const {
187 return static_cast<uint64_t>(base::GetWallTimeMs().count());
188 }
189
StartIfNeeded()190 void FtraceController::StartIfNeeded() {
191 using FtraceClock = protos::pbzero::FtraceClock;
192 if (started_data_sources_.size() > 1)
193 return;
194 PERFETTO_DCHECK(!started_data_sources_.empty());
195 PERFETTO_DCHECK(per_cpu_.empty());
196
197 // Lazily allocate the memory used for reading & parsing ftrace.
198 if (!parsing_mem_.IsValid()) {
199 parsing_mem_ =
200 base::PagedMemory::Allocate(base::kPageSize * kParsingBufferSizePages);
201 }
202
203 // If we're not using the boot clock, snapshot the ftrace clock.
204 FtraceClock clock = ftrace_config_muxer_->ftrace_clock();
205 if (clock != FtraceClock::FTRACE_CLOCK_UNSPECIFIED) {
206 cpu_zero_stats_fd_ = ftrace_procfs_->OpenCpuStats(0 /* cpu */);
207 MaybeSnapshotFtraceClock();
208 }
209
210 per_cpu_.clear();
211 per_cpu_.reserve(ftrace_procfs_->NumberOfCpus());
212 size_t period_page_quota = ftrace_config_muxer_->GetPerCpuBufferSizePages();
213 for (size_t cpu = 0; cpu < ftrace_procfs_->NumberOfCpus(); cpu++) {
214 auto reader = std::unique_ptr<CpuReader>(new CpuReader(
215 cpu, table_.get(), symbolizer_.get(), ftrace_clock_snapshot_.get(),
216 ftrace_procfs_->OpenPipeForCpu(cpu)));
217 per_cpu_.emplace_back(std::move(reader), period_page_quota);
218 }
219
220 // Start the repeating read tasks.
221 auto generation = ++generation_;
222 auto drain_period_ms = GetDrainPeriodMs();
223 auto weak_this = weak_factory_.GetWeakPtr();
224 task_runner_->PostDelayedTask(
225 [weak_this, generation] {
226 if (weak_this)
227 weak_this->ReadTick(generation);
228 },
229 drain_period_ms - (NowMs() % drain_period_ms));
230 }
231
232 // We handle the ftrace buffers in a repeating task (ReadTick). On a given tick,
233 // we iterate over all per-cpu buffers, parse their contents, and then write out
234 // the serialized packets. This is handled by |CpuReader| instances, which
235 // attempt to read from their respective per-cpu buffer fd until they catch up
236 // to the head of the buffer, or hit a transient error.
237 //
238 // The readers work in batches of |kParsingBufferSizePages| pages for cache
239 // locality, and to limit memory usage.
240 //
241 // However, the reading happens on the primary thread, shared with the rest of
242 // the service (including ipc). If there is a lot of ftrace data to read, we
243 // want to yield to the event loop, re-enqueueing a continuation task at the end
244 // of the immediate queue (letting other enqueued tasks to run before
245 // continuing). Therefore we introduce |kMaxPagesPerCpuPerReadTick|.
246 //
247 // There is also a possibility that the ftrace bandwidth is particularly high.
248 // We do not want to continue trying to catch up to the event stream (via
249 // continuation tasks) without bound, as we want to limit our cpu% usage. We
250 // assume that given a config saying "per-cpu kernel ftrace buffer is N pages,
251 // and drain every T milliseconds", we should not read more than N pages per
252 // drain period. Therefore we introduce |per_cpu_.period_page_quota|. If the
253 // consumer wants to handle a high bandwidth of ftrace events, they should set
254 // the config values appropriately.
ReadTick(int generation)255 void FtraceController::ReadTick(int generation) {
256 metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
257 metatrace::FTRACE_READ_TICK);
258 if (started_data_sources_.empty() || generation != generation_) {
259 return;
260 }
261
262 #if PERFETTO_DCHECK_IS_ON()
263 // The OnFtraceDataWrittenIntoDataSourceBuffers() below is supposed to clear
264 // all metadata, including the |kernel_addrs| map for symbolization.
265 for (FtraceDataSource* ds : started_data_sources_) {
266 FtraceMetadata* ftrace_metadata = ds->mutable_metadata();
267 PERFETTO_DCHECK(ftrace_metadata->kernel_addrs.empty());
268 PERFETTO_DCHECK(ftrace_metadata->last_kernel_addr_index_written == 0);
269 }
270 #endif
271
272 // Read all cpu buffers with remaining per-period quota.
273 bool all_cpus_done = true;
274 uint8_t* parsing_buf = reinterpret_cast<uint8_t*>(parsing_mem_.Get());
275 const auto ftrace_clock = ftrace_config_muxer_->ftrace_clock();
276 for (size_t i = 0; i < per_cpu_.size(); i++) {
277 size_t orig_quota = per_cpu_[i].period_page_quota;
278 if (orig_quota == 0)
279 continue;
280
281 size_t max_pages = std::min(orig_quota, kMaxPagesPerCpuPerReadTick);
282 CpuReader& cpu_reader = *per_cpu_[i].reader;
283 cpu_reader.set_ftrace_clock(ftrace_clock);
284 size_t pages_read = cpu_reader.ReadCycle(
285 parsing_buf, kParsingBufferSizePages, max_pages, started_data_sources_);
286
287 size_t new_quota = (pages_read >= orig_quota) ? 0 : orig_quota - pages_read;
288 per_cpu_[i].period_page_quota = new_quota;
289
290 // Reader got stopped by the cap on the number of pages (to not do too much
291 // work on the shared thread at once), but can read more in this drain
292 // period. Repost the ReadTick (on the immediate queue) to iterate over all
293 // cpus again. In other words, we will keep reposting work for all cpus as
294 // long as at least one of them hits the read page cap each tick. If all
295 // readers catch up to the event stream (pages_read < max_pages), or exceed
296 // their quota, we will stop for the given period.
297 PERFETTO_DCHECK(pages_read <= max_pages);
298 if (pages_read == max_pages && new_quota > 0)
299 all_cpus_done = false;
300 }
301 observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
302
303 // More work to do in this period.
304 auto weak_this = weak_factory_.GetWeakPtr();
305 if (!all_cpus_done) {
306 PERFETTO_DLOG("Reposting immediate ReadTick as there's more work.");
307 task_runner_->PostTask([weak_this, generation] {
308 if (weak_this)
309 weak_this->ReadTick(generation);
310 });
311 } else {
312 // Done until next drain period.
313 size_t period_page_quota = ftrace_config_muxer_->GetPerCpuBufferSizePages();
314 for (auto& per_cpu : per_cpu_)
315 per_cpu.period_page_quota = period_page_quota;
316
317 // Snapshot the clock so the data in the next period will be clock synced as
318 // well.
319 MaybeSnapshotFtraceClock();
320
321 auto drain_period_ms = GetDrainPeriodMs();
322 task_runner_->PostDelayedTask(
323 [weak_this, generation] {
324 if (weak_this)
325 weak_this->ReadTick(generation);
326 },
327 drain_period_ms - (NowMs() % drain_period_ms));
328 }
329 }
330
GetDrainPeriodMs()331 uint32_t FtraceController::GetDrainPeriodMs() {
332 if (data_sources_.empty())
333 return kDefaultDrainPeriodMs;
334 uint32_t min_drain_period_ms = kMaxDrainPeriodMs + 1;
335 for (const FtraceDataSource* data_source : data_sources_) {
336 if (data_source->config().drain_period_ms() < min_drain_period_ms)
337 min_drain_period_ms = data_source->config().drain_period_ms();
338 }
339 return ClampDrainPeriodMs(min_drain_period_ms);
340 }
341
ClearTrace()342 void FtraceController::ClearTrace() {
343 ftrace_procfs_->ClearTrace();
344 }
345
DisableAllEvents()346 void FtraceController::DisableAllEvents() {
347 ftrace_procfs_->DisableAllEvents();
348 }
349
WriteTraceMarker(const std::string & s)350 void FtraceController::WriteTraceMarker(const std::string& s) {
351 ftrace_procfs_->WriteTraceMarker(s);
352 }
353
Flush(FlushRequestID flush_id)354 void FtraceController::Flush(FlushRequestID flush_id) {
355 metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
356 metatrace::FTRACE_CPU_FLUSH);
357
358 // Read all cpus in one go, limiting the per-cpu read amount to make sure we
359 // don't get stuck chasing the writer if there's a very high bandwidth of
360 // events.
361 size_t per_cpu_buf_size_pages =
362 ftrace_config_muxer_->GetPerCpuBufferSizePages();
363 uint8_t* parsing_buf = reinterpret_cast<uint8_t*>(parsing_mem_.Get());
364 for (size_t i = 0; i < per_cpu_.size(); i++) {
365 per_cpu_[i].reader->ReadCycle(parsing_buf, kParsingBufferSizePages,
366 per_cpu_buf_size_pages,
367 started_data_sources_);
368 }
369 observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
370
371 for (FtraceDataSource* data_source : started_data_sources_)
372 data_source->OnFtraceFlushComplete(flush_id);
373 }
374
StopIfNeeded()375 void FtraceController::StopIfNeeded() {
376 if (!started_data_sources_.empty())
377 return;
378
379 // We are not implicitly flushing on Stop. The tracing service is supposed to
380 // ask for an explicit flush before stopping, unless it needs to perform a
381 // non-graceful stop.
382
383 per_cpu_.clear();
384 symbolizer_->Destroy();
385 cpu_zero_stats_fd_.reset();
386
387 if (parsing_mem_.IsValid()) {
388 parsing_mem_.AdviseDontNeed(parsing_mem_.Get(), parsing_mem_.size());
389 }
390 }
391
AddDataSource(FtraceDataSource * data_source)392 bool FtraceController::AddDataSource(FtraceDataSource* data_source) {
393 if (!ValidConfig(data_source->config()))
394 return false;
395
396 auto config_id = ftrace_config_muxer_->SetupConfig(
397 data_source->config(), data_source->mutable_setup_errors());
398 if (!config_id)
399 return false;
400
401 const FtraceDataSourceConfig* ds_config =
402 ftrace_config_muxer_->GetDataSourceConfig(config_id);
403 auto it_and_inserted = data_sources_.insert(data_source);
404 PERFETTO_DCHECK(it_and_inserted.second);
405 data_source->Initialize(config_id, ds_config);
406 return true;
407 }
408
StartDataSource(FtraceDataSource * data_source)409 bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
410 PERFETTO_DCHECK(data_sources_.count(data_source) > 0);
411
412 FtraceConfigId config_id = data_source->config_id();
413 PERFETTO_CHECK(config_id);
414
415 if (!ftrace_config_muxer_->ActivateConfig(config_id))
416 return false;
417
418 started_data_sources_.insert(data_source);
419 StartIfNeeded();
420
421 // If the config is requesting to symbolize kernel addresses, create the
422 // symbolizer and parse /proc/kallsyms (it will take 200-300 ms). This is not
423 // strictly required here but is to avoid hitting the parsing cost while
424 // processing the first ftrace event batch in CpuReader.
425 if (data_source->config().symbolize_ksyms()) {
426 if (data_source->config().initialize_ksyms_synchronously_for_testing()) {
427 symbolizer_->GetOrCreateKernelSymbolMap();
428 } else {
429 auto weak_this = weak_factory_.GetWeakPtr();
430 task_runner_->PostTask([weak_this] {
431 if (weak_this)
432 weak_this->symbolizer_->GetOrCreateKernelSymbolMap();
433 });
434 }
435 }
436
437 return true;
438 }
439
RemoveDataSource(FtraceDataSource * data_source)440 void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
441 started_data_sources_.erase(data_source);
442 size_t removed = data_sources_.erase(data_source);
443 if (!removed)
444 return; // Can happen if AddDataSource failed (e.g. too many sessions).
445 ftrace_config_muxer_->RemoveConfig(data_source->config_id());
446 StopIfNeeded();
447 }
448
DumpFtraceStats(FtraceStats * stats)449 void FtraceController::DumpFtraceStats(FtraceStats* stats) {
450 DumpAllCpuStats(ftrace_procfs_.get(), stats);
451 if (symbolizer_ && symbolizer_->is_valid()) {
452 auto* symbol_map = symbolizer_->GetOrCreateKernelSymbolMap();
453 stats->kernel_symbols_parsed =
454 static_cast<uint32_t>(symbol_map->num_syms());
455 stats->kernel_symbols_mem_kb =
456 static_cast<uint32_t>(symbol_map->size_bytes() / 1024);
457 }
458 }
459
MaybeSnapshotFtraceClock()460 void FtraceController::MaybeSnapshotFtraceClock() {
461 if (!cpu_zero_stats_fd_)
462 return;
463
464 auto ftrace_clock = ftrace_config_muxer_->ftrace_clock();
465 PERFETTO_DCHECK(ftrace_clock != protos::pbzero::FTRACE_CLOCK_UNSPECIFIED);
466
467 // Snapshot the boot clock *before* reading CPU stats so that
468 // two clocks are as close togher as possible (i.e. if it was the
469 // other way round, we'd skew by the const of string parsing).
470 ftrace_clock_snapshot_->boot_clock_ts = base::GetBootTimeNs().count();
471
472 // A value of zero will cause this snapshot to be skipped.
473 ftrace_clock_snapshot_->ftrace_clock_ts =
474 ReadFtraceNowTs(cpu_zero_stats_fd_).value_or(0);
475 }
476
477 FtraceController::Observer::~Observer() = default;
478
479 } // namespace perfetto
480