1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/traced/probes/ftrace/ftrace_controller.h"
18
19 #include <fcntl.h>
20 #include <stdint.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <sys/wait.h>
25 #include <unistd.h>
26
27 #include <array>
28 #include <string>
29 #include <utility>
30
31 #include "perfetto/base/build_config.h"
32 #include "perfetto/base/logging.h"
33 #include "perfetto/base/time.h"
34 #include "perfetto/ext/base/file_utils.h"
35 #include "perfetto/ext/base/metatrace.h"
36 #include "perfetto/ext/tracing/core/trace_writer.h"
37 #include "src/kallsyms/kernel_symbol_map.h"
38 #include "src/kallsyms/lazy_kernel_symbolizer.h"
39 #include "src/traced/probes/ftrace/atrace_hal_wrapper.h"
40 #include "src/traced/probes/ftrace/cpu_reader.h"
41 #include "src/traced/probes/ftrace/cpu_stats_parser.h"
42 #include "src/traced/probes/ftrace/discover_vendor_tracepoints.h"
43 #include "src/traced/probes/ftrace/event_info.h"
44 #include "src/traced/probes/ftrace/ftrace_config_muxer.h"
45 #include "src/traced/probes/ftrace/ftrace_data_source.h"
46 #include "src/traced/probes/ftrace/ftrace_metadata.h"
47 #include "src/traced/probes/ftrace/ftrace_procfs.h"
48 #include "src/traced/probes/ftrace/ftrace_stats.h"
49 #include "src/traced/probes/ftrace/proto_translation_table.h"
50
51 namespace perfetto {
52 namespace {
53
54 constexpr int kDefaultDrainPeriodMs = 100;
55 constexpr int kMinDrainPeriodMs = 1;
56 constexpr int kMaxDrainPeriodMs = 1000 * 60;
57
58 // Read at most this many pages of data per cpu per read task. If we hit this
59 // limit on at least one cpu, we stop and repost the read task, letting other
60 // tasks get some cpu time before continuing reading.
61 constexpr size_t kMaxPagesPerCpuPerReadTick = 256; // 1 MB per cpu
62
63 // When reading and parsing data for a particular cpu, we do it in batches of
64 // this many pages. In other words, we'll read up to
65 // |kParsingBufferSizePages| into memory, parse them, and then repeat if we
66 // still haven't caught up to the writer. A working set of 32 pages is 128k of
67 // data, which should fit in a typical L2D cache. Furthermore, the batching
68 // limits the memory usage of traced_probes.
69 //
70 // TODO(rsavitski): consider making buffering & parsing page counts independent,
71 // should be a single counter in the cpu_reader, similar to lost_events case.
72 constexpr size_t kParsingBufferSizePages = 32;
73
ClampDrainPeriodMs(uint32_t drain_period_ms)74 uint32_t ClampDrainPeriodMs(uint32_t drain_period_ms) {
75 if (drain_period_ms == 0) {
76 return kDefaultDrainPeriodMs;
77 }
78 if (drain_period_ms < kMinDrainPeriodMs ||
79 kMaxDrainPeriodMs < drain_period_ms) {
80 PERFETTO_LOG("drain_period_ms was %u should be between %u and %u",
81 drain_period_ms, kMinDrainPeriodMs, kMaxDrainPeriodMs);
82 return kDefaultDrainPeriodMs;
83 }
84 return drain_period_ms;
85 }
86
WriteToFile(const char * path,const char * str)87 void WriteToFile(const char* path, const char* str) {
88 auto fd = base::OpenFile(path, O_WRONLY);
89 if (!fd)
90 return;
91 base::ignore_result(base::WriteAll(*fd, str, strlen(str)));
92 }
93
ClearFile(const char * path)94 void ClearFile(const char* path) {
95 auto fd = base::OpenFile(path, O_WRONLY | O_TRUNC);
96 }
97
98 } // namespace
99
100 // Method of last resort to reset ftrace state.
101 // We don't know what state the rest of the system and process is so as far
102 // as possible avoid allocations.
HardResetFtraceState()103 void HardResetFtraceState() {
104 PERFETTO_LOG("Hard resetting ftrace state.");
105
106 WriteToFile("/sys/kernel/debug/tracing/tracing_on", "0");
107 WriteToFile("/sys/kernel/debug/tracing/buffer_size_kb", "4");
108 WriteToFile("/sys/kernel/debug/tracing/events/enable", "0");
109 ClearFile("/sys/kernel/debug/tracing/trace");
110
111 WriteToFile("/sys/kernel/tracing/tracing_on", "0");
112 WriteToFile("/sys/kernel/tracing/buffer_size_kb", "4");
113 WriteToFile("/sys/kernel/tracing/events/enable", "0");
114 ClearFile("/sys/kernel/tracing/trace");
115 }
116
117 // static
Create(base::TaskRunner * runner,Observer * observer)118 std::unique_ptr<FtraceController> FtraceController::Create(
119 base::TaskRunner* runner,
120 Observer* observer) {
121 std::unique_ptr<FtraceProcfs> ftrace_procfs =
122 FtraceProcfs::CreateGuessingMountPoint();
123
124 if (!ftrace_procfs)
125 return nullptr;
126
127 auto table = ProtoTranslationTable::Create(
128 ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
129
130 if (!table)
131 return nullptr;
132
133 AtraceHalWrapper hal;
134 auto vendor_evts =
135 vendor_tracepoints::DiscoverVendorTracepoints(&hal, ftrace_procfs.get());
136
137 std::unique_ptr<FtraceConfigMuxer> model = std::unique_ptr<FtraceConfigMuxer>(
138 new FtraceConfigMuxer(ftrace_procfs.get(), table.get(), vendor_evts));
139 return std::unique_ptr<FtraceController>(
140 new FtraceController(std::move(ftrace_procfs), std::move(table),
141 std::move(model), runner, observer));
142 }
143
FtraceController(std::unique_ptr<FtraceProcfs> ftrace_procfs,std::unique_ptr<ProtoTranslationTable> table,std::unique_ptr<FtraceConfigMuxer> model,base::TaskRunner * task_runner,Observer * observer)144 FtraceController::FtraceController(std::unique_ptr<FtraceProcfs> ftrace_procfs,
145 std::unique_ptr<ProtoTranslationTable> table,
146 std::unique_ptr<FtraceConfigMuxer> model,
147 base::TaskRunner* task_runner,
148 Observer* observer)
149 : task_runner_(task_runner),
150 observer_(observer),
151 symbolizer_(new LazyKernelSymbolizer()),
152 ftrace_procfs_(std::move(ftrace_procfs)),
153 table_(std::move(table)),
154 ftrace_config_muxer_(std::move(model)),
155 weak_factory_(this) {}
156
~FtraceController()157 FtraceController::~FtraceController() {
158 for (const auto* data_source : data_sources_)
159 ftrace_config_muxer_->RemoveConfig(data_source->config_id());
160 data_sources_.clear();
161 started_data_sources_.clear();
162 StopIfNeeded();
163 }
164
NowMs() const165 uint64_t FtraceController::NowMs() const {
166 return static_cast<uint64_t>(base::GetWallTimeMs().count());
167 }
168
StartIfNeeded()169 void FtraceController::StartIfNeeded() {
170 if (started_data_sources_.size() > 1)
171 return;
172 PERFETTO_DCHECK(!started_data_sources_.empty());
173 PERFETTO_DCHECK(per_cpu_.empty());
174
175 // Lazily allocate the memory used for reading & parsing ftrace.
176 if (!parsing_mem_.IsValid()) {
177 parsing_mem_ =
178 base::PagedMemory::Allocate(base::kPageSize * kParsingBufferSizePages);
179 }
180
181 per_cpu_.clear();
182 per_cpu_.reserve(ftrace_procfs_->NumberOfCpus());
183 size_t period_page_quota = ftrace_config_muxer_->GetPerCpuBufferSizePages();
184 for (size_t cpu = 0; cpu < ftrace_procfs_->NumberOfCpus(); cpu++) {
185 auto reader = std::unique_ptr<CpuReader>(
186 new CpuReader(cpu, table_.get(), symbolizer_.get(),
187 ftrace_procfs_->OpenPipeForCpu(cpu)));
188 per_cpu_.emplace_back(std::move(reader), period_page_quota);
189 }
190
191 // Start the repeating read tasks.
192 auto generation = ++generation_;
193 auto drain_period_ms = GetDrainPeriodMs();
194 auto weak_this = weak_factory_.GetWeakPtr();
195 task_runner_->PostDelayedTask(
196 [weak_this, generation] {
197 if (weak_this)
198 weak_this->ReadTick(generation);
199 },
200 drain_period_ms - (NowMs() % drain_period_ms));
201 }
202
203 // We handle the ftrace buffers in a repeating task (ReadTick). On a given tick,
204 // we iterate over all per-cpu buffers, parse their contents, and then write out
205 // the serialized packets. This is handled by |CpuReader| instances, which
206 // attempt to read from their respective per-cpu buffer fd until they catch up
207 // to the head of the buffer, or hit a transient error.
208 //
209 // The readers work in batches of |kParsingBufferSizePages| pages for cache
210 // locality, and to limit memory usage.
211 //
212 // However, the reading happens on the primary thread, shared with the rest of
213 // the service (including ipc). If there is a lot of ftrace data to read, we
214 // want to yield to the event loop, re-enqueueing a continuation task at the end
215 // of the immediate queue (letting other enqueued tasks to run before
216 // continuing). Therefore we introduce |kMaxPagesPerCpuPerReadTick|.
217 //
218 // There is also a possibility that the ftrace bandwidth is particularly high.
219 // We do not want to continue trying to catch up to the event stream (via
220 // continuation tasks) without bound, as we want to limit our cpu% usage. We
221 // assume that given a config saying "per-cpu kernel ftrace buffer is N pages,
222 // and drain every T milliseconds", we should not read more than N pages per
223 // drain period. Therefore we introduce |per_cpu_.period_page_quota|. If the
224 // consumer wants to handle a high bandwidth of ftrace events, they should set
225 // the config values appropriately.
ReadTick(int generation)226 void FtraceController::ReadTick(int generation) {
227 metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
228 metatrace::FTRACE_READ_TICK);
229 if (started_data_sources_.empty() || generation != generation_) {
230 return;
231 }
232
233 #if PERFETTO_DCHECK_IS_ON()
234 // The OnFtraceDataWrittenIntoDataSourceBuffers() below is supposed to clear
235 // all metadata, including the |kernel_addrs| map for symbolization.
236 for (FtraceDataSource* ds : started_data_sources_) {
237 FtraceMetadata* ftrace_metadata = ds->mutable_metadata();
238 PERFETTO_DCHECK(ftrace_metadata->kernel_addrs.empty());
239 PERFETTO_DCHECK(ftrace_metadata->last_kernel_addr_index_written == 0);
240 }
241 #endif
242
243 // Read all cpu buffers with remaining per-period quota.
244 bool all_cpus_done = true;
245 uint8_t* parsing_buf = reinterpret_cast<uint8_t*>(parsing_mem_.Get());
246 for (size_t i = 0; i < per_cpu_.size(); i++) {
247 size_t orig_quota = per_cpu_[i].period_page_quota;
248 if (orig_quota == 0)
249 continue;
250
251 size_t max_pages = std::min(orig_quota, kMaxPagesPerCpuPerReadTick);
252 size_t pages_read = per_cpu_[i].reader->ReadCycle(
253 parsing_buf, kParsingBufferSizePages, max_pages, started_data_sources_);
254
255 size_t new_quota = (pages_read >= orig_quota) ? 0 : orig_quota - pages_read;
256 per_cpu_[i].period_page_quota = new_quota;
257
258 // Reader got stopped by the cap on the number of pages (to not do too much
259 // work on the shared thread at once), but can read more in this drain
260 // period. Repost the ReadTick (on the immediate queue) to iterate over all
261 // cpus again. In other words, we will keep reposting work for all cpus as
262 // long as at least one of them hits the read page cap each tick. If all
263 // readers catch up to the event stream (pages_read < max_pages), or exceed
264 // their quota, we will stop for the given period.
265 PERFETTO_DCHECK(pages_read <= max_pages);
266 if (pages_read == max_pages && new_quota > 0)
267 all_cpus_done = false;
268 }
269 observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
270
271 // More work to do in this period.
272 auto weak_this = weak_factory_.GetWeakPtr();
273 if (!all_cpus_done) {
274 PERFETTO_DLOG("Reposting immediate ReadTick as there's more work.");
275 task_runner_->PostTask([weak_this, generation] {
276 if (weak_this)
277 weak_this->ReadTick(generation);
278 });
279 } else {
280 // Done until next drain period.
281 size_t period_page_quota = ftrace_config_muxer_->GetPerCpuBufferSizePages();
282 for (auto& per_cpu : per_cpu_)
283 per_cpu.period_page_quota = period_page_quota;
284
285 auto drain_period_ms = GetDrainPeriodMs();
286 task_runner_->PostDelayedTask(
287 [weak_this, generation] {
288 if (weak_this)
289 weak_this->ReadTick(generation);
290 },
291 drain_period_ms - (NowMs() % drain_period_ms));
292 }
293 }
294
GetDrainPeriodMs()295 uint32_t FtraceController::GetDrainPeriodMs() {
296 if (data_sources_.empty())
297 return kDefaultDrainPeriodMs;
298 uint32_t min_drain_period_ms = kMaxDrainPeriodMs + 1;
299 for (const FtraceDataSource* data_source : data_sources_) {
300 if (data_source->config().drain_period_ms() < min_drain_period_ms)
301 min_drain_period_ms = data_source->config().drain_period_ms();
302 }
303 return ClampDrainPeriodMs(min_drain_period_ms);
304 }
305
ClearTrace()306 void FtraceController::ClearTrace() {
307 ftrace_procfs_->ClearTrace();
308 }
309
DisableAllEvents()310 void FtraceController::DisableAllEvents() {
311 ftrace_procfs_->DisableAllEvents();
312 }
313
WriteTraceMarker(const std::string & s)314 void FtraceController::WriteTraceMarker(const std::string& s) {
315 ftrace_procfs_->WriteTraceMarker(s);
316 }
317
Flush(FlushRequestID flush_id)318 void FtraceController::Flush(FlushRequestID flush_id) {
319 metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
320 metatrace::FTRACE_CPU_FLUSH);
321
322 // Read all cpus in one go, limiting the per-cpu read amount to make sure we
323 // don't get stuck chasing the writer if there's a very high bandwidth of
324 // events.
325 size_t per_cpu_buf_size_pages =
326 ftrace_config_muxer_->GetPerCpuBufferSizePages();
327 uint8_t* parsing_buf = reinterpret_cast<uint8_t*>(parsing_mem_.Get());
328 for (size_t i = 0; i < per_cpu_.size(); i++) {
329 per_cpu_[i].reader->ReadCycle(parsing_buf, kParsingBufferSizePages,
330 per_cpu_buf_size_pages,
331 started_data_sources_);
332 }
333 observer_->OnFtraceDataWrittenIntoDataSourceBuffers();
334
335 for (FtraceDataSource* data_source : started_data_sources_)
336 data_source->OnFtraceFlushComplete(flush_id);
337 }
338
StopIfNeeded()339 void FtraceController::StopIfNeeded() {
340 if (!started_data_sources_.empty())
341 return;
342
343 // We are not implicitly flushing on Stop. The tracing service is supposed to
344 // ask for an explicit flush before stopping, unless it needs to perform a
345 // non-graceful stop.
346
347 per_cpu_.clear();
348 symbolizer_->Destroy();
349
350 if (parsing_mem_.IsValid()) {
351 parsing_mem_.AdviseDontNeed(parsing_mem_.Get(), parsing_mem_.size());
352 }
353 }
354
AddDataSource(FtraceDataSource * data_source)355 bool FtraceController::AddDataSource(FtraceDataSource* data_source) {
356 if (!ValidConfig(data_source->config()))
357 return false;
358
359 auto config_id = ftrace_config_muxer_->SetupConfig(data_source->config());
360 if (!config_id)
361 return false;
362
363 const FtraceDataSourceConfig* ds_config =
364 ftrace_config_muxer_->GetDataSourceConfig(config_id);
365 auto it_and_inserted = data_sources_.insert(data_source);
366 PERFETTO_DCHECK(it_and_inserted.second);
367 data_source->Initialize(config_id, ds_config);
368 return true;
369 }
370
StartDataSource(FtraceDataSource * data_source)371 bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
372 PERFETTO_DCHECK(data_sources_.count(data_source) > 0);
373
374 FtraceConfigId config_id = data_source->config_id();
375 PERFETTO_CHECK(config_id);
376
377 if (!ftrace_config_muxer_->ActivateConfig(config_id))
378 return false;
379
380 started_data_sources_.insert(data_source);
381 StartIfNeeded();
382
383 // If the config is requesting to symbolize kernel addresses, create the
384 // symbolizer and parse /proc/kallsyms (it will take 200-300 ms). This is not
385 // strictly required here but is to avoid hitting the parsing cost while
386 // processing the first ftrace event batch in CpuReader.
387 if (data_source->config().symbolize_ksyms()) {
388 if (data_source->config().initialize_ksyms_synchronously_for_testing()) {
389 symbolizer_->GetOrCreateKernelSymbolMap();
390 } else {
391 auto weak_this = weak_factory_.GetWeakPtr();
392 task_runner_->PostTask([weak_this] {
393 if (weak_this)
394 weak_this->symbolizer_->GetOrCreateKernelSymbolMap();
395 });
396 }
397 }
398
399 return true;
400 }
401
RemoveDataSource(FtraceDataSource * data_source)402 void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
403 started_data_sources_.erase(data_source);
404 size_t removed = data_sources_.erase(data_source);
405 if (!removed)
406 return; // Can happen if AddDataSource failed (e.g. too many sessions).
407 ftrace_config_muxer_->RemoveConfig(data_source->config_id());
408 StopIfNeeded();
409 }
410
DumpFtraceStats(FtraceStats * stats)411 void FtraceController::DumpFtraceStats(FtraceStats* stats) {
412 DumpAllCpuStats(ftrace_procfs_.get(), stats);
413 if (symbolizer_ && symbolizer_->is_valid()) {
414 auto* symbol_map = symbolizer_->GetOrCreateKernelSymbolMap();
415 stats->kernel_symbols_parsed =
416 static_cast<uint32_t>(symbol_map->num_syms());
417 stats->kernel_symbols_mem_kb =
418 static_cast<uint32_t>(symbol_map->size_bytes() / 1024);
419 }
420 }
421
422 FtraceController::Observer::~Observer() = default;
423
424 } // namespace perfetto
425