/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/traced/probes/ftrace/ftrace_controller.h"

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <algorithm>
#include <array>
#include <string>
#include <utility>

#include "perfetto/base/build_config.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/metatrace.h"
#include "perfetto/ext/tracing/core/trace_writer.h"
#include "src/traced/probes/ftrace/atrace_hal_wrapper.h"
#include "src/traced/probes/ftrace/cpu_reader.h"
#include "src/traced/probes/ftrace/cpu_stats_parser.h"
#include "src/traced/probes/ftrace/discover_vendor_tracepoints.h"
#include "src/traced/probes/ftrace/event_info.h"
#include "src/traced/probes/ftrace/ftrace_config_muxer.h"
#include "src/traced/probes/ftrace/ftrace_data_source.h"
#include "src/traced/probes/ftrace/ftrace_metadata.h"
#include "src/traced/probes/ftrace/ftrace_procfs.h"
#include "src/traced/probes/ftrace/ftrace_stats.h"
#include "src/traced/probes/ftrace/proto_translation_table.h"

namespace perfetto {
namespace {

constexpr int kDefaultDrainPeriodMs = 100;
constexpr int kMinDrainPeriodMs = 1;
constexpr int kMaxDrainPeriodMs = 1000 * 60;

// Read at most this many pages of data per cpu per read task. If we hit this
// limit on at least one cpu, we stop and repost the read task, letting other
// tasks get some cpu time before continuing reading.
constexpr size_t kMaxPagesPerCpuPerReadTick = 256;  // 1 MB per cpu

// When reading and parsing data for a particular cpu, we do it in batches of
// this many pages. In other words, we'll read up to
// |kParsingBufferSizePages| into memory, parse them, and then repeat if we
// still haven't caught up to the writer. A working set of 32 pages is 128k of
// data, which should fit in a typical L2D cache. Furthermore, the batching
// limits the memory usage of traced_probes.
//
// TODO(rsavitski): consider making buffering & parsing page counts independent,
// should be a single counter in the cpu_reader, similar to lost_events case.
constexpr size_t kParsingBufferSizePages = 32;

uint32_t ClampDrainPeriodMs(uint32_t drain_period_ms) {
  if (drain_period_ms == 0) {
    return kDefaultDrainPeriodMs;
  }
  if (drain_period_ms < kMinDrainPeriodMs ||
      kMaxDrainPeriodMs < drain_period_ms) {
    PERFETTO_LOG("drain_period_ms was %u should be between %u and %u",
                 drain_period_ms, kMinDrainPeriodMs, kMaxDrainPeriodMs);
    return kDefaultDrainPeriodMs;
  }
  return drain_period_ms;
}

void WriteToFile(const char* path, const char* str) {
  auto fd = base::OpenFile(path, O_WRONLY);
  if (!fd)
    return;
  base::ignore_result(base::WriteAll(*fd, str, strlen(str)));
}

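// Opening with O_TRUNC clears the file on open; for the ftrace "trace" file
// this has the documented side effect of clearing the kernel ring buffer.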
void ClearFile(const char* path) {
  auto fd = base::OpenFile(path, O_WRONLY | O_TRUNC);
}

}  // namespace

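// Candidate tracefs mount points, tried in order by Create(). On Android the
// tracefs mount at /sys/kernel/tracing is preferred, with the legacy debugfs
// location as fallback.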
const char* const FtraceController::kTracingPaths[] = {
#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
    "/sys/kernel/tracing/", "/sys/kernel/debug/tracing/", nullptr,
#else
    "/sys/kernel/debug/tracing/", nullptr,
#endif
};

// Method of last resort to reset ftrace state.
// We don't know what state the rest of the system and process is in, so avoid
// allocations as far as possible.
void HardResetFtraceState() {
  PERFETTO_LOG("Hard resetting ftrace state.");

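  // Writing a small buffer_size_kb shrinks the per-cpu ring buffers, releasing
  // most of their memory back to the kernel.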
  WriteToFile("/sys/kernel/debug/tracing/tracing_on", "0");
  WriteToFile("/sys/kernel/debug/tracing/buffer_size_kb", "4");
  WriteToFile("/sys/kernel/debug/tracing/events/enable", "0");
  ClearFile("/sys/kernel/debug/tracing/trace");

  WriteToFile("/sys/kernel/tracing/tracing_on", "0");
  WriteToFile("/sys/kernel/tracing/buffer_size_kb", "4");
  WriteToFile("/sys/kernel/tracing/events/enable", "0");
  ClearFile("/sys/kernel/tracing/trace");
}

// static
// TODO(taylori): Add a test for tracing paths in integration tests.
std::unique_ptr<FtraceController> FtraceController::Create(
    base::TaskRunner* runner,
    Observer* observer) {
  size_t index = 0;
  std::unique_ptr<FtraceProcfs> ftrace_procfs = nullptr;
  while (!ftrace_procfs && kTracingPaths[index]) {
    ftrace_procfs = FtraceProcfs::Create(kTracingPaths[index++]);
  }

  if (!ftrace_procfs)
    return nullptr;

  auto table = ProtoTranslationTable::Create(
      ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());

  if (!table)
    return nullptr;

  AtraceHalWrapper hal;
  auto vendor_evts =
      vendor_tracepoints::DiscoverVendorTracepoints(&hal, ftrace_procfs.get());

  std::unique_ptr<FtraceConfigMuxer> model = std::unique_ptr<FtraceConfigMuxer>(
      new FtraceConfigMuxer(ftrace_procfs.get(), table.get(), vendor_evts));
  return std::unique_ptr<FtraceController>(
      new FtraceController(std::move(ftrace_procfs), std::move(table),
                           std::move(model), runner, observer));
}

FtraceController::FtraceController(std::unique_ptr<FtraceProcfs> ftrace_procfs,
                                   std::unique_ptr<ProtoTranslationTable> table,
                                   std::unique_ptr<FtraceConfigMuxer> model,
                                   base::TaskRunner* task_runner,
                                   Observer* observer)
    : task_runner_(task_runner),
      observer_(observer),
      ftrace_procfs_(std::move(ftrace_procfs)),
      table_(std::move(table)),
      ftrace_config_muxer_(std::move(model)),
      weak_factory_(this) {}

FtraceController::~FtraceController() {
  for (const auto* data_source : data_sources_)
    ftrace_config_muxer_->RemoveConfig(data_source->config_id());
  data_sources_.clear();
  started_data_sources_.clear();
  StopIfNeeded();
}

uint64_t FtraceController::NowMs() const {
  return static_cast<uint64_t>(base::GetWallTimeMs().count());
}

void FtraceController::StartIfNeeded() {
  if (started_data_sources_.size() > 1)
    return;
  PERFETTO_DCHECK(!started_data_sources_.empty());
  PERFETTO_DCHECK(per_cpu_.empty());

  // Lazily allocate the memory used for reading & parsing ftrace.
  if (!parsing_mem_.IsValid()) {
    parsing_mem_ =
        base::PagedMemory::Allocate(base::kPageSize * kParsingBufferSizePages);
  }

  per_cpu_.clear();
  per_cpu_.reserve(ftrace_procfs_->NumberOfCpus());
  size_t period_page_quota = ftrace_config_muxer_->GetPerCpuBufferSizePages();
  for (size_t cpu = 0; cpu < ftrace_procfs_->NumberOfCpus(); cpu++) {
    auto reader = std::unique_ptr<CpuReader>(
        new CpuReader(cpu, table_.get(), ftrace_procfs_->OpenPipeForCpu(cpu)));
    per_cpu_.emplace_back(std::move(reader), period_page_quota);
  }

  // Start the repeating read tasks.
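  // The initial delay aligns the first tick to the next multiple of
  // |drain_period_ms| of wall time, so ticks land on period boundaries
  // regardless of when tracing started. E.g. with drain_period_ms = 100 and
  // NowMs() = 1234, the task is posted with a 100 - (1234 % 100) = 66 ms
  // delay, firing at t = 1300 ms.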
  auto generation = ++generation_;
  auto drain_period_ms = GetDrainPeriodMs();
  auto weak_this = weak_factory_.GetWeakPtr();
  task_runner_->PostDelayedTask(
      [weak_this, generation] {
        if (weak_this)
          weak_this->ReadTick(generation);
      },
      drain_period_ms - (NowMs() % drain_period_ms));
}

// We handle the ftrace buffers in a repeating task (ReadTick). On a given tick,
// we iterate over all per-cpu buffers, parse their contents, and then write out
// the serialized packets. This is handled by |CpuReader| instances, which
// attempt to read from their respective per-cpu buffer fd until they catch up
// to the head of the buffer, or hit a transient error.
//
// The readers work in batches of |kParsingBufferSizePages| pages for cache
// locality, and to limit memory usage.
//
// However, the reading happens on the primary thread, shared with the rest of
// the service (including ipc). If there is a lot of ftrace data to read, we
// want to yield to the event loop, re-enqueueing a continuation task at the end
// of the immediate queue (letting other enqueued tasks run before continuing).
// Therefore we introduce |kMaxPagesPerCpuPerReadTick|.
//
// There is also a possibility that the ftrace bandwidth is particularly high.
// We do not want to continue trying to catch up to the event stream (via
// continuation tasks) without bound, as we want to limit our cpu% usage. We
// assume that given a config saying "per-cpu kernel ftrace buffer is N pages,
// and drain every T milliseconds", we should not read more than N pages per
// drain period. Therefore we introduce |per_cpu_.period_page_quota|. If the
// consumer wants to handle a high bandwidth of ftrace events, they should set
// the config values appropriately.
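//
// For example, with a 512-page per-cpu kernel buffer and the 256-page
// |kMaxPagesPerCpuPerReadTick| cap, a cpu that fills its buffer within one
// period is drained over two tasks: the first reads 256 pages and reposts an
// immediate continuation, the second reads the remaining 256-page quota.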
void FtraceController::ReadTick(int generation) {
  metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                             metatrace::FTRACE_READ_TICK);
  if (started_data_sources_.empty() || generation != generation_) {
    return;
  }

  // Read all cpu buffers with remaining per-period quota.
  bool all_cpus_done = true;
  uint8_t* parsing_buf = reinterpret_cast<uint8_t*>(parsing_mem_.Get());
  for (size_t i = 0; i < per_cpu_.size(); i++) {
    size_t orig_quota = per_cpu_[i].period_page_quota;
    if (orig_quota == 0)
      continue;

    size_t max_pages = std::min(orig_quota, kMaxPagesPerCpuPerReadTick);
    size_t pages_read = per_cpu_[i].reader->ReadCycle(
        parsing_buf, kParsingBufferSizePages, max_pages, started_data_sources_);

    size_t new_quota = (pages_read >= orig_quota) ? 0 : orig_quota - pages_read;
    per_cpu_[i].period_page_quota = new_quota;

    // Reader got stopped by the cap on the number of pages (to not do too much
    // work on the shared thread at once), but can read more in this drain
    // period. Repost the ReadTick (on the immediate queue) to iterate over all
    // cpus again. In other words, we will keep reposting work for all cpus as
    // long as at least one of them hits the read page cap each tick. If all
    // readers catch up to the event stream (pages_read < max_pages), or exceed
    // their quota, we will stop for the given period.
    PERFETTO_DCHECK(pages_read <= max_pages);
    if (pages_read == max_pages && new_quota > 0)
      all_cpus_done = false;
  }
  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();

  // More work to do in this period.
  auto weak_this = weak_factory_.GetWeakPtr();
  if (!all_cpus_done) {
    PERFETTO_DLOG("Reposting immediate ReadTick as there's more work.");
    task_runner_->PostTask([weak_this, generation] {
      if (weak_this)
        weak_this->ReadTick(generation);
    });
  } else {
    // Done until next drain period.
    size_t period_page_quota = ftrace_config_muxer_->GetPerCpuBufferSizePages();
    for (auto& per_cpu : per_cpu_)
      per_cpu.period_page_quota = period_page_quota;

    auto drain_period_ms = GetDrainPeriodMs();
    task_runner_->PostDelayedTask(
        [weak_this, generation] {
          if (weak_this)
            weak_this->ReadTick(generation);
        },
        drain_period_ms - (NowMs() % drain_period_ms));
  }
}

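// Returns the shortest |drain_period_ms| requested across all data sources,
// validated by ClampDrainPeriodMs() (which falls back to the default period
// if the value is zero or out of range).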
uint32_t FtraceController::GetDrainPeriodMs() {
  if (data_sources_.empty())
    return kDefaultDrainPeriodMs;
  uint32_t min_drain_period_ms = kMaxDrainPeriodMs + 1;
  for (const FtraceDataSource* data_source : data_sources_) {
    if (data_source->config().drain_period_ms() < min_drain_period_ms)
      min_drain_period_ms = data_source->config().drain_period_ms();
  }
  return ClampDrainPeriodMs(min_drain_period_ms);
}

void FtraceController::ClearTrace() {
  ftrace_procfs_->ClearTrace();
}

void FtraceController::DisableAllEvents() {
  ftrace_procfs_->DisableAllEvents();
}

void FtraceController::WriteTraceMarker(const std::string& s) {
  ftrace_procfs_->WriteTraceMarker(s);
}

void FtraceController::Flush(FlushRequestID flush_id) {
  metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                             metatrace::FTRACE_CPU_FLUSH);

  // Read all cpus in one go, limiting the per-cpu read amount to make sure we
  // don't get stuck chasing the writer if there's a very high bandwidth of
  // events.
  size_t per_cpu_buf_size_pages =
      ftrace_config_muxer_->GetPerCpuBufferSizePages();
  uint8_t* parsing_buf = reinterpret_cast<uint8_t*>(parsing_mem_.Get());
  for (size_t i = 0; i < per_cpu_.size(); i++) {
    per_cpu_[i].reader->ReadCycle(parsing_buf, kParsingBufferSizePages,
                                  per_cpu_buf_size_pages,
                                  started_data_sources_);
  }
  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();

  for (FtraceDataSource* data_source : started_data_sources_)
    data_source->OnFtraceFlushComplete(flush_id);
}

void FtraceController::StopIfNeeded() {
  if (!started_data_sources_.empty())
    return;

  // We are not implicitly flushing on Stop. The tracing service is supposed to
  // ask for an explicit flush before stopping, unless it needs to perform a
  // non-graceful stop.

  per_cpu_.clear();

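  // Keep the parsing buffer mapped for reuse by future sessions, but return
  // the physical pages to the OS (AdviseDontNeed is a madvise(MADV_DONTNEED)
  // style hint).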
  if (parsing_mem_.IsValid()) {
    parsing_mem_.AdviseDontNeed(parsing_mem_.Get(), parsing_mem_.size());
  }
}

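// Validates and registers the data source's config with the muxer. Note that
// this does not start recording: tracing is only enabled once
// StartDataSource() activates the config.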
bool FtraceController::AddDataSource(FtraceDataSource* data_source) {
  if (!ValidConfig(data_source->config()))
    return false;

  auto config_id = ftrace_config_muxer_->SetupConfig(data_source->config());
  if (!config_id)
    return false;

  const FtraceDataSourceConfig* ds_config =
      ftrace_config_muxer_->GetDataSourceConfig(config_id);
  auto it_and_inserted = data_sources_.insert(data_source);
  PERFETTO_DCHECK(it_and_inserted.second);
  data_source->Initialize(config_id, ds_config);
  return true;
}

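// Activates a config previously registered via AddDataSource() and, if this is
// the first started data source, kicks off the repeating read tasks via
// StartIfNeeded().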
bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
  PERFETTO_DCHECK(data_sources_.count(data_source) > 0);

  FtraceConfigId config_id = data_source->config_id();
  PERFETTO_CHECK(config_id);

  if (!ftrace_config_muxer_->ActivateConfig(config_id))
    return false;

  started_data_sources_.insert(data_source);
  StartIfNeeded();
  return true;
}

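// Removes the data source's config from the muxer and stops the read tasks if
// this was the last started data source.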
void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
  started_data_sources_.erase(data_source);
  size_t removed = data_sources_.erase(data_source);
  if (!removed)
    return;  // Can happen if AddDataSource failed (e.g. too many sessions).
  ftrace_config_muxer_->RemoveConfig(data_source->config_id());
  StopIfNeeded();
}

void FtraceController::DumpFtraceStats(FtraceStats* stats) {
  DumpAllCpuStats(ftrace_procfs_.get(), stats);
}

FtraceController::Observer::~Observer() = default;

}  // namespace perfetto