1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/traced/probes/ftrace/ftrace_controller.h"
18
19 #include <fcntl.h>
20 #include <stdint.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24 #include <sys/wait.h>
25 #include <unistd.h>
26
27 #include <array>
28 #include <string>
29 #include <utility>
30
31 #include "perfetto/base/build_config.h"
32 #include "perfetto/base/file_utils.h"
33 #include "perfetto/base/logging.h"
34 #include "perfetto/base/metatrace.h"
35 #include "perfetto/base/time.h"
36 #include "perfetto/tracing/core/trace_writer.h"
37 #include "src/traced/probes/ftrace/cpu_reader.h"
38 #include "src/traced/probes/ftrace/cpu_stats_parser.h"
39 #include "src/traced/probes/ftrace/event_info.h"
40 #include "src/traced/probes/ftrace/ftrace_config_muxer.h"
41 #include "src/traced/probes/ftrace/ftrace_data_source.h"
42 #include "src/traced/probes/ftrace/ftrace_metadata.h"
43 #include "src/traced/probes/ftrace/ftrace_procfs.h"
44 #include "src/traced/probes/ftrace/ftrace_stats.h"
45 #include "src/traced/probes/ftrace/proto_translation_table.h"
46
47 namespace perfetto {
48 namespace {
49
// Drain period used when a config leaves drain_period_ms unset (0) or
// requests a value outside the accepted bounds below.
constexpr int kDefaultDrainPeriodMs = 100;
// How long Flush() waits for all per-cpu readers to ack before OnFlushTimeout
// gives up and acks the flush anyway.
constexpr int kControllerFlushTimeoutMs = 500;
// Accepted bounds for a config-provided drain period (1 ms .. 1 minute).
constexpr int kMinDrainPeriodMs = 1;
constexpr int kMaxDrainPeriodMs = 1000 * 60;
54
ClampDrainPeriodMs(uint32_t drain_period_ms)55 uint32_t ClampDrainPeriodMs(uint32_t drain_period_ms) {
56 if (drain_period_ms == 0) {
57 return kDefaultDrainPeriodMs;
58 }
59 if (drain_period_ms < kMinDrainPeriodMs ||
60 kMaxDrainPeriodMs < drain_period_ms) {
61 PERFETTO_LOG("drain_period_ms was %u should be between %u and %u",
62 drain_period_ms, kMinDrainPeriodMs, kMaxDrainPeriodMs);
63 return kDefaultDrainPeriodMs;
64 }
65 return drain_period_ms;
66 }
67
WriteToFile(const char * path,const char * str)68 void WriteToFile(const char* path, const char* str) {
69 auto fd = base::OpenFile(path, O_WRONLY);
70 if (!fd)
71 return;
72 base::ignore_result(base::WriteAll(*fd, str, strlen(str)));
73 }
74
// Truncates the file at |path| to zero length (best effort). Opening with
// O_TRUNC is the desired side effect; the returned fd is intentionally
// discarded and closed immediately when |fd| goes out of scope.
void ClearFile(const char* path) {
  auto fd = base::OpenFile(path, O_WRONLY | O_TRUNC);
}
78
79 } // namespace
80
// Candidate tracefs mount points, probed in order by Create(); the array is
// nullptr-terminated. On Android the canonical /sys/kernel/tracing mount is
// tried before the legacy debugfs location.
const char* const FtraceController::kTracingPaths[] = {
#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
    "/sys/kernel/tracing/", "/sys/kernel/debug/tracing/", nullptr,
#else
    "/sys/kernel/debug/tracing/", nullptr,
#endif
};
88
// Method of last resort to reset ftrace state.
// We don't know what state the rest of the system and process is so as far
// as possible avoid allocations. That is why both tracefs locations are
// spelled out literally rather than built from kTracingPaths: writes to
// whichever path is not mounted simply fail and are ignored by WriteToFile.
void HardResetFtraceState() {
  // Legacy debugfs mount point.
  WriteToFile("/sys/kernel/debug/tracing/tracing_on", "0");
  WriteToFile("/sys/kernel/debug/tracing/buffer_size_kb", "4");
  WriteToFile("/sys/kernel/debug/tracing/events/enable", "0");
  ClearFile("/sys/kernel/debug/tracing/trace");

  // Canonical tracefs mount point.
  WriteToFile("/sys/kernel/tracing/tracing_on", "0");
  WriteToFile("/sys/kernel/tracing/buffer_size_kb", "4");
  WriteToFile("/sys/kernel/tracing/events/enable", "0");
  ClearFile("/sys/kernel/tracing/trace");
}
103
104 // static
105 // TODO(taylori): Add a test for tracing paths in integration tests.
Create(base::TaskRunner * runner,Observer * observer)106 std::unique_ptr<FtraceController> FtraceController::Create(
107 base::TaskRunner* runner,
108 Observer* observer) {
109 size_t index = 0;
110 std::unique_ptr<FtraceProcfs> ftrace_procfs = nullptr;
111 while (!ftrace_procfs && kTracingPaths[index]) {
112 ftrace_procfs = FtraceProcfs::Create(kTracingPaths[index++]);
113 }
114
115 if (!ftrace_procfs)
116 return nullptr;
117
118 auto table = ProtoTranslationTable::Create(
119 ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());
120
121 if (!table)
122 return nullptr;
123
124 std::unique_ptr<FtraceConfigMuxer> model = std::unique_ptr<FtraceConfigMuxer>(
125 new FtraceConfigMuxer(ftrace_procfs.get(), table.get()));
126 return std::unique_ptr<FtraceController>(
127 new FtraceController(std::move(ftrace_procfs), std::move(table),
128 std::move(model), runner, observer));
129 }
130
// Takes ownership of the procfs wrapper, translation table and config muxer.
// |task_runner| and |observer| are borrowed and must outlive the controller.
FtraceController::FtraceController(std::unique_ptr<FtraceProcfs> ftrace_procfs,
                                   std::unique_ptr<ProtoTranslationTable> table,
                                   std::unique_ptr<FtraceConfigMuxer> model,
                                   base::TaskRunner* task_runner,
                                   Observer* observer)
    : task_runner_(task_runner),
      observer_(observer),
      thread_sync_(task_runner),
      ftrace_procfs_(std::move(ftrace_procfs)),
      table_(std::move(table)),
      ftrace_config_muxer_(std::move(model)),
      weak_factory_(this) {
  // The weak pointer stored in |thread_sync_| is what the CpuReader worker
  // threads use to safely post back to this controller on the main thread.
  thread_sync_.trace_controller_weak = GetWeakPtr();
}
145
// Tears down all registered data sources and stops the per-cpu readers.
// Must run on the main thread; StopIfNeeded() joins the worker threads.
FtraceController::~FtraceController() {
  PERFETTO_DCHECK_THREAD(thread_checker_);
  // Undo every SetupConfig() done in AddDataSource().
  for (const auto* data_source : data_sources_)
    ftrace_config_muxer_->RemoveConfig(data_source->config_id());
  data_sources_.clear();
  // Emptying started_data_sources_ makes StopIfNeeded() actually stop.
  started_data_sources_.clear();
  StopIfNeeded();
}
154
NowMs() const155 uint64_t FtraceController::NowMs() const {
156 return static_cast<uint64_t>(base::GetWallTimeMs().count());
157 }
158
// The OnCpuReader* methods below are called on the CpuReader worker threads.
// Lifetime is guaranteed to be valid, because the FtraceController dtor
// (that happens on the main thread) joins the worker threads.

// static
// Invoked by a CpuReader worker after it moved data into its staging pipe.
// Marks |cpu| as pending-drain and, only for the first cpu to wake up in a
// given interval, schedules a DrainCPUs() on the main thread.
void FtraceController::OnCpuReaderRead(size_t cpu,
                                       int generation,
                                       FtraceThreadSync* thread_sync) {
  PERFETTO_METATRACE("OnCpuReaderRead()", cpu);

  {
    std::lock_guard<std::mutex> lock(thread_sync->mutex);
    // If this was the first CPU to wake up, schedule a drain for the next
    // drain interval.
    bool post_drain_task = thread_sync->cpus_to_drain.none();
    thread_sync->cpus_to_drain[cpu] = true;
    if (!post_drain_task)
      return;
  }  // lock(thread_sync_.mutex)

  // Snapshot outside the lock; these fields are set once at construction.
  base::WeakPtr<FtraceController> weak_ctl = thread_sync->trace_controller_weak;
  base::TaskRunner* task_runner = thread_sync->task_runner;

  // The nested PostTask is used because the FtraceController (and hence
  // GetDrainPeriodMs()) can be called only on the main thread.
  task_runner->PostTask([weak_ctl, task_runner, generation] {

    if (!weak_ctl)
      return;
    uint32_t drain_period_ms = weak_ctl->GetDrainPeriodMs();

    // The delay "period - (now % period)" aligns the drain to the next
    // multiple of the drain period, so repeated wakeups coalesce.
    task_runner->PostDelayedTask(
        [weak_ctl, generation] {
          if (weak_ctl)
            weak_ctl->DrainCPUs(generation);
        },
        drain_period_ms - (weak_ctl->NowMs() % drain_period_ms));

  });
}
199
// static
// Invoked by a CpuReader worker when it finished servicing a kFlush command.
// Unlike OnCpuReaderRead(), every cpu posts its own drain task immediately
// instead of batching on the drain period.
void FtraceController::OnCpuReaderFlush(size_t cpu,
                                       int generation,
                                       FtraceThreadSync* thread_sync) {
  // In the case of a flush, we want to drain the data as quickly as possible to
  // minimize the flush latency, at the cost of more tasks / wakeups (eventually
  // one task per cpu). Flushes are not supposed to happen too frequently.
  {
    std::lock_guard<std::mutex> lock(thread_sync->mutex);
    thread_sync->cpus_to_drain[cpu] = true;
    // flush_acks is how DrainCPUs() detects that all cpus have replied.
    thread_sync->flush_acks[cpu] = true;
  }  // lock(thread_sync_.mutex)

  base::WeakPtr<FtraceController> weak_ctl = thread_sync->trace_controller_weak;
  thread_sync->task_runner->PostTask([weak_ctl, generation] {
    if (weak_ctl)
      weak_ctl->DrainCPUs(generation);
  });
}
219
// Main-thread task that converts the staged raw ftrace data of every pending
// cpu into protos. Also completes an in-flight Flush() once all cpus have
// acked. |generation| guards against tasks queued before a Stop/Start cycle.
void FtraceController::DrainCPUs(int generation) {
  PERFETTO_DCHECK_THREAD(thread_checker_);
  PERFETTO_METATRACE("DrainCPUs()", base::MetaTrace::kMainThreadCpu);

  // Stale task from a previous session: the CpuReaders it refers to are gone.
  if (generation != generation_)
    return;

  const size_t num_cpus = ftrace_procfs_->NumberOfCpus();
  PERFETTO_DCHECK(cpu_readers_.size() == num_cpus);
  FlushRequestID ack_flush_request_id = 0;
  std::bitset<base::kMaxCpus> cpus_to_drain;
  {
    std::lock_guard<std::mutex> lock(thread_sync_.mutex);
    // Claim the pending set atomically; workers may keep adding bits after
    // the lock is released and those will be handled by a later task.
    std::swap(cpus_to_drain, thread_sync_.cpus_to_drain);

    // Check also if a flush is pending and if all cpus have acked. If that's
    // the case, ack the overall Flush() request at the end of this function.
    if (cur_flush_request_id_ && thread_sync_.flush_acks.count() >= num_cpus) {
      thread_sync_.flush_acks.reset();
      ack_flush_request_id = cur_flush_request_id_;
      cur_flush_request_id_ = 0;
    }
  }

  for (size_t cpu = 0; cpu < num_cpus; cpu++) {
    if (!cpus_to_drain[cpu])
      continue;
    // This method reads the pipe and converts the raw ftrace data into
    // protobufs using the |data_source|'s TraceWriter.
    cpu_readers_[cpu]->Drain(started_data_sources_);
    OnDrainCpuForTesting(cpu);
  }

  // If we filled up any SHM pages while draining the data, we will have posted
  // a task to notify traced about this. Only unblock the readers after this
  // notification is sent to make it less likely that they steal CPU time away
  // from traced. Also, don't unblock the readers until all of them have replied
  // to the flush.
  if (!cur_flush_request_id_) {
    base::WeakPtr<FtraceController> weak_this = weak_factory_.GetWeakPtr();
    task_runner_->PostTask([weak_this] {
      if (weak_this)
        weak_this->UnblockReaders();
    });
  }

  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();

  if (ack_flush_request_id) {
    // Flush completed, all CpuReader(s) acked.

    IssueThreadSyncCmd(FtraceThreadSync::kRun);  // Switch back to reading mode.

    // This will call FtraceDataSource::OnFtraceFlushComplete(), which in turn
    // will flush the userspace buffers and ack the flush to the ProbesProducer
    // which in turn will ack the flush to the tracing service.
    NotifyFlushCompleteToStartedDataSources(ack_flush_request_id);
  }
}
279
// Posted by DrainCPUs() after the drained data has been handed to the data
// sources. Re-issues kRun so the worker threads resume splicing data.
void FtraceController::UnblockReaders() {
  PERFETTO_METATRACE("UnblockReaders()", base::MetaTrace::kMainThreadCpu);

  // If a flush or a quit is pending, do nothing.
  std::unique_lock<std::mutex> lock(thread_sync_.mutex);
  if (thread_sync_.cmd != FtraceThreadSync::kRun)
    return;

  // Unblock all waiting readers to start moving more data into their
  // respective staging pipes. The already-held lock is handed over to
  // IssueThreadSyncCmd() to avoid a release/re-acquire.
  IssueThreadSyncCmd(FtraceThreadSync::kRun, std::move(lock));
}
292
StartIfNeeded()293 void FtraceController::StartIfNeeded() {
294 if (started_data_sources_.size() > 1)
295 return;
296 PERFETTO_DCHECK(!started_data_sources_.empty());
297 PERFETTO_DCHECK(cpu_readers_.empty());
298 base::WeakPtr<FtraceController> weak_this = weak_factory_.GetWeakPtr();
299
300 {
301 std::lock_guard<std::mutex> lock(thread_sync_.mutex);
302 thread_sync_.cpus_to_drain.reset();
303 thread_sync_.cmd = FtraceThreadSync::kRun;
304 thread_sync_.cmd_id++;
305 }
306
307 generation_++;
308 cpu_readers_.clear();
309 cpu_readers_.reserve(ftrace_procfs_->NumberOfCpus());
310 for (size_t cpu = 0; cpu < ftrace_procfs_->NumberOfCpus(); cpu++) {
311 cpu_readers_.emplace_back(
312 new CpuReader(table_.get(), &thread_sync_, cpu, generation_,
313 ftrace_procfs_->OpenPipeForCpu(cpu)));
314 }
315 }
316
GetDrainPeriodMs()317 uint32_t FtraceController::GetDrainPeriodMs() {
318 if (data_sources_.empty())
319 return kDefaultDrainPeriodMs;
320 uint32_t min_drain_period_ms = kMaxDrainPeriodMs + 1;
321 for (const FtraceDataSource* data_source : data_sources_) {
322 if (data_source->config().drain_period_ms() < min_drain_period_ms)
323 min_drain_period_ms = data_source->config().drain_period_ms();
324 }
325 return ClampDrainPeriodMs(min_drain_period_ms);
326 }
327
// Empties the kernel ftrace ring buffer (tracefs "trace" file).
void FtraceController::ClearTrace() {
  ftrace_procfs_->ClearTrace();
}
331
// Disables every ftrace event (tracefs "events/enable").
void FtraceController::DisableAllEvents() {
  ftrace_procfs_->DisableAllEvents();
}
335
// Injects |s| into the ftrace stream via the tracefs "trace_marker" file.
void FtraceController::WriteTraceMarker(const std::string& s) {
  ftrace_procfs_->WriteTraceMarker(s);
}
339
// Starts a flush: commands every CpuReader worker to drain its pipe and ack.
// Completion is signalled from DrainCPUs() once all cpus acked; a watchdog
// task (OnFlushTimeout) bounds the wait to kControllerFlushTimeoutMs.
void FtraceController::Flush(FlushRequestID flush_id) {
  PERFETTO_DCHECK_THREAD(thread_checker_);

  if (flush_id == cur_flush_request_id_)
    return;  // Already dealing with this flush request.

  cur_flush_request_id_ = flush_id;
  {
    std::unique_lock<std::mutex> lock(thread_sync_.mutex);
    // Clear any stale acks before issuing the new flush command.
    thread_sync_.flush_acks.reset();
    IssueThreadSyncCmd(FtraceThreadSync::kFlush, std::move(lock));
  }

  // Arm the timeout watchdog; it self-ignores if the flush completed first
  // (cur_flush_request_id_ will no longer match).
  base::WeakPtr<FtraceController> weak_this = weak_factory_.GetWeakPtr();
  task_runner_->PostDelayedTask(
      [weak_this, flush_id] {
        if (weak_this)
          weak_this->OnFlushTimeout(flush_id);
      },
      kControllerFlushTimeoutMs);
}
361
// Watchdog for Flush(): if the flush identified by |flush_request_id| is
// still pending, gives up waiting for cpu acks, switches the workers back to
// kRun and acks the flush to the data sources anyway.
void FtraceController::OnFlushTimeout(FlushRequestID flush_request_id) {
  // The flush either completed already or was superseded by a newer one.
  if (flush_request_id != cur_flush_request_id_)
    return;

  std::string acks;  // For debugging purposes only.
  {
    // Unlock the cpu readers and move on.
    std::unique_lock<std::mutex> lock(thread_sync_.mutex);
    acks = thread_sync_.flush_acks.to_string();
    thread_sync_.flush_acks.reset();
    if (thread_sync_.cmd == FtraceThreadSync::kFlush)
      IssueThreadSyncCmd(FtraceThreadSync::kRun, std::move(lock));
  }

  PERFETTO_ELOG("Ftrace flush(%" PRIu64 ") timed out. Acked cpus mask: [%s]",
                flush_request_id, acks.c_str());
  cur_flush_request_id_ = 0;
  NotifyFlushCompleteToStartedDataSources(flush_request_id);
}
381
// Shuts down the per-cpu readers once the last started data source is gone.
void FtraceController::StopIfNeeded() {
  if (!started_data_sources_.empty())
    return;

  // We are not implicitly flushing on Stop. The tracing service is supposed to
  // ask for an explicit flush before stopping, unless it needs to perform a
  // non-graceful stop.

  // Must be issued before clearing the readers so the workers wake up and
  // exit their loops.
  IssueThreadSyncCmd(FtraceThreadSync::kQuit);

  // Destroying the CpuReader(s) will join on their worker threads.
  cpu_readers_.clear();
  generation_++;
}
396
// Registers |data_source| (borrowed; caller retains ownership) and sets up
// its ftrace config in the muxer. Returns false if the config is invalid or
// the muxer rejects it; the data source is not started yet (see
// StartDataSource()).
bool FtraceController::AddDataSource(FtraceDataSource* data_source) {
  PERFETTO_DCHECK_THREAD(thread_checker_);
  if (!ValidConfig(data_source->config()))
    return false;

  auto config_id = ftrace_config_muxer_->SetupConfig(data_source->config());
  if (!config_id)
    return false;

  const EventFilter* filter = ftrace_config_muxer_->GetEventFilter(config_id);
  auto it_and_inserted = data_sources_.insert(data_source);
  PERFETTO_DCHECK(it_and_inserted.second);
  data_source->Initialize(config_id, filter);
  return true;
}
412
// Activates the ftrace config of a previously added data source and, if this
// is the first started source, spins up the per-cpu readers.
bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
  PERFETTO_DCHECK_THREAD(thread_checker_);
  PERFETTO_DCHECK(data_sources_.count(data_source) > 0);

  FtraceConfigId config_id = data_source->config_id();
  PERFETTO_CHECK(config_id);

  if (!ftrace_config_muxer_->ActivateConfig(config_id))
    return false;

  started_data_sources_.insert(data_source);
  StartIfNeeded();
  return true;
}
427
// Unregisters |data_source|, removes its config from the muxer and stops the
// readers if it was the last started source. Safe to call for a data source
// whose AddDataSource() failed.
void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
  PERFETTO_DCHECK_THREAD(thread_checker_);
  started_data_sources_.erase(data_source);
  size_t removed = data_sources_.erase(data_source);
  if (!removed)
    return;  // Can happen if AddDataSource failed (e.g. too many sessions).
  ftrace_config_muxer_->RemoveConfig(data_source->config_id());
  StopIfNeeded();
}
437
// Fills |stats| with the per-cpu kernel buffer statistics read from tracefs.
void FtraceController::DumpFtraceStats(FtraceStats* stats) {
  DumpAllCpuStats(ftrace_procfs_.get(), stats);
}
441
// Publishes |cmd| to the CpuReader worker threads and wakes them up.
// |pass_lock_from_caller| lets a caller that already holds thread_sync_.mutex
// hand over the lock (avoiding an unlock/relock); callers without the lock
// pass a default-constructed (unowned) one.
void FtraceController::IssueThreadSyncCmd(
    FtraceThreadSync::Cmd cmd,
    std::unique_lock<std::mutex> pass_lock_from_caller) {
  PERFETTO_DCHECK_THREAD(thread_checker_);
  {
    std::unique_lock<std::mutex> lock(std::move(pass_lock_from_caller));
    if (!lock.owns_lock())
      lock = std::unique_lock<std::mutex>(thread_sync_.mutex);

    if (thread_sync_.cmd == FtraceThreadSync::kQuit &&
        cmd != FtraceThreadSync::kQuit) {
      // If in kQuit state, we should never issue any other commands.
      return;
    }

    // cmd_id is bumped so workers can distinguish a re-issued identical
    // command from one they already serviced.
    thread_sync_.cmd = cmd;
    thread_sync_.cmd_id++;
  }

  // Send a SIGPIPE to all worker threads to wake them up if they are sitting in
  // a blocking splice(). If they are not and instead they are sitting in the
  // cond-variable.wait(), this, together with the one below, will have at best
  // the same effect of a spurious wakeup, depending on the implementation of
  // the condition variable.
  for (const auto& cpu_reader : cpu_readers_)
    cpu_reader->InterruptWorkerThreadWithSignal();

  thread_sync_.cond.notify_all();
}
471
// Propagates the completion (or timeout) of flush |flush_request_id| to every
// started data source, which will flush its userspace buffers and ack up the
// chain to the tracing service.
void FtraceController::NotifyFlushCompleteToStartedDataSources(
    FlushRequestID flush_request_id) {
  PERFETTO_DCHECK_THREAD(thread_checker_);
  for (FtraceDataSource* data_source : started_data_sources_)
    data_source->OnFtraceFlushComplete(flush_request_id);
}
478
// Out-of-line definition of the interface's defaulted destructor.
FtraceController::Observer::~Observer() = default;
480
481 } // namespace perfetto
482