/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/traced/probes/ftrace/ftrace_controller.h"

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <array>
#include <string>
#include <utility>

#include "perfetto/base/build_config.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/file_utils.h"
#include "perfetto/ext/base/metatrace.h"
#include "perfetto/ext/base/string_utils.h"
#include "perfetto/ext/tracing/core/trace_writer.h"
#include "src/kallsyms/kernel_symbol_map.h"
#include "src/kallsyms/lazy_kernel_symbolizer.h"
#include "src/traced/probes/ftrace/atrace_hal_wrapper.h"
#include "src/traced/probes/ftrace/cpu_reader.h"
#include "src/traced/probes/ftrace/cpu_stats_parser.h"
#include "src/traced/probes/ftrace/discover_vendor_tracepoints.h"
#include "src/traced/probes/ftrace/event_info.h"
#include "src/traced/probes/ftrace/ftrace_config_muxer.h"
#include "src/traced/probes/ftrace/ftrace_data_source.h"
#include "src/traced/probes/ftrace/ftrace_metadata.h"
#include "src/traced/probes/ftrace/ftrace_procfs.h"
#include "src/traced/probes/ftrace/ftrace_stats.h"
#include "src/traced/probes/ftrace/proto_translation_table.h"

namespace perfetto {
namespace {

constexpr int kDefaultDrainPeriodMs = 100;
constexpr int kMinDrainPeriodMs = 1;
constexpr int kMaxDrainPeriodMs = 1000 * 60;

// Read at most this many pages of data per cpu per read task. If we hit this
// limit on at least one cpu, we stop and repost the read task, letting other
// tasks get some cpu time before we continue reading.
constexpr size_t kMaxPagesPerCpuPerReadTick = 256;  // 1 MB per cpu

// When reading and parsing data for a particular cpu, we do it in batches of
// this many pages. In other words, we'll read up to
// |kParsingBufferSizePages| into memory, parse them, and then repeat if we
// still haven't caught up to the writer. A working set of 32 pages is 128k of
// data, which should fit in a typical L2D cache. Furthermore, the batching
// limits the memory usage of traced_probes.
//
// TODO(rsavitski): consider making the buffering & parsing page counts
// independent; it should be a single counter in the cpu_reader, similar to
// the lost_events case.
constexpr size_t kParsingBufferSizePages = 32;

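// Returns a valid drain period, replacing a zero or out-of-range value with
// the default.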
uint32_t ClampDrainPeriodMs(uint32_t drain_period_ms) {
  if (drain_period_ms == 0) {
    return kDefaultDrainPeriodMs;
  }
  if (drain_period_ms < kMinDrainPeriodMs ||
      kMaxDrainPeriodMs < drain_period_ms) {
    PERFETTO_LOG("drain_period_ms was %u, should be between %u and %u",
                 drain_period_ms, kMinDrainPeriodMs, kMaxDrainPeriodMs);
    return kDefaultDrainPeriodMs;
  }
  return drain_period_ms;
}

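// Writes |str| to the file at |path|. Returns true only if the whole string
// was written.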
bool WriteToFile(const char* path, const char* str) {
  auto fd = base::OpenFile(path, O_WRONLY);
  if (!fd)
    return false;
  const size_t str_len = strlen(str);
  return base::WriteAll(*fd, str, str_len) == static_cast<ssize_t>(str_len);
}

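// Truncates the file at |path| to zero length. Returns true if the file could
// be opened for writing.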
bool ClearFile(const char* path) {
  auto fd = base::OpenFile(path, O_WRONLY | O_TRUNC);
  return !!fd;
}

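// Reads the "now ts:" field from a tracefs per-cpu stats file and returns it
// converted from (fractional) seconds to nanoseconds.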
base::Optional<int64_t> ReadFtraceNowTs(const base::ScopedFile& cpu_stats_fd) {
  PERFETTO_CHECK(cpu_stats_fd);

  char buf[512];
  ssize_t res = PERFETTO_EINTR(pread(*cpu_stats_fd, buf, sizeof(buf) - 1, 0));
  if (res <= 0)
    return base::nullopt;
  buf[res] = '\0';

  FtraceCpuStats stats{};
  DumpCpuStats(buf, &stats);
  return static_cast<int64_t>(stats.now_ts * 1000 * 1000 * 1000);
}

}  // namespace

// Method of last resort to reset ftrace state.
// We don't know what state the rest of the system and process is in, so we
// avoid allocations as far as possible.
bool HardResetFtraceState() {
  for (const char* const* item = FtraceProcfs::kTracingPaths; *item; ++item) {
    std::string prefix(*item);
    PERFETTO_CHECK(base::EndsWith(prefix, "/"));
    bool res = true;
    res &= WriteToFile((prefix + "tracing_on").c_str(), "0");
    res &= WriteToFile((prefix + "buffer_size_kb").c_str(), "4");
    // We deliberately don't check the result of this, as on some older
    // versions of Android events/enable was not writable by the shell user.
    WriteToFile((prefix + "events/enable").c_str(), "0");
    res &= ClearFile((prefix + "trace").c_str());
    if (res)
      return true;
  }
  return false;
}

// static
std::unique_ptr<FtraceController> FtraceController::Create(
    base::TaskRunner* runner,
    Observer* observer) {
  std::unique_ptr<FtraceProcfs> ftrace_procfs =
      FtraceProcfs::CreateGuessingMountPoint();

  if (!ftrace_procfs)
    return nullptr;

  auto table = ProtoTranslationTable::Create(
      ftrace_procfs.get(), GetStaticEventInfo(), GetStaticCommonFieldsInfo());

  if (!table)
    return nullptr;

  AtraceHalWrapper hal;
  auto vendor_evts =
      vendor_tracepoints::DiscoverVendorTracepoints(&hal, ftrace_procfs.get());

  std::unique_ptr<FtraceConfigMuxer> model = std::unique_ptr<FtraceConfigMuxer>(
      new FtraceConfigMuxer(ftrace_procfs.get(), table.get(), vendor_evts));
  return std::unique_ptr<FtraceController>(
      new FtraceController(std::move(ftrace_procfs), std::move(table),
                           std::move(model), runner, observer));
}

FtraceController::FtraceController(std::unique_ptr<FtraceProcfs> ftrace_procfs,
                                   std::unique_ptr<ProtoTranslationTable> table,
                                   std::unique_ptr<FtraceConfigMuxer> model,
                                   base::TaskRunner* task_runner,
                                   Observer* observer)
    : task_runner_(task_runner),
      observer_(observer),
      symbolizer_(new LazyKernelSymbolizer()),
      ftrace_procfs_(std::move(ftrace_procfs)),
      table_(std::move(table)),
      ftrace_config_muxer_(std::move(model)),
      ftrace_clock_snapshot_(new FtraceClockSnapshot()),
      weak_factory_(this) {}

FtraceController::~FtraceController() {
  for (const auto* data_source : data_sources_)
    ftrace_config_muxer_->RemoveConfig(data_source->config_id());
  data_sources_.clear();
  started_data_sources_.clear();
  StopIfNeeded();
}

uint64_t FtraceController::NowMs() const {
  return static_cast<uint64_t>(base::GetWallTimeMs().count());
}

void FtraceController::StartIfNeeded() {
  using FtraceClock = protos::pbzero::FtraceClock;
  if (started_data_sources_.size() > 1)
    return;
  PERFETTO_DCHECK(!started_data_sources_.empty());
  PERFETTO_DCHECK(per_cpu_.empty());

  // Lazily allocate the memory used for reading & parsing ftrace.
  if (!parsing_mem_.IsValid()) {
    parsing_mem_ =
        base::PagedMemory::Allocate(base::kPageSize * kParsingBufferSizePages);
  }

  // If we're not using the boot clock, snapshot the ftrace clock.
  FtraceClock clock = ftrace_config_muxer_->ftrace_clock();
  if (clock != FtraceClock::FTRACE_CLOCK_UNSPECIFIED) {
    cpu_zero_stats_fd_ = ftrace_procfs_->OpenCpuStats(0 /* cpu */);
    MaybeSnapshotFtraceClock();
  }

  per_cpu_.clear();
  per_cpu_.reserve(ftrace_procfs_->NumberOfCpus());
  size_t period_page_quota = ftrace_config_muxer_->GetPerCpuBufferSizePages();
  for (size_t cpu = 0; cpu < ftrace_procfs_->NumberOfCpus(); cpu++) {
    auto reader = std::unique_ptr<CpuReader>(new CpuReader(
        cpu, table_.get(), symbolizer_.get(), ftrace_clock_snapshot_.get(),
        ftrace_procfs_->OpenPipeForCpu(cpu)));
    per_cpu_.emplace_back(std::move(reader), period_page_quota);
  }

  // Start the repeating read tasks.
  auto generation = ++generation_;
  auto drain_period_ms = GetDrainPeriodMs();
  auto weak_this = weak_factory_.GetWeakPtr();
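  // Delay the first tick so that it fires on a multiple of |drain_period_ms|
  // in wall-clock time.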
  task_runner_->PostDelayedTask(
      [weak_this, generation] {
        if (weak_this)
          weak_this->ReadTick(generation);
      },
      drain_period_ms - (NowMs() % drain_period_ms));
}

// We handle the ftrace buffers in a repeating task (ReadTick). On a given tick,
// we iterate over all per-cpu buffers, parse their contents, and then write out
// the serialized packets. This is handled by |CpuReader| instances, which
// attempt to read from their respective per-cpu buffer fd until they catch up
// to the head of the buffer, or hit a transient error.
//
// The readers work in batches of |kParsingBufferSizePages| pages for cache
// locality, and to limit memory usage.
//
// However, the reading happens on the primary thread, shared with the rest of
// the service (including ipc). If there is a lot of ftrace data to read, we
// want to yield to the event loop, re-enqueueing a continuation task at the end
// of the immediate queue (letting other enqueued tasks run before continuing).
// Therefore we introduce |kMaxPagesPerCpuPerReadTick|.
//
// There is also a possibility that the ftrace bandwidth is particularly high.
// We do not want to continue trying to catch up to the event stream (via
// continuation tasks) without bound, as we want to limit our cpu% usage. We
// assume that given a config saying "per-cpu kernel ftrace buffer is N pages,
// and drain every T milliseconds", we should not read more than N pages per
// drain period. Therefore we introduce |per_cpu_.period_page_quota|. If the
// consumer wants to handle a high bandwidth of ftrace events, they should set
// the config values appropriately.
void FtraceController::ReadTick(int generation) {
  metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                             metatrace::FTRACE_READ_TICK);
  if (started_data_sources_.empty() || generation != generation_) {
    return;
  }

#if PERFETTO_DCHECK_IS_ON()
  // The OnFtraceDataWrittenIntoDataSourceBuffers() below is supposed to clear
  // all metadata, including the |kernel_addrs| map for symbolization.
  for (FtraceDataSource* ds : started_data_sources_) {
    FtraceMetadata* ftrace_metadata = ds->mutable_metadata();
    PERFETTO_DCHECK(ftrace_metadata->kernel_addrs.empty());
    PERFETTO_DCHECK(ftrace_metadata->last_kernel_addr_index_written == 0);
  }
#endif

  // Read all cpu buffers with remaining per-period quota.
  bool all_cpus_done = true;
  uint8_t* parsing_buf = reinterpret_cast<uint8_t*>(parsing_mem_.Get());
  const auto ftrace_clock = ftrace_config_muxer_->ftrace_clock();
  for (size_t i = 0; i < per_cpu_.size(); i++) {
    size_t orig_quota = per_cpu_[i].period_page_quota;
    if (orig_quota == 0)
      continue;

    size_t max_pages = std::min(orig_quota, kMaxPagesPerCpuPerReadTick);
    CpuReader& cpu_reader = *per_cpu_[i].reader;
    cpu_reader.set_ftrace_clock(ftrace_clock);
    size_t pages_read = cpu_reader.ReadCycle(
        parsing_buf, kParsingBufferSizePages, max_pages, started_data_sources_);

    size_t new_quota = (pages_read >= orig_quota) ? 0 : orig_quota - pages_read;
    per_cpu_[i].period_page_quota = new_quota;

    // The reader was stopped by the cap on the number of pages (so as not to
    // do too much work on the shared thread at once), but can read more in
    // this drain period. Repost the ReadTick (on the immediate queue) to
    // iterate over all cpus again. In other words, we will keep reposting
    // work for all cpus as long as at least one of them hits the read page
    // cap each tick. If all readers catch up to the event stream
    // (pages_read < max_pages), or exceed their quota, we will stop for the
    // given period.
    PERFETTO_DCHECK(pages_read <= max_pages);
    if (pages_read == max_pages && new_quota > 0)
      all_cpus_done = false;
  }
  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();

  // More work to do in this period.
  auto weak_this = weak_factory_.GetWeakPtr();
  if (!all_cpus_done) {
    PERFETTO_DLOG("Reposting immediate ReadTick as there's more work.");
    task_runner_->PostTask([weak_this, generation] {
      if (weak_this)
        weak_this->ReadTick(generation);
    });
  } else {
    // Done until next drain period.
    size_t period_page_quota = ftrace_config_muxer_->GetPerCpuBufferSizePages();
    for (auto& per_cpu : per_cpu_)
      per_cpu.period_page_quota = period_page_quota;

    // Snapshot the clock so the data in the next period will be clock-synced
    // as well.
    MaybeSnapshotFtraceClock();

    auto drain_period_ms = GetDrainPeriodMs();
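    // As in StartIfNeeded(), schedule the next tick on a |drain_period_ms|
    // boundary of the wall clock.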
    task_runner_->PostDelayedTask(
        [weak_this, generation] {
          if (weak_this)
            weak_this->ReadTick(generation);
        },
        drain_period_ms - (NowMs() % drain_period_ms));
  }
}

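// Returns the smallest drain period requested by the attached data sources,
// falling back to the default if unset or out of range.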
uint32_t FtraceController::GetDrainPeriodMs() {
  if (data_sources_.empty())
    return kDefaultDrainPeriodMs;
  uint32_t min_drain_period_ms = kMaxDrainPeriodMs + 1;
  for (const FtraceDataSource* data_source : data_sources_) {
    if (data_source->config().drain_period_ms() < min_drain_period_ms)
      min_drain_period_ms = data_source->config().drain_period_ms();
  }
  return ClampDrainPeriodMs(min_drain_period_ms);
}

void FtraceController::ClearTrace() {
  ftrace_procfs_->ClearTrace();
}

void FtraceController::DisableAllEvents() {
  ftrace_procfs_->DisableAllEvents();
}

void FtraceController::WriteTraceMarker(const std::string& s) {
  ftrace_procfs_->WriteTraceMarker(s);
}

void FtraceController::Flush(FlushRequestID flush_id) {
  metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                             metatrace::FTRACE_CPU_FLUSH);

  // Read all cpus in one go, limiting the per-cpu read amount to make sure we
  // don't get stuck chasing the writer if there's a very high bandwidth of
  // events.
  size_t per_cpu_buf_size_pages =
      ftrace_config_muxer_->GetPerCpuBufferSizePages();
  uint8_t* parsing_buf = reinterpret_cast<uint8_t*>(parsing_mem_.Get());
  for (size_t i = 0; i < per_cpu_.size(); i++) {
    per_cpu_[i].reader->ReadCycle(parsing_buf, kParsingBufferSizePages,
                                  per_cpu_buf_size_pages,
                                  started_data_sources_);
  }
  observer_->OnFtraceDataWrittenIntoDataSourceBuffers();

  for (FtraceDataSource* data_source : started_data_sources_)
    data_source->OnFtraceFlushComplete(flush_id);
}

void FtraceController::StopIfNeeded() {
  if (!started_data_sources_.empty())
    return;

  // We are not implicitly flushing on Stop. The tracing service is supposed to
  // ask for an explicit flush before stopping, unless it needs to perform a
  // non-graceful stop.

  per_cpu_.clear();
  symbolizer_->Destroy();
  cpu_zero_stats_fd_.reset();

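  // Keep the parsing buffer allocated for future tracing sessions, but let
  // the kernel reclaim its physical pages in the meantime.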
  if (parsing_mem_.IsValid()) {
    parsing_mem_.AdviseDontNeed(parsing_mem_.Get(), parsing_mem_.size());
  }
}

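// Validates and registers the data source's config with the muxer; reading
// does not start until the source is activated via StartDataSource().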
bool FtraceController::AddDataSource(FtraceDataSource* data_source) {
  if (!ValidConfig(data_source->config()))
    return false;

  auto config_id = ftrace_config_muxer_->SetupConfig(
      data_source->config(), data_source->mutable_setup_errors());
  if (!config_id)
    return false;

  const FtraceDataSourceConfig* ds_config =
      ftrace_config_muxer_->GetDataSourceConfig(config_id);
  auto it_and_inserted = data_sources_.insert(data_source);
  PERFETTO_DCHECK(it_and_inserted.second);
  data_source->Initialize(config_id, ds_config);
  return true;
}

bool FtraceController::StartDataSource(FtraceDataSource* data_source) {
  PERFETTO_DCHECK(data_sources_.count(data_source) > 0);

  FtraceConfigId config_id = data_source->config_id();
  PERFETTO_CHECK(config_id);

  if (!ftrace_config_muxer_->ActivateConfig(config_id))
    return false;

  started_data_sources_.insert(data_source);
  StartIfNeeded();

  // If the config requests kernel address symbolization, create the
  // symbolizer and parse /proc/kallsyms (this will take 200-300 ms). This is
  // not strictly required here, but doing it now avoids hitting the parsing
  // cost while processing the first ftrace event batch in CpuReader.
  if (data_source->config().symbolize_ksyms()) {
    if (data_source->config().initialize_ksyms_synchronously_for_testing()) {
      symbolizer_->GetOrCreateKernelSymbolMap();
    } else {
      auto weak_this = weak_factory_.GetWeakPtr();
      task_runner_->PostTask([weak_this] {
        if (weak_this)
          weak_this->symbolizer_->GetOrCreateKernelSymbolMap();
      });
    }
  }

  return true;
}

void FtraceController::RemoveDataSource(FtraceDataSource* data_source) {
  started_data_sources_.erase(data_source);
  size_t removed = data_sources_.erase(data_source);
  if (!removed)
    return;  // Can happen if AddDataSource failed (e.g. too many sessions).
  ftrace_config_muxer_->RemoveConfig(data_source->config_id());
  StopIfNeeded();
}

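// Fills |stats| with the per-cpu stats from tracefs; kallsyms stats are
// reported only if the symbolizer has already been initialized.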
void FtraceController::DumpFtraceStats(FtraceStats* stats) {
  DumpAllCpuStats(ftrace_procfs_.get(), stats);
  if (symbolizer_ && symbolizer_->is_valid()) {
    auto* symbol_map = symbolizer_->GetOrCreateKernelSymbolMap();
    stats->kernel_symbols_parsed =
        static_cast<uint32_t>(symbol_map->num_syms());
    stats->kernel_symbols_mem_kb =
        static_cast<uint32_t>(symbol_map->size_bytes() / 1024);
  }
}

void FtraceController::MaybeSnapshotFtraceClock() {
  if (!cpu_zero_stats_fd_)
    return;

  auto ftrace_clock = ftrace_config_muxer_->ftrace_clock();
  PERFETTO_DCHECK(ftrace_clock != protos::pbzero::FTRACE_CLOCK_UNSPECIFIED);

  // Snapshot the boot clock *before* reading the CPU stats so that the two
  // clocks are as close together as possible (i.e. if it was the other way
  // round, we'd skew by the cost of string parsing).
  ftrace_clock_snapshot_->boot_clock_ts = base::GetBootTimeNs().count();

  // A value of zero will cause this snapshot to be skipped.
  ftrace_clock_snapshot_->ftrace_clock_ts =
      ReadFtraceNowTs(cpu_zero_stats_fd_).value_or(0);
}


FtraceController::Observer::~Observer() = default;

}  // namespace perfetto