1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/traced/probes/probes_producer.h"
18
19 #include <stdio.h>
20 #include <sys/stat.h>
21
22 #include <algorithm>
23 #include <queue>
24 #include <string>
25
26 #include "perfetto/base/logging.h"
27 #include "perfetto/base/utils.h"
28 #include "perfetto/base/weak_ptr.h"
29 #include "perfetto/traced/traced.h"
30 #include "perfetto/tracing/core/data_source_config.h"
31 #include "perfetto/tracing/core/data_source_descriptor.h"
32 #include "perfetto/tracing/core/ftrace_config.h"
33 #include "perfetto/tracing/core/trace_config.h"
34 #include "perfetto/tracing/core/trace_packet.h"
35 #include "perfetto/tracing/ipc/producer_ipc_client.h"
36 #include "src/traced/probes/android_log/android_log_data_source.h"
37 #include "src/traced/probes/filesystem/inode_file_data_source.h"
38 #include "src/traced/probes/ftrace/ftrace_data_source.h"
39 #include "src/traced/probes/packages_list/packages_list_data_source.h"
40 #include "src/traced/probes/power/android_power_data_source.h"
41 #include "src/traced/probes/probes_data_source.h"
42 #include "src/traced/probes/ps/process_stats_data_source.h"
43 #include "src/traced/probes/sys_stats/sys_stats_data_source.h"
44
45 #include "perfetto/trace/filesystem/inode_file_map.pbzero.h"
46 #include "perfetto/trace/ftrace/ftrace_event_bundle.pbzero.h"
47 #include "perfetto/trace/ftrace/ftrace_stats.pbzero.h"
48 #include "perfetto/trace/trace_packet.pbzero.h"
49
50 namespace perfetto {
51 namespace {
52
53 constexpr uint32_t kInitialConnectionBackoffMs = 100;
54 constexpr uint32_t kMaxConnectionBackoffMs = 30 * 1000;
55
56 // Should be larger than FtraceController::kControllerFlushTimeoutMs.
57 constexpr uint32_t kFlushTimeoutMs = 1000;
58
59 constexpr char kFtraceSourceName[] = "linux.ftrace";
60 constexpr char kProcessStatsSourceName[] = "linux.process_stats";
61 constexpr char kInodeMapSourceName[] = "linux.inode_file_map";
62 constexpr char kSysStatsSourceName[] = "linux.sys_stats";
63 constexpr char kAndroidPowerSourceName[] = "android.power";
64 constexpr char kAndroidLogSourceName[] = "android.log";
65 constexpr char kPackagesListSourceName[] = "android.packages_list";
66
67 } // namespace.
68
69 // State transition diagram:
70 // +----------------------------+
71 // v +
72 // NotStarted -> NotConnected -> Connecting -> Connected
73 // ^ +
74 // +--------------+
75 //
76
ProbesProducer()77 ProbesProducer::ProbesProducer() : weak_factory_(this) {}
~ProbesProducer()78 ProbesProducer::~ProbesProducer() {
79 // The ftrace data sources must be deleted before the ftrace controller.
80 data_sources_.clear();
81 ftrace_.reset();
82 }
83
OnConnect()84 void ProbesProducer::OnConnect() {
85 PERFETTO_DCHECK(state_ == kConnecting);
86 state_ = kConnected;
87 ResetConnectionBackoff();
88 PERFETTO_LOG("Connected to the service");
89
90 {
91 DataSourceDescriptor desc;
92 desc.set_name(kFtraceSourceName);
93 endpoint_->RegisterDataSource(desc);
94 }
95
96 {
97 DataSourceDescriptor desc;
98 desc.set_name(kProcessStatsSourceName);
99 desc.set_handles_incremental_state_clear(true);
100 endpoint_->RegisterDataSource(desc);
101 }
102
103 {
104 DataSourceDescriptor desc;
105 desc.set_name(kInodeMapSourceName);
106 endpoint_->RegisterDataSource(desc);
107 }
108
109 {
110 DataSourceDescriptor desc;
111 desc.set_name(kSysStatsSourceName);
112 endpoint_->RegisterDataSource(desc);
113 }
114
115 {
116 DataSourceDescriptor desc;
117 desc.set_name(kAndroidPowerSourceName);
118 endpoint_->RegisterDataSource(desc);
119 }
120
121 {
122 DataSourceDescriptor desc;
123 desc.set_name(kAndroidLogSourceName);
124 endpoint_->RegisterDataSource(desc);
125 }
126
127 {
128 DataSourceDescriptor desc;
129 desc.set_name(kPackagesListSourceName);
130 endpoint_->RegisterDataSource(desc);
131 }
132 }
133
OnDisconnect()134 void ProbesProducer::OnDisconnect() {
135 PERFETTO_DCHECK(state_ == kConnected || state_ == kConnecting);
136 PERFETTO_LOG("Disconnected from tracing service");
137 if (state_ == kConnected)
138 return task_runner_->PostTask([this] { this->Restart(); });
139
140 state_ = kNotConnected;
141 IncreaseConnectionBackoff();
142 task_runner_->PostDelayedTask([this] { this->Connect(); },
143 connection_backoff_ms_);
144 }
145
Restart()146 void ProbesProducer::Restart() {
147 // We lost the connection with the tracing service. At this point we need
148 // to reset all the data sources. Trying to handle that manually is going to
149 // be error prone. What we do here is simply desroying the instance and
150 // recreating it again.
151 // TODO(hjd): Add e2e test for this.
152
153 base::TaskRunner* task_runner = task_runner_;
154 const char* socket_name = socket_name_;
155
156 // Invoke destructor and then the constructor again.
157 this->~ProbesProducer();
158 new (this) ProbesProducer();
159
160 ConnectWithRetries(socket_name, task_runner);
161 }
162
SetupDataSource(DataSourceInstanceID instance_id,const DataSourceConfig & config)163 void ProbesProducer::SetupDataSource(DataSourceInstanceID instance_id,
164 const DataSourceConfig& config) {
165 PERFETTO_DLOG("SetupDataSource(id=%" PRIu64 ", name=%s)", instance_id,
166 config.name().c_str());
167 PERFETTO_DCHECK(data_sources_.count(instance_id) == 0);
168 TracingSessionID session_id = config.tracing_session_id();
169 PERFETTO_CHECK(session_id > 0);
170
171 std::unique_ptr<ProbesDataSource> data_source;
172 if (config.name() == kFtraceSourceName) {
173 data_source = CreateFtraceDataSource(session_id, config);
174 } else if (config.name() == kInodeMapSourceName) {
175 data_source = CreateInodeFileDataSource(session_id, config);
176 } else if (config.name() == kProcessStatsSourceName) {
177 data_source = CreateProcessStatsDataSource(session_id, config);
178 } else if (config.name() == kSysStatsSourceName) {
179 data_source = CreateSysStatsDataSource(session_id, config);
180 } else if (config.name() == kAndroidPowerSourceName) {
181 data_source = CreateAndroidPowerDataSource(session_id, config);
182 } else if (config.name() == kAndroidLogSourceName) {
183 data_source = CreateAndroidLogDataSource(session_id, config);
184 } else if (config.name() == kPackagesListSourceName) {
185 data_source = CreatePackagesListDataSource(session_id, config);
186 }
187
188 if (!data_source) {
189 PERFETTO_ELOG("Failed to create data source '%s'", config.name().c_str());
190 return;
191 }
192
193 session_data_sources_.emplace(session_id, data_source.get());
194 data_sources_[instance_id] = std::move(data_source);
195 }
196
StartDataSource(DataSourceInstanceID instance_id,const DataSourceConfig & config)197 void ProbesProducer::StartDataSource(DataSourceInstanceID instance_id,
198 const DataSourceConfig& config) {
199 PERFETTO_DLOG("StartDataSource(id=%" PRIu64 ", name=%s)", instance_id,
200 config.name().c_str());
201 auto it = data_sources_.find(instance_id);
202 if (it == data_sources_.end()) {
203 // Can happen if SetupDataSource() failed (e.g. ftrace was busy).
204 PERFETTO_ELOG("Data source id=%" PRIu64 " not found", instance_id);
205 return;
206 }
207 ProbesDataSource* data_source = it->second.get();
208 if (data_source->started)
209 return;
210 if (config.trace_duration_ms() != 0) {
211 uint32_t timeout = 5000 + 2 * config.trace_duration_ms();
212 watchdogs_.emplace(
213 instance_id, base::Watchdog::GetInstance()->CreateFatalTimer(timeout));
214 }
215 data_source->started = true;
216 data_source->Start();
217 }
218
CreateFtraceDataSource(TracingSessionID session_id,const DataSourceConfig & config)219 std::unique_ptr<ProbesDataSource> ProbesProducer::CreateFtraceDataSource(
220 TracingSessionID session_id,
221 const DataSourceConfig& config) {
222 // Don't retry if FtraceController::Create() failed once.
223 // This can legitimately happen on user builds where we cannot access the
224 // debug paths, e.g., because of SELinux rules.
225 if (ftrace_creation_failed_)
226 return nullptr;
227
228 // Lazily create on the first instance.
229 if (!ftrace_) {
230 ftrace_ = FtraceController::Create(task_runner_, this);
231
232 if (!ftrace_) {
233 PERFETTO_ELOG("Failed to create FtraceController");
234 ftrace_creation_failed_ = true;
235 return nullptr;
236 }
237
238 ftrace_->DisableAllEvents();
239 ftrace_->ClearTrace();
240 }
241
242 PERFETTO_LOG("Ftrace setup (target_buf=%" PRIu32 ")", config.target_buffer());
243 const BufferID buffer_id = static_cast<BufferID>(config.target_buffer());
244 std::unique_ptr<FtraceDataSource> data_source(new FtraceDataSource(
245 ftrace_->GetWeakPtr(), session_id, config.ftrace_config(),
246 endpoint_->CreateTraceWriter(buffer_id)));
247 if (!ftrace_->AddDataSource(data_source.get())) {
248 PERFETTO_ELOG(
249 "Failed to setup tracing (too many concurrent sessions or ftrace is "
250 "already in use)");
251 return nullptr;
252 }
253 return std::move(data_source);
254 }
255
CreateInodeFileDataSource(TracingSessionID session_id,DataSourceConfig source_config)256 std::unique_ptr<ProbesDataSource> ProbesProducer::CreateInodeFileDataSource(
257 TracingSessionID session_id,
258 DataSourceConfig source_config) {
259 PERFETTO_LOG("Inode file map setup (target_buf=%" PRIu32 ")",
260 source_config.target_buffer());
261 auto buffer_id = static_cast<BufferID>(source_config.target_buffer());
262 if (system_inodes_.empty())
263 CreateStaticDeviceToInodeMap("/system", &system_inodes_);
264 return std::unique_ptr<InodeFileDataSource>(new InodeFileDataSource(
265 std::move(source_config), task_runner_, session_id, &system_inodes_,
266 &cache_, endpoint_->CreateTraceWriter(buffer_id)));
267 }
268
CreateProcessStatsDataSource(TracingSessionID session_id,const DataSourceConfig & config)269 std::unique_ptr<ProbesDataSource> ProbesProducer::CreateProcessStatsDataSource(
270 TracingSessionID session_id,
271 const DataSourceConfig& config) {
272 auto buffer_id = static_cast<BufferID>(config.target_buffer());
273 return std::unique_ptr<ProcessStatsDataSource>(new ProcessStatsDataSource(
274 task_runner_, session_id, endpoint_->CreateTraceWriter(buffer_id),
275 config));
276 }
277
CreateAndroidPowerDataSource(TracingSessionID session_id,const DataSourceConfig & config)278 std::unique_ptr<ProbesDataSource> ProbesProducer::CreateAndroidPowerDataSource(
279 TracingSessionID session_id,
280 const DataSourceConfig& config) {
281 auto buffer_id = static_cast<BufferID>(config.target_buffer());
282 return std::unique_ptr<ProbesDataSource>(
283 new AndroidPowerDataSource(config, task_runner_, session_id,
284 endpoint_->CreateTraceWriter(buffer_id)));
285 }
286
CreateAndroidLogDataSource(TracingSessionID session_id,const DataSourceConfig & config)287 std::unique_ptr<ProbesDataSource> ProbesProducer::CreateAndroidLogDataSource(
288 TracingSessionID session_id,
289 const DataSourceConfig& config) {
290 auto buffer_id = static_cast<BufferID>(config.target_buffer());
291 return std::unique_ptr<ProbesDataSource>(
292 new AndroidLogDataSource(config, task_runner_, session_id,
293 endpoint_->CreateTraceWriter(buffer_id)));
294 }
295
CreatePackagesListDataSource(TracingSessionID session_id,const DataSourceConfig & config)296 std::unique_ptr<ProbesDataSource> ProbesProducer::CreatePackagesListDataSource(
297 TracingSessionID session_id,
298 const DataSourceConfig& config) {
299 auto buffer_id = static_cast<BufferID>(config.target_buffer());
300 return std::unique_ptr<ProbesDataSource>(new PackagesListDataSource(
301 config, session_id, endpoint_->CreateTraceWriter(buffer_id)));
302 }
303
CreateSysStatsDataSource(TracingSessionID session_id,const DataSourceConfig & config)304 std::unique_ptr<ProbesDataSource> ProbesProducer::CreateSysStatsDataSource(
305 TracingSessionID session_id,
306 const DataSourceConfig& config) {
307 auto buffer_id = static_cast<BufferID>(config.target_buffer());
308 return std::unique_ptr<SysStatsDataSource>(
309 new SysStatsDataSource(task_runner_, session_id,
310 endpoint_->CreateTraceWriter(buffer_id), config));
311 }
312
StopDataSource(DataSourceInstanceID id)313 void ProbesProducer::StopDataSource(DataSourceInstanceID id) {
314 PERFETTO_LOG("Producer stop (id=%" PRIu64 ")", id);
315 auto it = data_sources_.find(id);
316 if (it == data_sources_.end()) {
317 // Can happen if SetupDataSource() failed (e.g. ftrace was busy).
318 PERFETTO_ELOG("Cannot stop data source id=%" PRIu64 ", not found", id);
319 return;
320 }
321 ProbesDataSource* data_source = it->second.get();
322 TracingSessionID session_id = data_source->tracing_session_id;
323 auto range = session_data_sources_.equal_range(session_id);
324 for (auto kv = range.first; kv != range.second; kv++) {
325 if (kv->second != data_source)
326 continue;
327 session_data_sources_.erase(kv);
328 break;
329 }
330 data_sources_.erase(it);
331 watchdogs_.erase(id);
332 }
333
OnTracingSetup()334 void ProbesProducer::OnTracingSetup() {}
335
Flush(FlushRequestID flush_request_id,const DataSourceInstanceID * data_source_ids,size_t num_data_sources)336 void ProbesProducer::Flush(FlushRequestID flush_request_id,
337 const DataSourceInstanceID* data_source_ids,
338 size_t num_data_sources) {
339 PERFETTO_DCHECK(flush_request_id);
340 auto weak_this = weak_factory_.GetWeakPtr();
341
342 // Issue a Flush() to all started data sources.
343 bool flush_queued = false;
344 for (size_t i = 0; i < num_data_sources; i++) {
345 DataSourceInstanceID ds_id = data_source_ids[i];
346 auto it = data_sources_.find(ds_id);
347 if (it == data_sources_.end() || !it->second->started)
348 continue;
349 pending_flushes_.emplace(flush_request_id, ds_id);
350 flush_queued = true;
351 auto flush_callback = [weak_this, flush_request_id, ds_id] {
352 if (weak_this)
353 weak_this->OnDataSourceFlushComplete(flush_request_id, ds_id);
354 };
355 it->second->Flush(flush_request_id, flush_callback);
356 }
357
358 // If there is nothing to flush, ack immediately.
359 if (!flush_queued) {
360 endpoint_->NotifyFlushComplete(flush_request_id);
361 return;
362 }
363
364 // Otherwise, post the timeout task.
365 task_runner_->PostDelayedTask(
366 [weak_this, flush_request_id] {
367 if (weak_this)
368 weak_this->OnFlushTimeout(flush_request_id);
369 },
370 kFlushTimeoutMs);
371 }
372
OnDataSourceFlushComplete(FlushRequestID flush_request_id,DataSourceInstanceID ds_id)373 void ProbesProducer::OnDataSourceFlushComplete(FlushRequestID flush_request_id,
374 DataSourceInstanceID ds_id) {
375 PERFETTO_DLOG("Flush %" PRIu64 " acked by data source %" PRIu64,
376 flush_request_id, ds_id);
377 auto range = pending_flushes_.equal_range(flush_request_id);
378 for (auto it = range.first; it != range.second; it++) {
379 if (it->second == ds_id) {
380 pending_flushes_.erase(it);
381 break;
382 }
383 }
384
385 if (pending_flushes_.count(flush_request_id))
386 return; // Still waiting for other data sources to ack.
387
388 PERFETTO_DLOG("All data sources acked to flush %" PRIu64, flush_request_id);
389 endpoint_->NotifyFlushComplete(flush_request_id);
390 }
391
OnFlushTimeout(FlushRequestID flush_request_id)392 void ProbesProducer::OnFlushTimeout(FlushRequestID flush_request_id) {
393 if (pending_flushes_.count(flush_request_id) == 0)
394 return; // All acked.
395 PERFETTO_ELOG("Flush(%" PRIu64 ") timed out", flush_request_id);
396 pending_flushes_.erase(flush_request_id);
397 endpoint_->NotifyFlushComplete(flush_request_id);
398 }
399
ClearIncrementalState(const DataSourceInstanceID * data_source_ids,size_t num_data_sources)400 void ProbesProducer::ClearIncrementalState(
401 const DataSourceInstanceID* data_source_ids,
402 size_t num_data_sources) {
403 for (size_t i = 0; i < num_data_sources; i++) {
404 DataSourceInstanceID ds_id = data_source_ids[i];
405 auto it = data_sources_.find(ds_id);
406 if (it == data_sources_.end() || !it->second->started)
407 continue;
408
409 it->second->ClearIncrementalState();
410 }
411 }
412
413 // This function is called by the FtraceController in batches, whenever it has
414 // read one or more pages from one or more cpus and written that into the
415 // userspace tracing buffer. If more than one ftrace data sources are active,
416 // this call typically happens after writing for all session has been handled.
OnFtraceDataWrittenIntoDataSourceBuffers()417 void ProbesProducer::OnFtraceDataWrittenIntoDataSourceBuffers() {
418 TracingSessionID last_session_id = 0;
419 FtraceMetadata* metadata = nullptr;
420 InodeFileDataSource* inode_data_source = nullptr;
421 ProcessStatsDataSource* ps_data_source = nullptr;
422
423 // unordered_multimap guarantees that entries with the same key are contiguous
424 // in the iteration.
425 for (auto it = session_data_sources_.begin(); /* check below*/; it++) {
426 // If this is the last iteration or the session id has changed,
427 // dispatch the metadata update to the linked data sources, if any.
428 if (it == session_data_sources_.end() || it->first != last_session_id) {
429 bool has_inodes = metadata && !metadata->inode_and_device.empty();
430 bool has_pids = metadata && !metadata->pids.empty();
431 bool has_rename_pids = metadata && !metadata->rename_pids.empty();
432 if (has_inodes && inode_data_source)
433 inode_data_source->OnInodes(metadata->inode_and_device);
434 // Ordering the rename pids before the seen pids is important so that any
435 // renamed processes get scraped in the OnPids call.
436 if (has_rename_pids && ps_data_source)
437 ps_data_source->OnRenamePids(metadata->rename_pids);
438 if (has_pids && ps_data_source)
439 ps_data_source->OnPids(metadata->pids);
440 if (metadata)
441 metadata->Clear();
442 metadata = nullptr;
443 inode_data_source = nullptr;
444 ps_data_source = nullptr;
445 if (it == session_data_sources_.end())
446 break;
447 last_session_id = it->first;
448 }
449 ProbesDataSource* ds = it->second;
450 if (!ds->started)
451 continue;
452 switch (ds->type_id) {
453 case FtraceDataSource::kTypeId:
454 metadata = static_cast<FtraceDataSource*>(ds)->mutable_metadata();
455 break;
456 case InodeFileDataSource::kTypeId:
457 inode_data_source = static_cast<InodeFileDataSource*>(ds);
458 break;
459 case ProcessStatsDataSource::kTypeId: {
460 // A trace session might have declared more than one ps data source.
461 // In those cases we often use one for a full dump on startup (
462 // targeting a dedicated buffer) and another one for on-demand dumps
463 // targeting the main buffer.
464 // Only use the one that has on-demand dumps enabled, if any.
465 auto ps = static_cast<ProcessStatsDataSource*>(ds);
466 if (ps->on_demand_dumps_enabled())
467 ps_data_source = ps;
468 break;
469 }
470 case SysStatsDataSource::kTypeId:
471 case AndroidLogDataSource::kTypeId:
472 case PackagesListDataSource::kTypeId:
473 break;
474 default:
475 PERFETTO_DFATAL("Invalid data source.");
476 } // switch (type_id)
477 } // for (session_data_sources_)
478 }
479
ConnectWithRetries(const char * socket_name,base::TaskRunner * task_runner)480 void ProbesProducer::ConnectWithRetries(const char* socket_name,
481 base::TaskRunner* task_runner) {
482 PERFETTO_DCHECK(state_ == kNotStarted);
483 state_ = kNotConnected;
484
485 ResetConnectionBackoff();
486 socket_name_ = socket_name;
487 task_runner_ = task_runner;
488 Connect();
489 }
490
Connect()491 void ProbesProducer::Connect() {
492 PERFETTO_DCHECK(state_ == kNotConnected);
493 state_ = kConnecting;
494 endpoint_ = ProducerIPCClient::Connect(
495 socket_name_, this, "perfetto.traced_probes", task_runner_);
496 }
497
IncreaseConnectionBackoff()498 void ProbesProducer::IncreaseConnectionBackoff() {
499 connection_backoff_ms_ *= 2;
500 if (connection_backoff_ms_ > kMaxConnectionBackoffMs)
501 connection_backoff_ms_ = kMaxConnectionBackoffMs;
502 }
503
ResetConnectionBackoff()504 void ProbesProducer::ResetConnectionBackoff() {
505 connection_backoff_ms_ = kInitialConnectionBackoffMs;
506 }
507
508 } // namespace perfetto
509