1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <inttypes.h>
18 #include <libgen.h>
19 #include <signal.h>
20 #include <sys/prctl.h>
21 #include <sys/utsname.h>
22 #include <time.h>
23 #include <unistd.h>
24 #include <set>
25 #include <string>
26 #include <unordered_map>
27 #include <vector>
28
29 #include <android-base/logging.h>
30 #include <android-base/file.h>
31 #include <android-base/parsedouble.h>
32 #include <android-base/parseint.h>
33 #include <android-base/strings.h>
34 #include <android-base/test_utils.h>
35 #if defined(__ANDROID__)
36 #include <android-base/properties.h>
37 #endif
38
39 #include "CallChainJoiner.h"
40 #include "command.h"
41 #include "environment.h"
42 #include "event_selection_set.h"
43 #include "event_type.h"
44 #include "IOEventLoop.h"
45 #include "OfflineUnwinder.h"
46 #include "perf_clock.h"
47 #include "read_apk.h"
48 #include "read_elf.h"
49 #include "record.h"
50 #include "record_file.h"
51 #include "thread_tree.h"
52 #include "tracing.h"
53 #include "utils.h"
54 #include "workload.h"
55
56 using namespace simpleperf;
57
// Event type sampled when the user doesn't select one with -e/--group.
static std::string default_measured_event_type = "cpu-cycles";

// Maps -j/-b branch filter names to PERF_SAMPLE_BRANCH_* flag bits used in
// perf_event_attr.branch_sample_type.
static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
    {"u", PERF_SAMPLE_BRANCH_USER},
    {"k", PERF_SAMPLE_BRANCH_KERNEL},
    {"any", PERF_SAMPLE_BRANCH_ANY},
    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
};

// Maps --clockid argument values to the clockids accepted by the kernel.
static std::unordered_map<std::string, int> clockid_map = {
    {"realtime", CLOCK_REALTIME},
    {"monotonic", CLOCK_MONOTONIC},
    {"monotonic_raw", CLOCK_MONOTONIC_RAW},
    {"boottime", CLOCK_BOOTTIME},
};

// The max size of records dumped by kernel is 65535, and dump stack size
// should be a multiply of 8, so MAX_DUMP_STACK_SIZE is 65528.
constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;

// The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
// Here 1024 is a desired value for pages in mapped buffer. If mapped
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;

// Cache size used by CallChainJoiner to cache call chains in memory.
constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024;
87
// Implementation of `simpleperf record`: opens perf events for the selected
// targets, pulls sample records out of the kernel's mapped buffers, and writes
// them (with optional dwarf unwinding and callchain joining) into perf.data.
class RecordCommand : public Command {
 public:
  RecordCommand()
      : Command(
            "record", "record sampling info in perf.data",
            // clang-format off
"Usage: simpleperf record [options] [--] [command [command-args]]\n"
" Gather sampling information of running [command]. And -a/-p/-t option\n"
" can be used to change target of sampling information.\n"
" The default options are: -e cpu-cycles -f 4000 -o perf.data.\n"
"Select monitored threads:\n"
"-a System-wide collection.\n"
#if defined(__ANDROID__)
"--app package_name Profile the process of an Android application.\n"
" On non-rooted devices, the app must be debuggable,\n"
" because we use run-as to switch to the app's context.\n"
#endif
"-p pid1,pid2,... Record events on existing processes. Mutually exclusive\n"
" with -a.\n"
"-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
"\n"
"Select monitored event types:\n"
"-e event1[:modifier1],event2[:modifier2],...\n"
" Select the event list to sample. Use `simpleperf list` to find\n"
" all possible event names. Modifiers can be added to define how\n"
" the event should be monitored.\n"
" Possible modifiers are:\n"
" u - monitor user space events only\n"
" k - monitor kernel space events only\n"
"--group event1[:modifier],event2[:modifier2],...\n"
" Similar to -e option. But events specified in the same --group\n"
" option are monitored as a group, and scheduled in and out at the\n"
" same time.\n"
"--trace-offcpu Generate samples when threads are scheduled off cpu.\n"
" Similar to \"-c 1 -e sched:sched_switch\".\n"
"\n"
"Select monitoring options:\n"
"-f freq Set event sample frequency. It means recording at most [freq]\n"
" samples every second. For non-tracepoint events, the default\n"
" option is -f 4000. A -f/-c option affects all event types\n"
" following it until meeting another -f/-c option. For example,\n"
" for \"-f 1000 cpu-cycles -c 1 -e sched:sched_switch\", cpu-cycles\n"
" has sample freq 1000, sched:sched_switch event has sample period 1.\n"
"-c count Set event sample period. It means recording one sample when\n"
" [count] events happen. For tracepoint events, the default option\n"
" is -c 1.\n"
"--call-graph fp | dwarf[,<dump_stack_size>]\n"
" Enable call graph recording. Use frame pointer or dwarf debug\n"
" frame as the method to parse call graph in stack.\n"
" Default is dwarf,65528.\n"
"-g Same as '--call-graph dwarf'.\n"
"--clockid clock_id Generate timestamps of samples using selected clock.\n"
" Possible values are: realtime, monotonic,\n"
" monotonic_raw, boottime, perf. Default is perf.\n"
"--cpu cpu_item1,cpu_item2,...\n"
" Collect samples only on the selected cpus. cpu_item can be cpu\n"
" number like 1, or cpu range like 0-3.\n"
"--duration time_in_sec Monitor for time_in_sec seconds instead of running\n"
" [command]. Here time_in_sec may be any positive\n"
" floating point number.\n"
"-j branch_filter1,branch_filter2,...\n"
" Enable taken branch stack sampling. Each sample captures a series\n"
" of consecutive taken branches.\n"
" The following filters are defined:\n"
" any: any type of branch\n"
" any_call: any function call or system call\n"
" any_ret: any function return or system call return\n"
" ind_call: any indirect branch\n"
" u: only when the branch target is at the user level\n"
" k: only when the branch target is in the kernel\n"
" This option requires at least one branch type among any, any_call,\n"
" any_ret, ind_call.\n"
"-b Enable taken branch stack sampling. Same as '-j any'.\n"
"-m mmap_pages Set the size of the buffer used to receiving sample data from\n"
" the kernel. It should be a power of 2. If not set, the max\n"
" possible value <= 1024 will be used.\n"
"--no-inherit Don't record created child threads/processes.\n"
"\n"
"Dwarf unwinding options:\n"
"--no-post-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
" will be recorded in perf.data and unwound after recording.\n"
" However, this takes a lot of disk space. Use this option to\n"
" unwind while recording.\n"
"--no-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
" will be unwound by default. Use this option to disable the\n"
" unwinding of the user's stack.\n"
"--no-callchain-joiner If `--call-graph dwarf` option is used, then by default\n"
" callchain joiner is used to break the 64k stack limit\n"
" and build more complete call graphs. However, the built\n"
" call graphs may not be correct in all cases.\n"
"--callchain-joiner-min-matching-nodes count\n"
" When callchain joiner is used, set the matched nodes needed to join\n"
" callchains. The count should be >= 1. By default it is 1.\n"
"\n"
"Recording file options:\n"
"--no-dump-kernel-symbols Don't dump kernel symbols in perf.data. By default\n"
" kernel symbols will be dumped when needed.\n"
"--no-dump-symbols Don't dump symbols in perf.data. By default symbols are\n"
" dumped in perf.data, to support reporting in another\n"
" environment.\n"
"-o record_file_name Set record file name, default is perf.data.\n"
"--exit-with-parent Stop recording when the process starting\n"
" simpleperf dies.\n"
"--start_profiling_fd fd_no After starting profiling, write \"STARTED\" to\n"
" <fd_no>, then close <fd_no>.\n"
"--symfs <dir> Look for files with symbols relative to this directory.\n"
" This option is used to provide files with symbol table and\n"
" debug information, which are used for unwinding and dumping symbols.\n"
#if 0
// Below options are only used internally and shouldn't be visible to the public.
"--in-app We are already running in the app's context.\n"
"--tracepoint-events file_name Read tracepoint events from [file_name] instead of tracefs.\n"
#endif
            // clang-format on
            ),
        system_wide_collection_(false),
        branch_sampling_(0),
        fp_callchain_sampling_(false),
        dwarf_callchain_sampling_(false),
        dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
        unwind_dwarf_callchain_(true),
        post_unwind_(true),
        child_inherit_(true),
        duration_in_sec_(0),
        can_dump_kernel_symbols_(true),
        dump_symbols_(true),
        clockid_("perf"),
        event_selection_set_(false),
        mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
        record_filename_("perf.data"),
        start_sampling_time_in_ns_(0),
        sample_record_count_(0),
        lost_record_count_(0),
        start_profiling_fd_(-1),
        in_app_context_(false),
        trace_offcpu_(false),
        exclude_kernel_callchain_(false),
        allow_callchain_joiner_(true),
        callchain_joiner_min_matching_nodes_(1u) {
    // If we run `adb shell simpleperf record xxx` and stop profiling by ctrl-c, adb closes
    // sockets connecting simpleperf. After that, simpleperf will receive SIGPIPE when writing
    // to stdout/stderr, which is a problem when we use '--app' option. So ignore SIGPIPE to
    // finish properly.
    signal(SIGPIPE, SIG_IGN);
    app_package_name_ = GetDefaultAppPackageName();
  }

  // Entry point: parses options, prepares events, records, post-processes.
  bool Run(const std::vector<std::string>& args);

 private:
  bool ParseOptions(const std::vector<std::string>& args,
                    std::vector<std::string>* non_option_args);
  // Phases of Run(): setup, recording loop, and perf.data finalization.
  bool PrepareRecording(Workload* workload);
  bool DoRecording(Workload* workload);
  bool PostProcessRecording(const std::vector<std::string>& args);
  bool TraceOffCpu();
  bool SetEventSelectionFlags();
  bool CreateAndInitRecordFile();
  std::unique_ptr<RecordFileWriter> CreateRecordFile(
      const std::string& filename);
  bool DumpKernelSymbol();
  bool DumpTracingData();
  bool DumpKernelAndModuleMmaps(const perf_event_attr& attr, uint64_t event_id);
  bool DumpThreadCommAndMmaps(const perf_event_attr& attr, uint64_t event_id);
  // Record-processing pipeline, invoked from the IO event loop callback.
  bool ProcessRecord(Record* record);
  bool SaveRecordForPostUnwinding(Record* record);
  bool SaveRecordAfterUnwinding(Record* record);
  bool SaveRecordWithoutUnwinding(Record* record);

  void UpdateRecordForEmbeddedElfPath(Record* record);
  bool UnwindRecord(SampleRecord& r);
  bool PostUnwindRecords();
  bool JoinCallChains();
  bool DumpAdditionalFeatures(const std::vector<std::string>& args);
  bool DumpBuildIdFeature();
  bool DumpFileFeature();
  bool DumpMetaInfoFeature();
  void CollectHitFileInfo(const SampleRecord& r);

  // Sample speed (period or frequency) applied to events; null until a
  // -c/-f option is parsed or the default event is added.
  std::unique_ptr<SampleSpeed> sample_speed_;
  bool system_wide_collection_;
  uint64_t branch_sampling_;  // bitmask of PERF_SAMPLE_BRANCH_* flags
  bool fp_callchain_sampling_;
  bool dwarf_callchain_sampling_;
  uint32_t dump_stack_size_in_dwarf_sampling_;
  bool unwind_dwarf_callchain_;
  bool post_unwind_;
  std::unique_ptr<OfflineUnwinder> offline_unwinder_;
  bool child_inherit_;
  double duration_in_sec_;
  bool can_dump_kernel_symbols_;
  bool dump_symbols_;
  std::string clockid_;
  std::vector<int> cpus_;
  EventSelectionSet event_selection_set_;

  // Min/max pages per mapped kernel buffer; collapsed to one value by -m.
  std::pair<size_t, size_t> mmap_page_range_;

  ThreadTree thread_tree_;
  std::string record_filename_;
  std::unique_ptr<RecordFileWriter> record_file_writer_;

  uint64_t start_sampling_time_in_ns_;  // nanoseconds from machine starting

  uint64_t sample_record_count_;
  uint64_t lost_record_count_;
  int start_profiling_fd_;
  std::string app_package_name_;
  bool in_app_context_;
  bool trace_offcpu_;
  bool exclude_kernel_callchain_;

  // For CallChainJoiner
  bool allow_callchain_joiner_;
  size_t callchain_joiner_min_matching_nodes_;
  std::unique_ptr<CallChainJoiner> callchain_joiner_;
};
305
Run(const std::vector<std::string> & args)306 bool RecordCommand::Run(const std::vector<std::string>& args) {
307 ScopedCurrentArch scoped_arch(GetMachineArch());
308 if (!CheckPerfEventLimit()) {
309 return false;
310 }
311 AllowMoreOpenedFiles();
312
313 std::vector<std::string> workload_args;
314 if (!ParseOptions(args, &workload_args)) {
315 return false;
316 }
317 ScopedTempFiles scoped_temp_files(android::base::Dirname(record_filename_));
318 if (!app_package_name_.empty() && !in_app_context_) {
319 // Some users want to profile non debuggable apps on rooted devices. If we use run-as,
320 // it will be impossible when using --app. So don't switch to app's context when we are
321 // root.
322 if (!IsRoot()) {
323 return RunInAppContext(app_package_name_, "record", args, workload_args.size(),
324 record_filename_, true);
325 }
326 }
327 std::unique_ptr<Workload> workload;
328 if (!workload_args.empty()) {
329 workload = Workload::CreateWorkload(workload_args);
330 if (workload == nullptr) {
331 return false;
332 }
333 }
334 if (!PrepareRecording(workload.get())) {
335 return false;
336 }
337 if (!DoRecording(workload.get())) {
338 return false;
339 }
340 return PostProcessRecording(args);
341 }
342
// Sets up everything needed before sampling starts: clock, default event,
// option-derived flags, monitored targets, perf event fds and mmap buffers,
// the output file, and the IO event loop callbacks. Order matters: targets
// must be known before opening event files, and the record file must exist
// before ProcessRecord can be called.
bool RecordCommand::PrepareRecording(Workload* workload) {
  // 1. Prepare in other modules.
  if (!InitPerfClock()) {
    return false;
  }
  PrepareVdsoFile();

  // 2. Add default event type.
  if (event_selection_set_.empty()) {
    size_t group_id;
    if (!event_selection_set_.AddEventType(default_measured_event_type, &group_id)) {
      return false;
    }
    if (sample_speed_) {
      event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
    }
  }

  // 3. Process options before opening perf event files.
  exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel();
  if (trace_offcpu_ && !TraceOffCpu()) {
    return false;
  }
  if (!SetEventSelectionFlags()) {
    return false;
  }
  if (unwind_dwarf_callchain_) {
    offline_unwinder_.reset(new OfflineUnwinder(false));
  }
  if (unwind_dwarf_callchain_ && allow_callchain_joiner_) {
    callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
                                                callchain_joiner_min_matching_nodes_,
                                                false));
  }

  // 4. Add monitored targets.
  bool need_to_check_targets = false;
  if (system_wide_collection_) {
    // tid -1 selects all threads on all cpus for perf_event_open().
    event_selection_set_.AddMonitoredThreads({-1});
  } else if (!event_selection_set_.HasMonitoredTarget()) {
    if (workload != nullptr) {
      event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
      // Delay counting until the workload exec()s its command.
      event_selection_set_.SetEnableOnExec(true);
      if (event_selection_set_.HasInplaceSampler()) {
        // Start worker early, because the worker process has to setup inplace-sampler server
        // before we try to connect it.
        if (!workload->Start()) {
          return false;
        }
      }
    } else if (!app_package_name_.empty()) {
      // If app process is not created, wait for it. This allows simpleperf starts before
      // app process. In this way, we can have a better support of app start-up time profiling.
      std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
      event_selection_set_.AddMonitoredProcesses(pids);
      need_to_check_targets = true;
    } else {
      LOG(ERROR)
          << "No threads to monitor. Try `simpleperf help record` for help";
      return false;
    }
  } else {
    need_to_check_targets = true;
  }

  // 5. Open perf event files and create mapped buffers.
  if (!event_selection_set_.OpenEventFiles(cpus_)) {
    return false;
  }
  if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first,
                                           mmap_page_range_.second)) {
    return false;
  }

  // 6. Create perf.data.
  if (!CreateAndInitRecordFile()) {
    return false;
  }

  // 7. Add read/signal/periodic Events.
  auto callback =
      std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
  if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
    return false;
  }
  if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
    return false;
  }
  // Exit the recording loop on child exit or the usual termination signals.
  IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
  if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM},
                             [loop]() { return loop->ExitLoop(); })) {
    return false;
  }

  // Only add an event for SIGHUP if we didn't inherit SIG_IGN (e.g. from nohup).
  if (!SignalIsIgnored(SIGHUP)) {
    if (!loop->AddSignalEvent(SIGHUP, [loop]() { return loop->ExitLoop(); })) {
      return false;
    }
  }

  // --duration: stop the loop after the requested time.
  if (duration_in_sec_ != 0) {
    if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
                                [loop]() { return loop->ExitLoop(); })) {
      return false;
    }
  }
  return true;
}
452
DoRecording(Workload * workload)453 bool RecordCommand::DoRecording(Workload* workload) {
454 // Write records in mapped buffers of perf_event_files to output file while workload is running.
455 start_sampling_time_in_ns_ = GetPerfClock();
456 LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_ << " ns";
457 if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
458 return false;
459 }
460 if (start_profiling_fd_ != -1) {
461 if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) {
462 PLOG(ERROR) << "failed to write to start_profiling_fd_";
463 }
464 close(start_profiling_fd_);
465 }
466 if (!event_selection_set_.GetIOEventLoop()->RunLoop()) {
467 return false;
468 }
469 if (!event_selection_set_.FinishReadMmapEventData()) {
470 return false;
471 }
472 return true;
473 }
474
// Finalizes perf.data after sampling stops: optional post-unwinding and
// callchain joining, feature-section dumping, and a short result summary.
bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
  // 1. Post unwind dwarf callchain.
  if (unwind_dwarf_callchain_ && post_unwind_) {
    if (!PostUnwindRecords()) {
      return false;
    }
  }

  // 2. Optionally join Callchains.
  // NOTE(review): the return value of JoinCallChains() is ignored here, so a
  // failed join doesn't abort the command -- presumably joining is
  // best-effort and the unjoined records remain usable; confirm intent.
  if (callchain_joiner_) {
    JoinCallChains();
  }

  // 3. Dump additional features, and close record file.
  if (!DumpAdditionalFeatures(args)) {
    return false;
  }
  if (!record_file_writer_->Close()) {
    return false;
  }

  // 4. Show brief record result.
  LOG(INFO) << "Samples recorded: " << sample_record_count_
            << ". Samples lost: " << lost_record_count_ << ".";
  // Warn when a noticeable fraction of samples was lost, since that skews
  // the resulting profile.
  if (sample_record_count_ + lost_record_count_ != 0) {
    double lost_percent = static_cast<double>(lost_record_count_) /
                          (lost_record_count_ + sample_record_count_);
    constexpr double LOST_PERCENT_WARNING_BAR = 0.1;
    if (lost_percent >= LOST_PERCENT_WARNING_BAR) {
      LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, "
                   << "consider increasing mmap_pages(-m), "
                   << "or decreasing sample frequency(-f), "
                   << "or increasing sample period(-c).";
    }
  }
  if (callchain_joiner_) {
    callchain_joiner_->DumpStat();
  }
  return true;
}
515
ParseOptions(const std::vector<std::string> & args,std::vector<std::string> * non_option_args)516 bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
517 std::vector<std::string>* non_option_args) {
518 std::vector<size_t> wait_setting_speed_event_groups_;
519 size_t i;
520 for (i = 0; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
521 if (args[i] == "-a") {
522 system_wide_collection_ = true;
523 } else if (args[i] == "--app") {
524 if (!NextArgumentOrError(args, &i)) {
525 return false;
526 }
527 app_package_name_ = args[i];
528 } else if (args[i] == "-b") {
529 branch_sampling_ = branch_sampling_type_map["any"];
530 } else if (args[i] == "-c" || args[i] == "-f") {
531 if (!NextArgumentOrError(args, &i)) {
532 return false;
533 }
534 char* endptr;
535 uint64_t value = strtoull(args[i].c_str(), &endptr, 0);
536 if (*endptr != '\0' || value == 0) {
537 LOG(ERROR) << "Invalid option for " << args[i-1] << ": '" << args[i] << "'";
538 return false;
539 }
540 if (args[i-1] == "-c") {
541 sample_speed_.reset(new SampleSpeed(0, value));
542 } else {
543 sample_speed_.reset(new SampleSpeed(value, 0));
544 }
545 for (auto group_id : wait_setting_speed_event_groups_) {
546 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
547 }
548 wait_setting_speed_event_groups_.clear();
549
550 } else if (args[i] == "--call-graph") {
551 if (!NextArgumentOrError(args, &i)) {
552 return false;
553 }
554 std::vector<std::string> strs = android::base::Split(args[i], ",");
555 if (strs[0] == "fp") {
556 fp_callchain_sampling_ = true;
557 dwarf_callchain_sampling_ = false;
558 } else if (strs[0] == "dwarf") {
559 fp_callchain_sampling_ = false;
560 dwarf_callchain_sampling_ = true;
561 if (strs.size() > 1) {
562 char* endptr;
563 uint64_t size = strtoull(strs[1].c_str(), &endptr, 0);
564 if (*endptr != '\0' || size > UINT_MAX) {
565 LOG(ERROR) << "invalid dump stack size in --call-graph option: "
566 << strs[1];
567 return false;
568 }
569 if ((size & 7) != 0) {
570 LOG(ERROR) << "dump stack size " << size
571 << " is not 8-byte aligned.";
572 return false;
573 }
574 if (size >= MAX_DUMP_STACK_SIZE) {
575 LOG(ERROR) << "dump stack size " << size
576 << " is bigger than max allowed size "
577 << MAX_DUMP_STACK_SIZE << ".";
578 return false;
579 }
580 dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
581 }
582 } else {
583 LOG(ERROR) << "unexpected argument for --call-graph option: "
584 << args[i];
585 return false;
586 }
587 } else if (args[i] == "--clockid") {
588 if (!NextArgumentOrError(args, &i)) {
589 return false;
590 }
591 if (args[i] != "perf") {
592 if (!IsSettingClockIdSupported()) {
593 LOG(ERROR) << "Setting clockid is not supported by the kernel.";
594 return false;
595 }
596 if (clockid_map.find(args[i]) == clockid_map.end()) {
597 LOG(ERROR) << "Invalid clockid: " << args[i];
598 return false;
599 }
600 }
601 clockid_ = args[i];
602 } else if (args[i] == "--cpu") {
603 if (!NextArgumentOrError(args, &i)) {
604 return false;
605 }
606 cpus_ = GetCpusFromString(args[i]);
607 } else if (args[i] == "--duration") {
608 if (!NextArgumentOrError(args, &i)) {
609 return false;
610 }
611 if (!android::base::ParseDouble(args[i].c_str(), &duration_in_sec_,
612 1e-9)) {
613 LOG(ERROR) << "Invalid duration: " << args[i].c_str();
614 return false;
615 }
616 } else if (args[i] == "-e") {
617 if (!NextArgumentOrError(args, &i)) {
618 return false;
619 }
620 std::vector<std::string> event_types = android::base::Split(args[i], ",");
621 for (auto& event_type : event_types) {
622 size_t group_id;
623 if (!event_selection_set_.AddEventType(event_type, &group_id)) {
624 return false;
625 }
626 if (sample_speed_) {
627 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
628 } else {
629 wait_setting_speed_event_groups_.push_back(group_id);
630 }
631 }
632 } else if (args[i] == "--exit-with-parent") {
633 prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
634 } else if (args[i] == "-g") {
635 fp_callchain_sampling_ = false;
636 dwarf_callchain_sampling_ = true;
637 } else if (args[i] == "--group") {
638 if (!NextArgumentOrError(args, &i)) {
639 return false;
640 }
641 std::vector<std::string> event_types = android::base::Split(args[i], ",");
642 size_t group_id;
643 if (!event_selection_set_.AddEventGroup(event_types, &group_id)) {
644 return false;
645 }
646 if (sample_speed_) {
647 event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
648 } else {
649 wait_setting_speed_event_groups_.push_back(group_id);
650 }
651 } else if (args[i] == "--in-app") {
652 in_app_context_ = true;
653 } else if (args[i] == "-j") {
654 if (!NextArgumentOrError(args, &i)) {
655 return false;
656 }
657 std::vector<std::string> branch_sampling_types =
658 android::base::Split(args[i], ",");
659 for (auto& type : branch_sampling_types) {
660 auto it = branch_sampling_type_map.find(type);
661 if (it == branch_sampling_type_map.end()) {
662 LOG(ERROR) << "unrecognized branch sampling filter: " << type;
663 return false;
664 }
665 branch_sampling_ |= it->second;
666 }
667 } else if (args[i] == "-m") {
668 if (!NextArgumentOrError(args, &i)) {
669 return false;
670 }
671 char* endptr;
672 uint64_t pages = strtoull(args[i].c_str(), &endptr, 0);
673 if (*endptr != '\0' || !IsPowerOfTwo(pages)) {
674 LOG(ERROR) << "Invalid mmap_pages: '" << args[i] << "'";
675 return false;
676 }
677 mmap_page_range_.first = mmap_page_range_.second = pages;
678 } else if (args[i] == "--no-dump-kernel-symbols") {
679 can_dump_kernel_symbols_ = false;
680 } else if (args[i] == "--no-dump-symbols") {
681 dump_symbols_ = false;
682 } else if (args[i] == "--no-inherit") {
683 child_inherit_ = false;
684 } else if (args[i] == "--no-unwind") {
685 unwind_dwarf_callchain_ = false;
686 } else if (args[i] == "--no-callchain-joiner") {
687 allow_callchain_joiner_ = false;
688 } else if (args[i] == "--callchain-joiner-min-matching-nodes") {
689 if (!NextArgumentOrError(args, &i)) {
690 return false;
691 }
692 if (!android::base::ParseUint(args[i].c_str(), &callchain_joiner_min_matching_nodes_) ||
693 callchain_joiner_min_matching_nodes_ < 1u) {
694 LOG(ERROR) << "unexpected argument for " << args[i - 1] << " option";
695 return false;
696 }
697 } else if (args[i] == "-o") {
698 if (!NextArgumentOrError(args, &i)) {
699 return false;
700 }
701 record_filename_ = args[i];
702 } else if (args[i] == "-p") {
703 if (!NextArgumentOrError(args, &i)) {
704 return false;
705 }
706 std::set<pid_t> pids;
707 if (!GetValidThreadsFromThreadString(args[i], &pids)) {
708 return false;
709 }
710 event_selection_set_.AddMonitoredProcesses(pids);
711 } else if (args[i] == "--no-post-unwind") {
712 post_unwind_ = false;
713 } else if (args[i] == "--start_profiling_fd") {
714 if (!NextArgumentOrError(args, &i)) {
715 return false;
716 }
717 if (!android::base::ParseInt(args[i].c_str(), &start_profiling_fd_, 0)) {
718 LOG(ERROR) << "Invalid start_profiling_fd: " << args[i];
719 return false;
720 }
721 } else if (args[i] == "--symfs") {
722 if (!NextArgumentOrError(args, &i)) {
723 return false;
724 }
725 if (!Dso::SetSymFsDir(args[i])) {
726 return false;
727 }
728 } else if (args[i] == "-t") {
729 if (!NextArgumentOrError(args, &i)) {
730 return false;
731 }
732 std::set<pid_t> tids;
733 if (!GetValidThreadsFromThreadString(args[i], &tids)) {
734 return false;
735 }
736 event_selection_set_.AddMonitoredThreads(tids);
737 } else if (args[i] == "--trace-offcpu") {
738 trace_offcpu_ = true;
739 } else if (args[i] == "--tracepoint-events") {
740 if (!NextArgumentOrError(args, &i)) {
741 return false;
742 }
743 if (!SetTracepointEventsFilePath(args[i])) {
744 return false;
745 }
746 } else if (args[i] == "--") {
747 i++;
748 break;
749 } else {
750 ReportUnknownOption(args, i);
751 return false;
752 }
753 }
754
755 if (!dwarf_callchain_sampling_) {
756 if (!unwind_dwarf_callchain_) {
757 LOG(ERROR)
758 << "--no-unwind is only used with `--call-graph dwarf` option.";
759 return false;
760 }
761 unwind_dwarf_callchain_ = false;
762 }
763 if (post_unwind_) {
764 if (!dwarf_callchain_sampling_ || !unwind_dwarf_callchain_) {
765 post_unwind_ = false;
766 }
767 } else {
768 if (!dwarf_callchain_sampling_) {
769 LOG(ERROR)
770 << "--no-post-unwind is only used with `--call-graph dwarf` option.";
771 return false;
772 }
773 if (!unwind_dwarf_callchain_) {
774 LOG(ERROR) << "--no-post-unwind can't be used with `--no-unwind` option.";
775 return false;
776 }
777 }
778
779 if (fp_callchain_sampling_) {
780 if (GetBuildArch() == ARCH_ARM) {
781 LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, "
782 << "consider using `-g` option or profiling on aarch64 architecture.";
783 }
784 }
785
786 if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
787 LOG(ERROR) << "Record system wide and existing processes/threads can't be "
788 "used at the same time.";
789 return false;
790 }
791
792 if (system_wide_collection_ && !IsRoot()) {
793 LOG(ERROR) << "System wide profiling needs root privilege.";
794 return false;
795 }
796
797 if (dump_symbols_ && can_dump_kernel_symbols_) {
798 // No need to dump kernel symbols as we will dump all required symbols.
799 can_dump_kernel_symbols_ = false;
800 }
801
802 non_option_args->clear();
803 for (; i < args.size(); ++i) {
804 non_option_args->push_back(args[i]);
805 }
806 return true;
807 }
808
TraceOffCpu()809 bool RecordCommand::TraceOffCpu() {
810 if (FindEventTypeByName("sched:sched_switch") == nullptr) {
811 LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available";
812 return false;
813 }
814 for (auto& event_type : event_selection_set_.GetTracepointEvents()) {
815 if (event_type->name == "sched:sched_switch") {
816 LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event";
817 return false;
818 }
819 }
820 if (!IsDumpingRegsForTracepointEventsSupported()) {
821 LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel";
822 return false;
823 }
824 return event_selection_set_.AddEventType("sched:sched_switch");
825 }
826
SetEventSelectionFlags()827 bool RecordCommand::SetEventSelectionFlags() {
828 event_selection_set_.SampleIdAll();
829 if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
830 return false;
831 }
832 if (fp_callchain_sampling_) {
833 event_selection_set_.EnableFpCallChainSampling();
834 } else if (dwarf_callchain_sampling_) {
835 if (!event_selection_set_.EnableDwarfCallChainSampling(
836 dump_stack_size_in_dwarf_sampling_)) {
837 return false;
838 }
839 }
840 event_selection_set_.SetInherit(child_inherit_);
841 if (clockid_ != "perf") {
842 event_selection_set_.SetClockId(clockid_map[clockid_]);
843 }
844 return true;
845 }
846
CreateAndInitRecordFile()847 bool RecordCommand::CreateAndInitRecordFile() {
848 record_file_writer_ = CreateRecordFile(record_filename_);
849 if (record_file_writer_ == nullptr) {
850 return false;
851 }
852 // Use first perf_event_attr and first event id to dump mmap and comm records.
853 EventAttrWithId attr_id = event_selection_set_.GetEventAttrWithId()[0];
854 if (!DumpKernelSymbol()) {
855 return false;
856 }
857 if (!DumpTracingData()) {
858 return false;
859 }
860 if (!DumpKernelAndModuleMmaps(*attr_id.attr, attr_id.ids[0])) {
861 return false;
862 }
863 if (!DumpThreadCommAndMmaps(*attr_id.attr, attr_id.ids[0])) {
864 return false;
865 }
866 return true;
867 }
868
CreateRecordFile(const std::string & filename)869 std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(
870 const std::string& filename) {
871 std::unique_ptr<RecordFileWriter> writer =
872 RecordFileWriter::CreateInstance(filename);
873 if (writer == nullptr) {
874 return nullptr;
875 }
876
877 if (!writer->WriteAttrSection(event_selection_set_.GetEventAttrWithId())) {
878 return nullptr;
879 }
880 return writer;
881 }
882
DumpKernelSymbol()883 bool RecordCommand::DumpKernelSymbol() {
884 if (can_dump_kernel_symbols_) {
885 std::string kallsyms;
886 if (event_selection_set_.NeedKernelSymbol() &&
887 CheckKernelSymbolAddresses()) {
888 if (!android::base::ReadFileToString("/proc/kallsyms", &kallsyms)) {
889 PLOG(ERROR) << "failed to read /proc/kallsyms";
890 return false;
891 }
892 KernelSymbolRecord r(kallsyms);
893 if (!ProcessRecord(&r)) {
894 return false;
895 }
896 }
897 }
898 return true;
899 }
900
DumpTracingData()901 bool RecordCommand::DumpTracingData() {
902 std::vector<const EventType*> tracepoint_event_types =
903 event_selection_set_.GetTracepointEvents();
904 if (tracepoint_event_types.empty() || !CanRecordRawData()) {
905 return true; // No need to dump tracing data, or can't do it.
906 }
907 std::vector<char> tracing_data;
908 if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
909 return false;
910 }
911 TracingDataRecord record(tracing_data);
912 if (!ProcessRecord(&record)) {
913 return false;
914 }
915 return true;
916 }
917
DumpKernelAndModuleMmaps(const perf_event_attr & attr,uint64_t event_id)918 bool RecordCommand::DumpKernelAndModuleMmaps(const perf_event_attr& attr,
919 uint64_t event_id) {
920 KernelMmap kernel_mmap;
921 std::vector<KernelMmap> module_mmaps;
922 GetKernelAndModuleMmaps(&kernel_mmap, &module_mmaps);
923
924 MmapRecord mmap_record(attr, true, UINT_MAX, 0, kernel_mmap.start_addr,
925 kernel_mmap.len, 0, kernel_mmap.filepath, event_id);
926 if (!ProcessRecord(&mmap_record)) {
927 return false;
928 }
929 for (auto& module_mmap : module_mmaps) {
930 MmapRecord mmap_record(attr, true, UINT_MAX, 0, module_mmap.start_addr,
931 module_mmap.len, 0, module_mmap.filepath, event_id);
932 if (!ProcessRecord(&mmap_record)) {
933 return false;
934 }
935 }
936 return true;
937 }
938
// Emits synthesized COMM, MMAP and FORK records describing the profiled
// processes and threads, tagged with |attr|/|event_id|. The report side needs
// these to attribute samples to maps and thread names.
bool RecordCommand::DumpThreadCommAndMmaps(const perf_event_attr& attr,
                                           uint64_t event_id) {
  // Decide which processes and threads to dump.
  // For system_wide profiling, dump all threads.
  // For non system wide profiling, build dump_threads.
  bool all_threads = system_wide_collection_;
  std::set<pid_t> dump_threads = event_selection_set_.GetMonitoredThreads();
  for (const auto& pid : event_selection_set_.GetMonitoredProcesses()) {
    std::vector<pid_t> tids = GetThreadsInProcess(pid);
    dump_threads.insert(tids.begin(), tids.end());
  }

  // Collect processes to dump.
  std::vector<pid_t> processes;
  if (all_threads) {
    processes = GetAllProcesses();
  } else {
    std::set<pid_t> process_set;
    for (const auto& tid : dump_threads) {
      pid_t pid;
      if (!GetProcessForThread(tid, &pid)) {
        // Thread may have exited between collection and here; skip it.
        continue;
      }
      process_set.insert(pid);
    }
    processes.insert(processes.end(), process_set.begin(), process_set.end());
  }

  // Dump each process and its threads.
  for (auto& pid : processes) {
    // Dump mmap records.
    std::vector<ThreadMmap> thread_mmaps;
    if (!GetThreadMmapsInProcess(pid, &thread_mmaps)) {
      // The process may exit before we get its info.
      continue;
    }
    for (const auto& map : thread_mmaps) {
      if (map.executable == 0) {
        continue;  // No need to dump non-executable mmap info.
      }
      MmapRecord record(attr, false, pid, pid, map.start_addr, map.len,
                        map.pgoff, map.name, event_id);
      if (!ProcessRecord(&record)) {
        return false;
      }
    }
    // Dump process name.
    std::string name;
    if (GetThreadName(pid, &name)) {
      CommRecord record(attr, pid, pid, name, event_id, 0);
      if (!ProcessRecord(&record)) {
        return false;
      }
    }
    // Dump thread info.
    std::vector<pid_t> threads = GetThreadsInProcess(pid);
    for (const auto& tid : threads) {
      if (tid == pid) {
        continue;  // Main thread already covered by the process records above.
      }
      if (all_threads || dump_threads.find(tid) != dump_threads.end()) {
        // A FORK record ties the thread to its process; a COMM record names it.
        ForkRecord fork_record(attr, pid, tid, pid, pid, event_id);
        if (!ProcessRecord(&fork_record)) {
          return false;
        }
        if (GetThreadName(tid, &name)) {
          CommRecord comm_record(attr, pid, tid, name, event_id, 0);
          if (!ProcessRecord(&comm_record)) {
            return false;
          }
        }
      }
    }
  }
  return true;
}
1015
ProcessRecord(Record * record)1016 bool RecordCommand::ProcessRecord(Record* record) {
1017 if (unwind_dwarf_callchain_) {
1018 if (post_unwind_) {
1019 return SaveRecordForPostUnwinding(record);
1020 }
1021 return SaveRecordAfterUnwinding(record);
1022 }
1023 return SaveRecordWithoutUnwinding(record);
1024 }
1025
SaveRecordForPostUnwinding(Record * record)1026 bool RecordCommand::SaveRecordForPostUnwinding(Record* record) {
1027 if (record->type() == PERF_RECORD_SAMPLE) {
1028 static_cast<SampleRecord*>(record)->RemoveInvalidStackData();
1029 }
1030 if (!record_file_writer_->WriteRecord(*record)) {
1031 LOG(ERROR) << "If there isn't enough space for storing profiling data, consider using "
1032 << "--no-post-unwind option.";
1033 return false;
1034 }
1035 return true;
1036 }
1037
// Unwinds a sample's dwarf callchain immediately, then writes the record.
// Non-sample records update the thread tree (and embedded-ELF paths) before
// being written; lost records only bump the lost counter.
bool RecordCommand::SaveRecordAfterUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    auto& r = *static_cast<SampleRecord*>(record);
    // AdjustCallChainGeneratedByKernel() should go before UnwindRecord(). Because we don't want
    // to adjust callchains generated by dwarf unwinder.
    r.AdjustCallChainGeneratedByKernel();
    if (!UnwindRecord(r)) {
      return false;
    }
    // ExcludeKernelCallChain() should go after UnwindRecord() to notice the generated user call
    // chain.
    if (r.InKernel() && exclude_kernel_callchain_ && r.ExcludeKernelCallChain() == 0u) {
      // If current record contains no user callchain, skip it.
      return true;
    }
    sample_record_count_++;
  } else if (record->type() == PERF_RECORD_LOST) {
    lost_record_count_ += static_cast<LostRecord*>(record)->lost;
  } else {
    // Keep the thread tree in sync so later samples unwind against the
    // correct maps; also rewrite APK-embedded ELF paths first.
    UpdateRecordForEmbeddedElfPath(record);
    thread_tree_.Update(*record);
  }
  return record_file_writer_->WriteRecord(*record);
}
1062
SaveRecordWithoutUnwinding(Record * record)1063 bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) {
1064 if (record->type() == PERF_RECORD_SAMPLE) {
1065 auto& r = *static_cast<SampleRecord*>(record);
1066 if (fp_callchain_sampling_ || dwarf_callchain_sampling_) {
1067 r.AdjustCallChainGeneratedByKernel();
1068 }
1069 if (r.InKernel() && exclude_kernel_callchain_ && r.ExcludeKernelCallChain() == 0u) {
1070 // If current record contains no user callchain, skip it.
1071 return true;
1072 }
1073 sample_record_count_++;
1074 } else if (record->type() == PERF_RECORD_LOST) {
1075 lost_record_count_ += static_cast<LostRecord*>(record)->lost;
1076 }
1077 return record_file_writer_->WriteRecord(*record);
1078 }
1079
// Shared implementation for MmapRecord and Mmap2Record: rewrites the mmap's
// filename/offset when the mapped file is an ELF embedded in an APK.
template <class RecordType>
void UpdateMmapRecordForEmbeddedElfPath(RecordType* record) {
  RecordType& r = *record;
  // Only user-space mappings with a nonzero page offset can be APK-embedded
  // libraries; pgoff == 0 means the file is mapped from its start.
  if (!r.InKernel() && r.data->pgoff != 0) {
    // For the case of a shared library "foobar.so" embedded
    // inside an APK, we rewrite the original MMAP from
    // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
    // so as to make the library name explicit. This update is
    // done here (as part of the record operation) as opposed to
    // on the host during the report, since we want to report
    // the correct library name even if the the APK in question
    // is not present on the host. The new offset W is
    // calculated to be with respect to the start of foobar.so,
    // not to the start of path.apk.
    EmbeddedElf* ee =
        ApkInspector::FindElfInApkByOffset(r.filename, r.data->pgoff);
    if (ee != nullptr) {
      // Compute new offset relative to start of elf in APK.
      auto data = *r.data;
      data.pgoff -= ee->entry_offset();
      r.SetDataAndFilename(data, GetUrlInApk(r.filename, ee->entry_name()));
    }
  }
}
1104
UpdateRecordForEmbeddedElfPath(Record * record)1105 void RecordCommand::UpdateRecordForEmbeddedElfPath(Record* record) {
1106 if (record->type() == PERF_RECORD_MMAP) {
1107 UpdateMmapRecordForEmbeddedElfPath(static_cast<MmapRecord*>(record));
1108 } else if (record->type() == PERF_RECORD_MMAP2) {
1109 UpdateMmapRecordForEmbeddedElfPath(static_cast<Mmap2Record*>(record));
1110 }
1111 }
1112
// Unwinds the user-space callchain of sample |r| from its dumped registers
// and stack, replacing the raw reg/stack data with the resulting ips. When
// callchain joining is enabled, the chain is also fed to the joiner.
bool RecordCommand::UnwindRecord(SampleRecord& r) {
  // Only samples that carry callchain + user regs + a non-empty user stack
  // can be unwound; anything else passes through unchanged.
  if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) &&
      (r.sample_type & PERF_SAMPLE_REGS_USER) &&
      (r.regs_user_data.reg_mask != 0) &&
      (r.sample_type & PERF_SAMPLE_STACK_USER) &&
      (r.GetValidStackSize() > 0)) {
    ThreadEntry* thread =
        thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
    RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                            r.GetValidStackSize(), &ips, &sps)) {
      return false;
    }
    // Shrinks the record: reg/stack payload is swapped for the ip list.
    r.ReplaceRegAndStackWithCallChain(ips);
    if (callchain_joiner_) {
      return callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
                                             CallChainJoiner::ORIGINAL_OFFLINE, ips, sps);
    }
  }
  return true;
}
1136
// Second pass for --post-unwind: moves the raw record file aside, then
// re-reads it, unwinding each sample and writing the results back to
// record_filename_.
bool RecordCommand::PostUnwindRecords() {
  // 1. Move records from record_filename_ to a temporary file.
  if (!record_file_writer_->Close()) {
    return false;
  }
  record_file_writer_.reset();
  std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile();
  // NOTE(review): "mv" is used (not rename()) — presumably to work across
  // filesystems; confirm before changing.
  if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) {
    return false;
  }
  std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path);
  if (!reader) {
    return false;
  }

  // 2. Read records from the temporary file, and write unwound records back to record_filename_.
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (!record_file_writer_) {
    return false;
  }
  // Counters were accumulated during recording; reset so the unwinding pass
  // recounts them for the final file.
  sample_record_count_ = 0;
  lost_record_count_ = 0;
  auto callback = [this](std::unique_ptr<Record> record) {
    return SaveRecordAfterUnwinding(record.get());
  };
  return reader->ReadDataSection(callback, false);
}
1164
// Rewrites each sample's user callchain with the joined chain produced by
// CallChainJoiner. Relies on the joiner returning chains in the same order
// the samples were fed to it during recording.
bool RecordCommand::JoinCallChains() {
  // 1. Prepare joined callchains.
  if (!callchain_joiner_->JoinCallChains()) {
    return false;
  }
  // 2. Move records from record_filename_ to a temporary file.
  if (!record_file_writer_->Close()) {
    return false;
  }
  record_file_writer_.reset();
  std::unique_ptr<TemporaryFile> tmp_file = ScopedTempFiles::CreateTempFile();
  if (!Workload::RunCmd({"mv", record_filename_, tmp_file->path})) {
    return false;
  }

  // 3. Read records from the temporary file, and write record with joined call chains back
  // to record_filename_.
  std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmp_file->path);
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (!reader || !record_file_writer_) {
    return false;
  }

  auto record_callback = [&](std::unique_ptr<Record> r) {
    // Non-sample records and samples without a user callchain pass through
    // untouched; only the latter consumed no joiner entry during recording.
    if (r->type() != PERF_RECORD_SAMPLE) {
      return record_file_writer_->WriteRecord(*r);
    }
    SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
    if (!sr.HasUserCallChain()) {
      return record_file_writer_->WriteRecord(sr);
    }
    pid_t pid;
    pid_t tid;
    CallChainJoiner::ChainType type;
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
      return false;
    }
    // Joined chains must come back in lock-step with the samples.
    CHECK_EQ(type, CallChainJoiner::JOINED_OFFLINE);
    CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
    CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
    sr.UpdateUserCallChain(ips);
    return record_file_writer_->WriteRecord(sr);
  };
  return reader->ReadDataSection(record_callback, false);
}
1212
// Writes the feature sections of perf.data: build ids, optional file/symbol
// data, OS release, arch, command line, optional branch-stack marker, and
// meta info. |args| is the original `record` argument list, stored verbatim.
bool RecordCommand::DumpAdditionalFeatures(
    const std::vector<std::string>& args) {
  // Read data section of perf.data to collect hit file information.
  thread_tree_.ClearThreadAndMap();
  if (CheckKernelSymbolAddresses()) {
    Dso::ReadKernelSymbolsFromProc();
  }
  auto callback = [&](const Record* r) {
    thread_tree_.Update(*r);
    if (r->type() == PERF_RECORD_SAMPLE) {
      CollectHitFileInfo(*reinterpret_cast<const SampleRecord*>(r));
    }
  };
  if (!record_file_writer_->ReadDataSection(callback)) {
    return false;
  }

  // Base features: build_id, osrelease, arch, cmdline, meta_info. The count
  // must match the number of feature writes below exactly.
  size_t feature_count = 5;
  if (branch_sampling_) {
    feature_count++;
  }
  if (dump_symbols_) {
    feature_count++;
  }
  if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
    return false;
  }
  if (!DumpBuildIdFeature()) {
    return false;
  }
  if (dump_symbols_ && !DumpFileFeature()) {
    return false;
  }
  utsname uname_buf;
  if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
    PLOG(ERROR) << "uname() failed";
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE,
                                               uname_buf.release)) {
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH,
                                               uname_buf.machine)) {
    return false;
  }

  // Record the full command line used to produce this file.
  std::string exec_path = android::base::GetExecutablePath();
  if (exec_path.empty()) exec_path = "simpleperf";
  std::vector<std::string> cmdline;
  cmdline.push_back(exec_path);
  cmdline.push_back("record");
  cmdline.insert(cmdline.end(), args.begin(), args.end());
  if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
    return false;
  }
  if (branch_sampling_ != 0 &&
      !record_file_writer_->WriteBranchStackFeature()) {
    return false;
  }
  if (!DumpMetaInfoFeature()) {
    return false;
  }

  if (!record_file_writer_->EndWriteFeatures()) {
    return false;
  }
  return true;
}
1282
// Collects build ids for every DSO hit by at least one sample (marked via
// CreateDumpId in CollectHitFileInfo) and writes them as the build-id
// feature. Unreadable build ids are skipped with a debug log, not an error.
bool RecordCommand::DumpBuildIdFeature() {
  std::vector<BuildIdRecord> build_id_records;
  BuildId build_id;
  std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
  for (Dso* dso : dso_v) {
    if (!dso->HasDumpId()) {
      continue;  // Never hit by a sample; no build id needed.
    }
    if (dso->type() == DSO_KERNEL) {
      if (!GetKernelBuildId(&build_id)) {
        continue;
      }
      build_id_records.push_back(
          BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
    } else if (dso->type() == DSO_KERNEL_MODULE) {
      // Module build ids are looked up by module name, i.e. the basename
      // with any ".ko" suffix stripped.
      std::string path = dso->Path();
      std::string module_name = basename(&path[0]);
      if (android::base::EndsWith(module_name, ".ko")) {
        module_name = module_name.substr(0, module_name.size() - 3);
      }
      if (!GetModuleBuildId(module_name, &build_id)) {
        LOG(DEBUG) << "can't read build_id for module " << module_name;
        continue;
      }
      build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, path));
    } else {
      if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP) {
        continue;  // Placeholder name, not a real file on disk.
      }
      // Paths of the form "apk!/lib.so" are read from inside the APK.
      auto tuple = SplitUrlInApk(dso->Path());
      if (std::get<0>(tuple)) {
        ElfStatus result = GetBuildIdFromApkFile(std::get<1>(tuple),
                                                 std::get<2>(tuple), &build_id);
        if (result != ElfStatus::NO_ERROR) {
          LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": "
                     << result;
          continue;
        }
      } else {
        ElfStatus result = GetBuildIdFromElfFile(dso->Path(), &build_id);
        if (result != ElfStatus::NO_ERROR) {
          LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": "
                     << result;
          continue;
        }
      }
      build_id_records.push_back(
          BuildIdRecord(false, UINT_MAX, build_id, dso->Path()));
    }
  }
  if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
    return false;
  }
  return true;
}
1338
DumpFileFeature()1339 bool RecordCommand::DumpFileFeature() {
1340 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
1341 return record_file_writer_->WriteFileFeatures(thread_tree_.GetAllDsos());
1342 }
1343
DumpMetaInfoFeature()1344 bool RecordCommand::DumpMetaInfoFeature() {
1345 std::unordered_map<std::string, std::string> info_map;
1346 info_map["simpleperf_version"] = GetSimpleperfVersion();
1347 info_map["system_wide_collection"] = system_wide_collection_ ? "true" : "false";
1348 info_map["trace_offcpu"] = trace_offcpu_ ? "true" : "false";
1349 // By storing event types information in perf.data, the readers of perf.data have the same
1350 // understanding of event types, even if they are on another machine.
1351 info_map["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents());
1352 #if defined(__ANDROID__)
1353 info_map["product_props"] = android::base::StringPrintf("%s:%s:%s",
1354 android::base::GetProperty("ro.product.manufacturer", "").c_str(),
1355 android::base::GetProperty("ro.product.model", "").c_str(),
1356 android::base::GetProperty("ro.product.name", "").c_str());
1357 info_map["android_version"] = android::base::GetProperty("ro.build.version.release", "");
1358 #endif
1359 info_map["clockid"] = clockid_;
1360 info_map["timestamp"] = std::to_string(time(nullptr));
1361 return record_file_writer_->WriteMetaInfoFeature(info_map);
1362 }
1363
// Marks the DSOs (and, with --dump-symbols, the symbols) hit by sample |r|
// by assigning them dump ids; DumpBuildIdFeature/DumpFileFeature later emit
// only marked entries. Walks both the sample ip and its callchain.
void RecordCommand::CollectHitFileInfo(const SampleRecord& r) {
  const ThreadEntry* thread =
      thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
  const MapEntry* map =
      thread_tree_.FindMap(thread, r.ip_data.ip, r.InKernel());
  Dso* dso = map->dso;
  const Symbol* symbol;
  if (dump_symbols_) {
    symbol = thread_tree_.FindSymbol(map, r.ip_data.ip, nullptr, &dso);
    if (!symbol->HasDumpId()) {
      dso->CreateSymbolDumpId(symbol);
    }
  }
  if (!dso->HasDumpId()) {
    dso->CreateDumpId();
  }
  if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
    bool in_kernel = r.InKernel();
    bool first_ip = true;
    for (uint64_t i = 0; i < r.callchain_data.ip_nr; ++i) {
      uint64_t ip = r.callchain_data.ips[i];
      // Values >= PERF_CONTEXT_MAX are context markers, not addresses: they
      // switch the kernel/user interpretation of subsequent ips.
      if (ip >= PERF_CONTEXT_MAX) {
        switch (ip) {
          case PERF_CONTEXT_KERNEL:
            in_kernel = true;
            break;
          case PERF_CONTEXT_USER:
            in_kernel = false;
            break;
          default:
            LOG(DEBUG) << "Unexpected perf_context in callchain: " << std::hex
                       << ip;
        }
      } else {
        if (first_ip) {
          first_ip = false;
          // Remove duplication with sample ip.
          if (ip == r.ip_data.ip) {
            continue;
          }
        }
        map = thread_tree_.FindMap(thread, ip, in_kernel);
        dso = map->dso;
        if (dump_symbols_) {
          symbol = thread_tree_.FindSymbol(map, ip, nullptr, &dso);
          if (!symbol->HasDumpId()) {
            dso->CreateSymbolDumpId(symbol);
          }
        }
        if (!dso->HasDumpId()) {
          dso->CreateDumpId();
        }
      }
    }
  }
}
1420
RegisterRecordCommand()1421 void RegisterRecordCommand() {
1422 RegisterCommand("record",
1423 [] { return std::unique_ptr<Command>(new RecordCommand()); });
1424 }
1425