1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <libgen.h>
18 #include <signal.h>
19 #include <sys/prctl.h>
20 #include <sys/utsname.h>
21 #include <unistd.h>
22 #include <set>
23 #include <string>
24 #include <unordered_map>
25 #include <vector>
26
27 #include <android-base/logging.h>
28 #include <android-base/file.h>
29 #include <android-base/parsedouble.h>
30 #include <android-base/parseint.h>
31 #include <android-base/strings.h>
32
33 #include "command.h"
34 #include "dwarf_unwind.h"
35 #include "environment.h"
36 #include "event_selection_set.h"
37 #include "event_type.h"
38 #include "IOEventLoop.h"
39 #include "perf_clock.h"
40 #include "read_apk.h"
41 #include "read_elf.h"
42 #include "record.h"
43 #include "record_file.h"
44 #include "thread_tree.h"
45 #include "tracing.h"
46 #include "utils.h"
47 #include "workload.h"
48
// Event type sampled when the user doesn't select any with -e/--group.
static std::string default_measured_event_type = "cpu-cycles";

// Maps each -j filter name to its PERF_SAMPLE_BRANCH_* bit.
// Kept non-const because the "-b" option path looks up "any" via operator[].
static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
    {"u", PERF_SAMPLE_BRANCH_USER},
    {"k", PERF_SAMPLE_BRANCH_KERNEL},
    {"any", PERF_SAMPLE_BRANCH_ANY},
    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
};

// The max size of records dumped by kernel is 65535, and dump stack size
// should be a multiply of 8, so MAX_DUMP_STACK_SIZE is 65528.
constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;

// The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
// Here 1024 is a desired value for pages in mapped buffer. If mapped
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;
68
// Implements `simpleperf record`: opens perf event files for the requested
// targets, mmaps the kernel sample buffers, and streams records into a
// perf.data file, optionally unwinding dwarf call chains along the way.
class RecordCommand : public Command {
 public:
  RecordCommand()
      : Command(
            "record", "record sampling info in perf.data",
            // clang-format off
"Usage: simpleperf record [options] [command [command-args]]\n"
"       Gather sampling information of running [command]. And -a/-p/-t option\n"
"       can be used to change target of sampling information.\n"
"-a           System-wide collection.\n"
"-b           Enable take branch stack sampling. Same as '-j any'\n"
"-c count     Set event sample period. It means recording one sample when\n"
"             [count] events happen. Can't be used with -f/-F option.\n"
"             For tracepoint events, the default option is -c 1.\n"
"--call-graph fp | dwarf[,<dump_stack_size>]\n"
"             Enable call graph recording. Use frame pointer or dwarf debug\n"
"             frame as the method to parse call graph in stack.\n"
"             Default is dwarf,65528.\n"
"--cpu cpu_item1,cpu_item2,...\n"
"             Collect samples only on the selected cpus. cpu_item can be cpu\n"
"             number like 1, or cpu range like 0-3.\n"
"--dump-symbols  Dump symbols in perf.data. By default perf.data doesn't contain\n"
"                symbol information for samples. This option is used when there\n"
"                is no symbol information in report environment.\n"
"--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
"                        [command]. Here time_in_sec may be any positive\n"
"                        floating point number.\n"
"-e event1[:modifier1],event2[:modifier2],...\n"
"             Select the event list to sample. Use `simpleperf list` to find\n"
"             all possible event names. Modifiers can be added to define how\n"
"             the event should be monitored.\n"
"             Possible modifiers are:\n"
"                u - monitor user space events only\n"
"                k - monitor kernel space events only\n"
"-f freq      Set event sample frequency. It means recording at most [freq]\n"
"             samples every second. For non-tracepoint events, the default\n"
"             option is -f 4000.\n"
"-F freq      Same as '-f freq'.\n"
"-g           Same as '--call-graph dwarf'.\n"
"--group event1[:modifier],event2[:modifier2],...\n"
"             Similar to -e option. But events specified in the same --group\n"
"             option are monitored as a group, and scheduled in and out at the\n"
"             same time.\n"
"-j branch_filter1,branch_filter2,...\n"
"             Enable taken branch stack sampling. Each sample captures a series\n"
"             of consecutive taken branches.\n"
"             The following filters are defined:\n"
"                any: any type of branch\n"
"                any_call: any function call or system call\n"
"                any_ret: any function return or system call return\n"
"                ind_call: any indirect branch\n"
"                u: only when the branch target is at the user level\n"
"                k: only when the branch target is in the kernel\n"
"             This option requires at least one branch type among any, any_call,\n"
"             any_ret, ind_call.\n"
"-m mmap_pages   Set the size of the buffer used to receiving sample data from\n"
"                the kernel. It should be a power of 2. If not set, the max\n"
"                possible value <= 1024 will be used.\n"
"--no-dump-kernel-symbols  Don't dump kernel symbols in perf.data. By default\n"
"                          kernel symbols will be dumped when needed.\n"
"--no-inherit  Don't record created child threads/processes.\n"
"--no-unwind   If `--call-graph dwarf` option is used, then the user's stack\n"
"              will be unwound by default. Use this option to disable the\n"
"              unwinding of the user's stack.\n"
"-o record_file_name    Set record file name, default is perf.data.\n"
"-p pid1,pid2,...       Record events on existing processes. Mutually exclusive\n"
"                       with -a.\n"
"--post-unwind  If `--call-graph dwarf` option is used, then the user's stack\n"
"               will be unwound while recording by default. But it may lose\n"
"               records as stacking unwinding can be time consuming. Use this\n"
"               option to unwind the user's stack after recording.\n"
"--symfs <dir>  Look for files with symbols relative to this directory.\n"
"               This option is used to provide files with symbol table and\n"
"               debug information, which are used by --dump-symbols and -g.\n"
"-t tid1,tid2,...       Record events on existing threads. Mutually exclusive with -a.\n"
            // clang-format on
            ),
        use_sample_freq_(false),
        sample_freq_(0),
        use_sample_period_(false),
        sample_period_(0),
        system_wide_collection_(false),
        branch_sampling_(0),
        fp_callchain_sampling_(false),
        dwarf_callchain_sampling_(false),
        dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
        unwind_dwarf_callchain_(true),
        post_unwind_(false),
        child_inherit_(true),
        duration_in_sec_(0),
        can_dump_kernel_symbols_(true),
        dump_symbols_(false),
        event_selection_set_(false),
        mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
        record_filename_("perf.data"),
        start_sampling_time_in_ns_(0),
        sample_record_count_(0),
        lost_record_count_(0) {
    // Stop profiling if parent exits.
    prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
  }

  bool Run(const std::vector<std::string>& args);

 private:
  // Parses command-line options; leftover args (the workload command) go to
  // non_option_args.
  bool ParseOptions(const std::vector<std::string>& args,
                    std::vector<std::string>* non_option_args);
  // Applies parsed options (freq/period, branch sampling, call-graph mode,
  // inherit) onto event_selection_set_.
  bool SetEventSelectionFlags();
  // Creates record_file_writer_ and writes the initial kernel-symbol,
  // tracing-data, mmap and comm records.
  bool CreateAndInitRecordFile();
  std::unique_ptr<RecordFileWriter> CreateRecordFile(
      const std::string& filename);
  bool DumpKernelSymbol();
  bool DumpTracingData();
  bool DumpKernelAndModuleMmaps(const perf_event_attr& attr, uint64_t event_id);
  bool DumpThreadCommAndMmaps(const perf_event_attr& attr, uint64_t event_id);
  // Central sink: every record (synthetic or from the kernel buffers) passes
  // through here before being written to the output file.
  bool ProcessRecord(Record* record);
  void UpdateRecordForEmbeddedElfPath(Record* record);
  bool UnwindRecord(Record* record);
  // Re-reads the output file and unwinds stacks offline (--post-unwind).
  bool PostUnwind(const std::vector<std::string>& args);
  bool DumpAdditionalFeatures(const std::vector<std::string>& args);
  bool DumpBuildIdFeature();
  bool DumpFileFeature();
  void CollectHitFileInfo(const SampleRecord& r);

  bool use_sample_freq_;    // true when -f/-F given; mutually exclusive with -c
  uint64_t sample_freq_;    // Sample 'sample_freq_' times per second.
  bool use_sample_period_;  // true when -c given
  uint64_t sample_period_;  // Sample once when 'sample_period_' events occur.

  bool system_wide_collection_;        // -a
  uint64_t branch_sampling_;           // OR of PERF_SAMPLE_BRANCH_* bits (-b/-j)
  bool fp_callchain_sampling_;         // --call-graph fp
  bool dwarf_callchain_sampling_;      // --call-graph dwarf / -g
  uint32_t dump_stack_size_in_dwarf_sampling_;  // bytes of user stack per sample
  bool unwind_dwarf_callchain_;        // cleared by --no-unwind
  bool post_unwind_;                   // --post-unwind
  bool child_inherit_;                 // cleared by --no-inherit
  double duration_in_sec_;             // --duration; 0 means "until workload exits"
  bool can_dump_kernel_symbols_;       // cleared by --no-dump-kernel-symbols
  bool dump_symbols_;                  // --dump-symbols
  std::vector<int> cpus_;              // --cpu; empty means all cpus
  EventSelectionSet event_selection_set_;

  // Min/max mmap pages to try when mapping kernel buffers (-m fixes both).
  std::pair<size_t, size_t> mmap_page_range_;

  ThreadTree thread_tree_;
  std::string record_filename_;
  std::unique_ptr<RecordFileWriter> record_file_writer_;

  uint64_t start_sampling_time_in_ns_;  // nanoseconds from machine starting

  uint64_t sample_record_count_;
  uint64_t lost_record_count_;
};
223
// Entry point of `simpleperf record`. Runs the whole pipeline in order:
// option parsing, workload creation, event file setup, recording loop,
// feature dumping and optional offline unwinding. Returns false on the
// first failure.
bool RecordCommand::Run(const std::vector<std::string>& args) {
  if (!CheckPerfEventLimit()) {
    return false;
  }
  if (!InitPerfClock()) {
    return false;
  }

  // 1. Parse options, and use default measured event type if not given.
  std::vector<std::string> workload_args;
  if (!ParseOptions(args, &workload_args)) {
    return false;
  }
  if (event_selection_set_.empty()) {
    // No -e/--group given: fall back to cpu-cycles.
    if (!event_selection_set_.AddEventType(default_measured_event_type)) {
      return false;
    }
  }
  if (!SetEventSelectionFlags()) {
    return false;
  }
  ScopedCurrentArch scoped_arch(GetMachineArch());

  // 2. Create workload.
  std::unique_ptr<Workload> workload;
  if (!workload_args.empty()) {
    workload = Workload::CreateWorkload(workload_args);
    if (workload == nullptr) {
      return false;
    }
  }
  bool need_to_check_targets = false;
  if (system_wide_collection_) {
    // tid -1 means "all threads" to perf_event_open.
    event_selection_set_.AddMonitoredThreads({-1});
  } else if (!event_selection_set_.HasMonitoredTarget()) {
    if (workload != nullptr) {
      event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
      // Don't count events until the workload execs.
      event_selection_set_.SetEnableOnExec(true);
      if (event_selection_set_.HasInplaceSampler()) {
        // Start worker early, because the worker process has to setup inplace-sampler server
        // before we try to connect it.
        if (!workload->Start()) {
          return false;
        }
      }
    } else {
      LOG(ERROR)
          << "No threads to monitor. Try `simpleperf help record` for help";
      return false;
    }
  } else {
    // Existing -p/-t targets: stop recording when they all exit.
    need_to_check_targets = true;
  }

  // 3. Open perf_event_files, create mapped buffers for perf_event_files.
  if (!event_selection_set_.OpenEventFiles(cpus_)) {
    return false;
  }
  if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first,
                                           mmap_page_range_.second)) {
    return false;
  }

  // 4. Create perf.data.
  if (!CreateAndInitRecordFile()) {
    return false;
  }

  // 5. Add read/signal/periodic Events.
  auto callback =
      std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
  if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
    return false;
  }
  if (!event_selection_set_.HandleCpuHotplugEvents(cpus_)) {
    return false;
  }
  if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
    return false;
  }
  IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
  // Exit the loop on child exit or a termination signal.
  if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP},
                             [&]() { return loop->ExitLoop(); })) {
    return false;
  }
  if (duration_in_sec_ != 0) {
    // --duration: exit after the given time instead of waiting for SIGCHLD.
    if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
                                [&]() { return loop->ExitLoop(); })) {
      return false;
    }
  }

  // 6. Write records in mapped buffers of perf_event_files to output file while
  //    workload is running.
  start_sampling_time_in_ns_ = GetPerfClock();
  LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_
               << " ns";
  // Workload may already be started above for inplace-sampler setup.
  if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
    return false;
  }
  if (!loop->RunLoop()) {
    return false;
  }
  // Drain remaining records from the mapped buffers.
  if (!event_selection_set_.FinishReadMmapEventData()) {
    return false;
  }

  // 7. Dump additional features, and close record file.
  if (!DumpAdditionalFeatures(args)) {
    return false;
  }
  if (!record_file_writer_->Close()) {
    return false;
  }

  // 8. Unwind dwarf callchain.
  if (post_unwind_) {
    if (!PostUnwind(args)) {
      return false;
    }
  }

  // 9. Show brief record result.
  LOG(INFO) << "Samples recorded: " << sample_record_count_
            << ". Samples lost: " << lost_record_count_ << ".";
  if (sample_record_count_ + lost_record_count_ != 0) {
    double lost_percent = static_cast<double>(lost_record_count_) /
                          (lost_record_count_ + sample_record_count_);
    constexpr double LOST_PERCENT_WARNING_BAR = 0.1;
    if (lost_percent >= LOST_PERCENT_WARNING_BAR) {
      LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, "
                   << "consider increasing mmap_pages(-m), "
                   << "or decreasing sample frequency(-f), "
                   << "or increasing sample period(-c).";
    }
  }
  return true;
}
362
ParseOptions(const std::vector<std::string> & args,std::vector<std::string> * non_option_args)363 bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
364 std::vector<std::string>* non_option_args) {
365 size_t i;
366 for (i = 0; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
367 if (args[i] == "-a") {
368 system_wide_collection_ = true;
369 } else if (args[i] == "-b") {
370 branch_sampling_ = branch_sampling_type_map["any"];
371 } else if (args[i] == "-c") {
372 if (!NextArgumentOrError(args, &i)) {
373 return false;
374 }
375 char* endptr;
376 sample_period_ = strtoull(args[i].c_str(), &endptr, 0);
377 if (*endptr != '\0' || sample_period_ == 0) {
378 LOG(ERROR) << "Invalid sample period: '" << args[i] << "'";
379 return false;
380 }
381 use_sample_period_ = true;
382 } else if (args[i] == "--call-graph") {
383 if (!NextArgumentOrError(args, &i)) {
384 return false;
385 }
386 std::vector<std::string> strs = android::base::Split(args[i], ",");
387 if (strs[0] == "fp") {
388 fp_callchain_sampling_ = true;
389 dwarf_callchain_sampling_ = false;
390 } else if (strs[0] == "dwarf") {
391 fp_callchain_sampling_ = false;
392 dwarf_callchain_sampling_ = true;
393 if (strs.size() > 1) {
394 char* endptr;
395 uint64_t size = strtoull(strs[1].c_str(), &endptr, 0);
396 if (*endptr != '\0' || size > UINT_MAX) {
397 LOG(ERROR) << "invalid dump stack size in --call-graph option: "
398 << strs[1];
399 return false;
400 }
401 if ((size & 7) != 0) {
402 LOG(ERROR) << "dump stack size " << size
403 << " is not 8-byte aligned.";
404 return false;
405 }
406 if (size >= MAX_DUMP_STACK_SIZE) {
407 LOG(ERROR) << "dump stack size " << size
408 << " is bigger than max allowed size "
409 << MAX_DUMP_STACK_SIZE << ".";
410 return false;
411 }
412 dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
413 }
414 } else {
415 LOG(ERROR) << "unexpected argument for --call-graph option: "
416 << args[i];
417 return false;
418 }
419 } else if (args[i] == "--cpu") {
420 if (!NextArgumentOrError(args, &i)) {
421 return false;
422 }
423 cpus_ = GetCpusFromString(args[i]);
424 } else if (args[i] == "--dump-symbols") {
425 dump_symbols_ = true;
426 } else if (args[i] == "--duration") {
427 if (!NextArgumentOrError(args, &i)) {
428 return false;
429 }
430 if (!android::base::ParseDouble(args[i].c_str(), &duration_in_sec_,
431 1e-9)) {
432 LOG(ERROR) << "Invalid duration: " << args[i].c_str();
433 return false;
434 }
435 } else if (args[i] == "-e") {
436 if (!NextArgumentOrError(args, &i)) {
437 return false;
438 }
439 std::vector<std::string> event_types = android::base::Split(args[i], ",");
440 for (auto& event_type : event_types) {
441 if (!event_selection_set_.AddEventType(event_type)) {
442 return false;
443 }
444 }
445 } else if (args[i] == "-f" || args[i] == "-F") {
446 if (!NextArgumentOrError(args, &i)) {
447 return false;
448 }
449 if (!android::base::ParseUint(args[i].c_str(), &sample_freq_)) {
450 LOG(ERROR) << "Invalid sample frequency: " << args[i];
451 return false;
452 }
453 if (!CheckSampleFrequency(sample_freq_)) {
454 return false;
455 }
456 use_sample_freq_ = true;
457 } else if (args[i] == "-g") {
458 fp_callchain_sampling_ = false;
459 dwarf_callchain_sampling_ = true;
460 } else if (args[i] == "--group") {
461 if (!NextArgumentOrError(args, &i)) {
462 return false;
463 }
464 std::vector<std::string> event_types = android::base::Split(args[i], ",");
465 if (!event_selection_set_.AddEventGroup(event_types)) {
466 return false;
467 }
468 } else if (args[i] == "-j") {
469 if (!NextArgumentOrError(args, &i)) {
470 return false;
471 }
472 std::vector<std::string> branch_sampling_types =
473 android::base::Split(args[i], ",");
474 for (auto& type : branch_sampling_types) {
475 auto it = branch_sampling_type_map.find(type);
476 if (it == branch_sampling_type_map.end()) {
477 LOG(ERROR) << "unrecognized branch sampling filter: " << type;
478 return false;
479 }
480 branch_sampling_ |= it->second;
481 }
482 } else if (args[i] == "-m") {
483 if (!NextArgumentOrError(args, &i)) {
484 return false;
485 }
486 char* endptr;
487 uint64_t pages = strtoull(args[i].c_str(), &endptr, 0);
488 if (*endptr != '\0' || !IsPowerOfTwo(pages)) {
489 LOG(ERROR) << "Invalid mmap_pages: '" << args[i] << "'";
490 return false;
491 }
492 mmap_page_range_.first = mmap_page_range_.second = pages;
493 } else if (args[i] == "--no-dump-kernel-symbols") {
494 can_dump_kernel_symbols_ = false;
495 } else if (args[i] == "--no-inherit") {
496 child_inherit_ = false;
497 } else if (args[i] == "--no-unwind") {
498 unwind_dwarf_callchain_ = false;
499 } else if (args[i] == "-o") {
500 if (!NextArgumentOrError(args, &i)) {
501 return false;
502 }
503 record_filename_ = args[i];
504 } else if (args[i] == "-p") {
505 if (!NextArgumentOrError(args, &i)) {
506 return false;
507 }
508 std::set<pid_t> pids;
509 if (!GetValidThreadsFromThreadString(args[i], &pids)) {
510 return false;
511 }
512 event_selection_set_.AddMonitoredProcesses(pids);
513 } else if (args[i] == "--post-unwind") {
514 post_unwind_ = true;
515 } else if (args[i] == "--symfs") {
516 if (!NextArgumentOrError(args, &i)) {
517 return false;
518 }
519 if (!Dso::SetSymFsDir(args[i])) {
520 return false;
521 }
522 } else if (args[i] == "-t") {
523 if (!NextArgumentOrError(args, &i)) {
524 return false;
525 }
526 std::set<pid_t> tids;
527 if (!GetValidThreadsFromThreadString(args[i], &tids)) {
528 return false;
529 }
530 event_selection_set_.AddMonitoredThreads(tids);
531 } else {
532 ReportUnknownOption(args, i);
533 return false;
534 }
535 }
536
537 if (use_sample_freq_ && use_sample_period_) {
538 LOG(ERROR) << "-f option can't be used with -c option.";
539 return false;
540 }
541
542 if (!dwarf_callchain_sampling_) {
543 if (!unwind_dwarf_callchain_) {
544 LOG(ERROR)
545 << "--no-unwind is only used with `--call-graph dwarf` option.";
546 return false;
547 }
548 unwind_dwarf_callchain_ = false;
549 }
550 if (post_unwind_) {
551 if (!dwarf_callchain_sampling_) {
552 LOG(ERROR)
553 << "--post-unwind is only used with `--call-graph dwarf` option.";
554 return false;
555 }
556 if (!unwind_dwarf_callchain_) {
557 LOG(ERROR) << "--post-unwind can't be used with `--no-unwind` option.";
558 return false;
559 }
560 }
561
562 if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
563 LOG(ERROR) << "Record system wide and existing processes/threads can't be "
564 "used at the same time.";
565 return false;
566 }
567
568 if (system_wide_collection_ && !IsRoot()) {
569 LOG(ERROR) << "System wide profiling needs root privilege.";
570 return false;
571 }
572
573 if (dump_symbols_ && can_dump_kernel_symbols_) {
574 // No need to dump kernel symbols as we will dump all required symbols.
575 can_dump_kernel_symbols_ = false;
576 }
577
578 non_option_args->clear();
579 for (; i < args.size(); ++i) {
580 non_option_args->push_back(args[i]);
581 }
582 return true;
583 }
584
// Pushes the parsed sampling options onto event_selection_set_ so they are
// applied when the perf event files are opened. Returns false if branch
// sampling or dwarf call-chain sampling can't be configured.
bool RecordCommand::SetEventSelectionFlags() {
  // Sample rate: explicit frequency (-f/-F), explicit period (-c), or the
  // per-event default (4000 Hz for non-tracepoint, period 1 for tracepoint).
  if (use_sample_freq_) {
    event_selection_set_.SetSampleFreq(sample_freq_);
  } else if (use_sample_period_) {
    event_selection_set_.SetSamplePeriod(sample_period_);
  } else {
    event_selection_set_.UseDefaultSampleFreq();
  }
  event_selection_set_.SampleIdAll();
  if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
    return false;
  }
  if (fp_callchain_sampling_) {
    event_selection_set_.EnableFpCallChainSampling();
  } else if (dwarf_callchain_sampling_) {
    // Dwarf mode also dumps user regs and a slice of the user stack.
    if (!event_selection_set_.EnableDwarfCallChainSampling(
            dump_stack_size_in_dwarf_sampling_)) {
      return false;
    }
  }
  event_selection_set_.SetInherit(child_inherit_);
  return true;
}
608
// Creates the output record file and seeds it with the synthetic records
// (kernel symbols, tracing data, kernel/module mmaps, thread comm/mmaps)
// the reporter needs to interpret later samples.
bool RecordCommand::CreateAndInitRecordFile() {
  record_file_writer_ = CreateRecordFile(record_filename_);
  if (record_file_writer_ == nullptr) {
    return false;
  }
  // Use first perf_event_attr and first event id to dump mmap and comm records.
  // Safe: Run() guarantees event_selection_set_ is non-empty by this point.
  EventAttrWithId attr_id = event_selection_set_.GetEventAttrWithId()[0];
  if (!DumpKernelSymbol()) {
    return false;
  }
  if (!DumpTracingData()) {
    return false;
  }
  if (!DumpKernelAndModuleMmaps(*attr_id.attr, attr_id.ids[0])) {
    return false;
  }
  if (!DumpThreadCommAndMmaps(*attr_id.attr, attr_id.ids[0])) {
    return false;
  }
  return true;
}
630
CreateRecordFile(const std::string & filename)631 std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(
632 const std::string& filename) {
633 std::unique_ptr<RecordFileWriter> writer =
634 RecordFileWriter::CreateInstance(filename);
635 if (writer == nullptr) {
636 return nullptr;
637 }
638
639 if (!writer->WriteAttrSection(event_selection_set_.GetEventAttrWithId())) {
640 return nullptr;
641 }
642 return writer;
643 }
644
DumpKernelSymbol()645 bool RecordCommand::DumpKernelSymbol() {
646 if (can_dump_kernel_symbols_) {
647 std::string kallsyms;
648 if (event_selection_set_.NeedKernelSymbol() &&
649 CheckKernelSymbolAddresses()) {
650 if (!android::base::ReadFileToString("/proc/kallsyms", &kallsyms)) {
651 PLOG(ERROR) << "failed to read /proc/kallsyms";
652 return false;
653 }
654 KernelSymbolRecord r(kallsyms);
655 if (!ProcessRecord(&r)) {
656 return false;
657 }
658 }
659 }
660 return true;
661 }
662
DumpTracingData()663 bool RecordCommand::DumpTracingData() {
664 std::vector<const EventType*> tracepoint_event_types =
665 event_selection_set_.GetTracepointEvents();
666 if (tracepoint_event_types.empty()) {
667 return true; // No need to dump tracing data.
668 }
669 std::vector<char> tracing_data;
670 if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
671 return false;
672 }
673 TracingDataRecord record(tracing_data);
674 if (!ProcessRecord(&record)) {
675 return false;
676 }
677 return true;
678 }
679
DumpKernelAndModuleMmaps(const perf_event_attr & attr,uint64_t event_id)680 bool RecordCommand::DumpKernelAndModuleMmaps(const perf_event_attr& attr,
681 uint64_t event_id) {
682 KernelMmap kernel_mmap;
683 std::vector<KernelMmap> module_mmaps;
684 GetKernelAndModuleMmaps(&kernel_mmap, &module_mmaps);
685
686 MmapRecord mmap_record(attr, true, UINT_MAX, 0, kernel_mmap.start_addr,
687 kernel_mmap.len, 0, kernel_mmap.filepath, event_id);
688 if (!ProcessRecord(&mmap_record)) {
689 return false;
690 }
691 for (auto& module_mmap : module_mmaps) {
692 MmapRecord mmap_record(attr, true, UINT_MAX, 0, module_mmap.start_addr,
693 module_mmap.len, 0, module_mmap.filepath, event_id);
694 if (!ProcessRecord(&mmap_record)) {
695 return false;
696 }
697 }
698 return true;
699 }
700
// Writes synthetic COMM/MMAP/FORK records describing the monitored processes
// and threads that already exist at record start (records for later
// comm/mmap/fork activity come from the kernel).
bool RecordCommand::DumpThreadCommAndMmaps(const perf_event_attr& attr,
                                           uint64_t event_id) {
  // Decide which processes and threads to dump.
  // For system_wide profiling, dump all threads.
  // For non system wide profiling, build dump_threads.
  bool all_threads = system_wide_collection_;
  std::set<pid_t> dump_threads = event_selection_set_.GetMonitoredThreads();
  for (const auto& pid : event_selection_set_.GetMonitoredProcesses()) {
    // Expand each monitored process into all of its threads.
    std::vector<pid_t> tids = GetThreadsInProcess(pid);
    dump_threads.insert(tids.begin(), tids.end());
  }

  // Collect processes to dump.
  std::vector<pid_t> processes;
  if (all_threads) {
    processes = GetAllProcesses();
  } else {
    // Map each thread back to its owning process, deduplicated via a set.
    std::set<pid_t> process_set;
    for (const auto& tid : dump_threads) {
      pid_t pid;
      if (!GetProcessForThread(tid, &pid)) {
        continue;  // thread exited between enumeration and lookup
      }
      process_set.insert(pid);
    }
    processes.insert(processes.end(), process_set.begin(), process_set.end());
  }

  // Dump each process and its threads.
  for (auto& pid : processes) {
    // Dump mmap records.
    std::vector<ThreadMmap> thread_mmaps;
    if (!GetThreadMmapsInProcess(pid, &thread_mmaps)) {
      // The process may exit before we get its info.
      continue;
    }
    for (const auto& map : thread_mmaps) {
      if (map.executable == 0) {
        continue;  // No need to dump non-executable mmap info.
      }
      MmapRecord record(attr, false, pid, pid, map.start_addr, map.len,
                        map.pgoff, map.name, event_id);
      if (!ProcessRecord(&record)) {
        return false;
      }
    }
    // Dump process name.
    std::string name;
    if (GetThreadName(pid, &name)) {
      CommRecord record(attr, pid, pid, name, event_id, 0);
      if (!ProcessRecord(&record)) {
        return false;
      }
    }
    // Dump thread info.
    std::vector<pid_t> threads = GetThreadsInProcess(pid);
    for (const auto& tid : threads) {
      if (tid == pid) {
        continue;  // main thread already covered by the process records
      }
      if (all_threads || dump_threads.find(tid) != dump_threads.end()) {
        // FORK record ties the thread to its process in the thread tree.
        ForkRecord fork_record(attr, pid, tid, pid, pid, event_id);
        if (!ProcessRecord(&fork_record)) {
          return false;
        }
        if (GetThreadName(tid, &name)) {
          CommRecord comm_record(attr, pid, tid, name, event_id, 0);
          if (!ProcessRecord(&comm_record)) {
            return false;
          }
        }
      }
    }
  }
  return true;
}
777
ProcessRecord(Record * record)778 bool RecordCommand::ProcessRecord(Record* record) {
779 if (system_wide_collection_ && record->type() == PERF_RECORD_SAMPLE) {
780 auto& r = *static_cast<SampleRecord*>(record);
781 // Omit samples get before start sampling time.
782 if (r.time_data.time < start_sampling_time_in_ns_) {
783 return true;
784 }
785 }
786 UpdateRecordForEmbeddedElfPath(record);
787 if (unwind_dwarf_callchain_ && !post_unwind_) {
788 thread_tree_.Update(*record);
789 if (!UnwindRecord(record)) {
790 return false;
791 }
792 }
793 if (record->type() == PERF_RECORD_SAMPLE) {
794 sample_record_count_++;
795 } else if (record->type() == PERF_RECORD_LOST) {
796 lost_record_count_ += static_cast<LostRecord*>(record)->lost;
797 }
798 bool result = record_file_writer_->WriteRecord(*record);
799 return result;
800 }
801
// Rewrites an MMAP/MMAP2 record whose file is an ELF embedded inside an APK.
// Templated because MmapRecord and Mmap2Record share the accessed fields but
// have no common base exposing them.
template <class RecordType>
void UpdateMmapRecordForEmbeddedElfPath(RecordType* record) {
  RecordType& r = *record;
  // pgoff == 0 can't be an embedded ELF (the APK header is at offset 0);
  // kernel maps never are.
  if (!r.InKernel() && r.data->pgoff != 0) {
    // For the case of a shared library "foobar.so" embedded
    // inside an APK, we rewrite the original MMAP from
    // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
    // so as to make the library name explicit. This update is
    // done here (as part of the record operation) as opposed to
    // on the host during the report, since we want to report
    // the correct library name even if the the APK in question
    // is not present on the host. The new offset W is
    // calculated to be with respect to the start of foobar.so,
    // not to the start of path.apk.
    EmbeddedElf* ee =
        ApkInspector::FindElfInApkByOffset(r.filename, r.data->pgoff);
    if (ee != nullptr) {
      // Compute new offset relative to start of elf in APK.
      auto data = *r.data;
      data.pgoff -= ee->entry_offset();
      r.SetDataAndFilename(data, GetUrlInApk(r.filename, ee->entry_name()));
    }
  }
}
826
UpdateRecordForEmbeddedElfPath(Record * record)827 void RecordCommand::UpdateRecordForEmbeddedElfPath(Record* record) {
828 if (record->type() == PERF_RECORD_MMAP) {
829 UpdateMmapRecordForEmbeddedElfPath(static_cast<MmapRecord*>(record));
830 } else if (record->type() == PERF_RECORD_MMAP2) {
831 UpdateMmapRecordForEmbeddedElfPath(static_cast<Mmap2Record*>(record));
832 }
833 }
834
// If `record` is a sample carrying user regs and a user stack dump, unwinds
// the dwarf call chain and replaces the reg/stack data with the resulting
// IPs (shrinking the record). Samples missing any required piece are left
// unchanged; always returns true in this view of the code.
bool RecordCommand::UnwindRecord(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    SampleRecord& r = *static_cast<SampleRecord*>(record);
    // All of callchain + user regs (non-empty mask) + non-empty user stack
    // must be present for unwinding to be possible.
    if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) &&
        (r.sample_type & PERF_SAMPLE_REGS_USER) &&
        (r.regs_user_data.reg_mask != 0) &&
        (r.sample_type & PERF_SAMPLE_STACK_USER) &&
        (r.GetValidStackSize() > 0)) {
      ThreadEntry* thread =
          thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
      RegSet regs = CreateRegSet(r.regs_user_data.abi,
                                 r.regs_user_data.reg_mask,
                                 r.regs_user_data.regs);
      // Normally do strict arch check when unwinding stack. But allow unwinding
      // 32-bit processes on 64-bit devices for system wide profiling.
      bool strict_arch_check = !system_wide_collection_;
      std::vector<uint64_t> unwind_ips =
          UnwindCallChain(r.regs_user_data.abi, *thread, regs,
                          r.stack_user_data.data,
                          r.GetValidStackSize(), strict_arch_check);
      r.ReplaceRegAndStackWithCallChain(unwind_ips);
    }
  }
  return true;
}
860
// Offline unwinding (--post-unwind): re-reads the just-written record file,
// unwinds every sample, writes the result to <file>.tmp, then atomically-ish
// replaces the original (unlink + rename). Re-dumps the feature sections
// since they are rebuilt from the rewritten data.
bool RecordCommand::PostUnwind(const std::vector<std::string>& args) {
  // Rebuild the thread tree from scratch while replaying the file.
  thread_tree_.ClearThreadAndMap();
  std::unique_ptr<RecordFileReader> reader =
      RecordFileReader::CreateInstance(record_filename_);
  if (reader == nullptr) {
    return false;
  }
  std::string tmp_filename = record_filename_ + ".tmp";
  record_file_writer_ = CreateRecordFile(tmp_filename);
  if (record_file_writer_ == nullptr) {
    return false;
  }
  // Replay every data record: update thread tree, unwind, re-write.
  // `false` — presumably "don't sort records by time"; TODO confirm against
  // RecordFileReader::ReadDataSection.
  bool result = reader->ReadDataSection(
      [this](std::unique_ptr<Record> record) {
        thread_tree_.Update(*record);
        if (!UnwindRecord(record.get())) {
          return false;
        }
        return record_file_writer_->WriteRecord(*record);
      },
      false);
  if (!result) {
    return false;
  }
  if (!DumpAdditionalFeatures(args)) {
    return false;
  }
  if (!record_file_writer_->Close()) {
    return false;
  }

  // Swap the unwound file into place.
  if (unlink(record_filename_.c_str()) != 0) {
    PLOG(ERROR) << "failed to remove " << record_filename_;
    return false;
  }
  if (rename(tmp_filename.c_str(), record_filename_.c_str()) != 0) {
    PLOG(ERROR) << "failed to rename " << tmp_filename << " to "
                << record_filename_;
    return false;
  }
  return true;
}
903
// Writes the feature sections (build ids, optional file symbols, OS release,
// arch, cmdline, optional branch-stack) at the end of the record file. First
// replays the data section to learn which files were hit by samples.
bool RecordCommand::DumpAdditionalFeatures(
    const std::vector<std::string>& args) {
  // Read data section of perf.data to collect hit file information.
  thread_tree_.ClearThreadAndMap();
  Dso::ReadKernelSymbolsFromProc();
  auto callback = [&](const Record* r) {
    thread_tree_.Update(*r);
    if (r->type() == PERF_RECORD_SAMPLE) {
      CollectHitFileInfo(*reinterpret_cast<const SampleRecord*>(r));
    }
  };
  if (!record_file_writer_->ReadDataSection(callback)) {
    return false;
  }

  // Base features always written: build ids, os release, arch, cmdline.
  // Keep this count in sync with the WriteFeature* calls below.
  size_t feature_count = 4;
  if (branch_sampling_) {
    feature_count++;  // branch-stack feature
  }
  if (dump_symbols_) {
    feature_count++;  // file feature
  }
  if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
    return false;
  }
  if (!DumpBuildIdFeature()) {
    return false;
  }
  if (dump_symbols_ && !DumpFileFeature()) {
    return false;
  }
  utsname uname_buf;
  if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
    PLOG(ERROR) << "uname() failed";
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE,
                                               uname_buf.release)) {
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH,
                                               uname_buf.machine)) {
    return false;
  }

  // Reconstruct the full command line ("<exec> record <args...>") so reports
  // can show how the profile was captured.
  std::string exec_path = android::base::GetExecutablePath();
  if (exec_path.empty()) exec_path = "simpleperf";
  std::vector<std::string> cmdline;
  cmdline.push_back(exec_path);
  cmdline.push_back("record");
  cmdline.insert(cmdline.end(), args.begin(), args.end());
  if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
    return false;
  }
  if (branch_sampling_ != 0 &&
      !record_file_writer_->WriteBranchStackFeature()) {
    return false;
  }
  if (!record_file_writer_->EndWriteFeatures()) {
    return false;
  }
  return true;
}
967
DumpBuildIdFeature()968 bool RecordCommand::DumpBuildIdFeature() {
969 std::vector<BuildIdRecord> build_id_records;
970 BuildId build_id;
971 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
972 for (Dso* dso : dso_v) {
973 if (!dso->HasDumpId()) {
974 continue;
975 }
976 if (dso->type() == DSO_KERNEL) {
977 if (!GetKernelBuildId(&build_id)) {
978 continue;
979 }
980 build_id_records.push_back(
981 BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
982 } else if (dso->type() == DSO_KERNEL_MODULE) {
983 std::string path = dso->Path();
984 std::string module_name = basename(&path[0]);
985 if (android::base::EndsWith(module_name, ".ko")) {
986 module_name = module_name.substr(0, module_name.size() - 3);
987 }
988 if (!GetModuleBuildId(module_name, &build_id)) {
989 LOG(DEBUG) << "can't read build_id for module " << module_name;
990 continue;
991 }
992 build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, path));
993 } else {
994 if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP) {
995 continue;
996 }
997 auto tuple = SplitUrlInApk(dso->Path());
998 if (std::get<0>(tuple)) {
999 ElfStatus result = GetBuildIdFromApkFile(std::get<1>(tuple),
1000 std::get<2>(tuple), &build_id);
1001 if (result != ElfStatus::NO_ERROR) {
1002 LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": "
1003 << result;
1004 continue;
1005 }
1006 } else {
1007 ElfStatus result = GetBuildIdFromElfFile(dso->Path(), &build_id);
1008 if (result != ElfStatus::NO_ERROR) {
1009 LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": "
1010 << result;
1011 continue;
1012 }
1013 }
1014 build_id_records.push_back(
1015 BuildIdRecord(false, UINT_MAX, build_id, dso->Path()));
1016 }
1017 }
1018 if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
1019 return false;
1020 }
1021 return true;
1022 }
1023
DumpFileFeature()1024 bool RecordCommand::DumpFileFeature() {
1025 std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
1026 for (Dso* dso : dso_v) {
1027 if (!dso->HasDumpId()) {
1028 continue;
1029 }
1030 uint32_t dso_type = dso->type();
1031 uint64_t min_vaddr = dso->MinVirtualAddress();
1032
1033 // Dumping all symbols in hit files takes too much space, so only dump
1034 // needed symbols.
1035 const std::vector<Symbol>& symbols = dso->GetSymbols();
1036 std::vector<const Symbol*> dump_symbols;
1037 for (const auto& sym : symbols) {
1038 if (sym.HasDumpId()) {
1039 dump_symbols.push_back(&sym);
1040 }
1041 }
1042 std::sort(dump_symbols.begin(), dump_symbols.end(), Symbol::CompareByAddr);
1043
1044 if (!record_file_writer_->WriteFileFeature(dso->Path(), dso_type, min_vaddr,
1045 dump_symbols)) {
1046 return false;
1047 }
1048 }
1049 return true;
1050 }
1051
// Mark the binaries (and, when --dump-symbols is set, the individual
// symbols) hit by one sample, so that DumpBuildIdFeature()/DumpFileFeature()
// later know what to emit. Walks the sample ip and, if present, every ip in
// the callchain.
void RecordCommand::CollectHitFileInfo(const SampleRecord& r) {
  const ThreadEntry* thread =
      thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
  const MapEntry* map =
      thread_tree_.FindMap(thread, r.ip_data.ip, r.InKernel());
  Dso* dso = map->dso;
  const Symbol* symbol;
  if (dump_symbols_) {
    // &dso is an out-param, so the lookup can replace which dso the symbol
    // is attributed to — NOTE(review): presumably a debug-info redirect;
    // confirm against ThreadTree::FindSymbol.
    symbol = thread_tree_.FindSymbol(map, r.ip_data.ip, nullptr, &dso);
    if (!symbol->HasDumpId()) {
      dso->CreateSymbolDumpId(symbol);
    }
  }
  if (!dso->HasDumpId()) {
    dso->CreateDumpId();
  }
  if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
    bool in_kernel = r.InKernel();
    bool first_ip = true;
    for (uint64_t i = 0; i < r.callchain_data.ip_nr; ++i) {
      uint64_t ip = r.callchain_data.ips[i];
      if (ip >= PERF_CONTEXT_MAX) {
        // Context markers are not real addresses: they switch which address
        // space (kernel vs user) the following ips are resolved in.
        switch (ip) {
          case PERF_CONTEXT_KERNEL:
            in_kernel = true;
            break;
          case PERF_CONTEXT_USER:
            in_kernel = false;
            break;
          default:
            LOG(DEBUG) << "Unexpected perf_context in callchain: " << std::hex
                       << ip;
        }
      } else {
        if (first_ip) {
          first_ip = false;
          // Remove duplication with sample ip.
          if (ip == r.ip_data.ip) {
            continue;
          }
        }
        map = thread_tree_.FindMap(thread, ip, in_kernel);
        dso = map->dso;
        if (dump_symbols_) {
          symbol = thread_tree_.FindSymbol(map, ip, nullptr, &dso);
          if (!symbol->HasDumpId()) {
            dso->CreateSymbolDumpId(symbol);
          }
        }
        if (!dso->HasDumpId()) {
          dso->CreateDumpId();
        }
      }
    }
  }
}
1108
RegisterRecordCommand()1109 void RegisterRecordCommand() {
1110 RegisterCommand("record",
1111 [] { return std::unique_ptr<Command>(new RecordCommand()); });
1112 }
1113