• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <libgen.h>
18 #include <signal.h>
19 #include <sys/prctl.h>
20 #include <sys/utsname.h>
21 #include <unistd.h>
22 #include <set>
23 #include <string>
24 #include <unordered_map>
25 #include <vector>
26 
27 #include <android-base/logging.h>
28 #include <android-base/file.h>
29 #include <android-base/parsedouble.h>
30 #include <android-base/parseint.h>
31 #include <android-base/strings.h>
32 
33 #include "command.h"
34 #include "dwarf_unwind.h"
35 #include "environment.h"
36 #include "event_selection_set.h"
37 #include "event_type.h"
38 #include "IOEventLoop.h"
39 #include "perf_clock.h"
40 #include "read_apk.h"
41 #include "read_elf.h"
42 #include "record.h"
43 #include "record_file.h"
44 #include "thread_tree.h"
45 #include "tracing.h"
46 #include "utils.h"
47 #include "workload.h"
48 
49 static std::string default_measured_event_type = "cpu-cycles";
50 
51 static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
52     {"u", PERF_SAMPLE_BRANCH_USER},
53     {"k", PERF_SAMPLE_BRANCH_KERNEL},
54     {"any", PERF_SAMPLE_BRANCH_ANY},
55     {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
56     {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
57     {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
58 };
59 
// A record dumped by the kernel can't be bigger than 65535 bytes, and the
// dump stack size has to be a multiple of 8, which makes 65528 the biggest
// usable dump stack size.
constexpr uint32_t MAX_DUMP_STACK_SIZE{65528};

// How many pages can really be mapped is limited by rlimit(RLIMIT_MEMLOCK).
// 1024 is only the value we would like to get; when mapping succeeds with it,
// the buffer size is 1024 * 4K (page size) = 4M.
constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER{1024};
68 
69 class RecordCommand : public Command {
70  public:
RecordCommand()71   RecordCommand()
72       : Command(
73             "record", "record sampling info in perf.data",
74             // clang-format off
75 "Usage: simpleperf record [options] [command [command-args]]\n"
76 "       Gather sampling information of running [command]. And -a/-p/-t option\n"
77 "       can be used to change target of sampling information.\n"
78 "-a     System-wide collection.\n"
79 "-b     Enable take branch stack sampling. Same as '-j any'\n"
80 "-c count     Set event sample period. It means recording one sample when\n"
81 "             [count] events happen. Can't be used with -f/-F option.\n"
82 "             For tracepoint events, the default option is -c 1.\n"
83 "--call-graph fp | dwarf[,<dump_stack_size>]\n"
84 "             Enable call graph recording. Use frame pointer or dwarf debug\n"
85 "             frame as the method to parse call graph in stack.\n"
86 "             Default is dwarf,65528.\n"
87 "--cpu cpu_item1,cpu_item2,...\n"
88 "             Collect samples only on the selected cpus. cpu_item can be cpu\n"
89 "             number like 1, or cpu range like 0-3.\n"
90 "--dump-symbols  Dump symbols in perf.data. By default perf.data doesn't contain\n"
91 "                symbol information for samples. This option is used when there\n"
92 "                is no symbol information in report environment.\n"
93 "--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
94 "                        [command]. Here time_in_sec may be any positive\n"
95 "                        floating point number.\n"
96 "-e event1[:modifier1],event2[:modifier2],...\n"
97 "             Select the event list to sample. Use `simpleperf list` to find\n"
98 "             all possible event names. Modifiers can be added to define how\n"
99 "             the event should be monitored.\n"
100 "             Possible modifiers are:\n"
101 "                u - monitor user space events only\n"
102 "                k - monitor kernel space events only\n"
103 "-f freq      Set event sample frequency. It means recording at most [freq]\n"
104 "             samples every second. For non-tracepoint events, the default\n"
105 "             option is -f 4000.\n"
106 "-F freq      Same as '-f freq'.\n"
107 "-g           Same as '--call-graph dwarf'.\n"
108 "--group event1[:modifier],event2[:modifier2],...\n"
109 "             Similar to -e option. But events specified in the same --group\n"
110 "             option are monitored as a group, and scheduled in and out at the\n"
111 "             same time.\n"
112 "-j branch_filter1,branch_filter2,...\n"
113 "             Enable taken branch stack sampling. Each sample captures a series\n"
114 "             of consecutive taken branches.\n"
115 "             The following filters are defined:\n"
116 "                any: any type of branch\n"
117 "                any_call: any function call or system call\n"
118 "                any_ret: any function return or system call return\n"
119 "                ind_call: any indirect branch\n"
120 "                u: only when the branch target is at the user level\n"
121 "                k: only when the branch target is in the kernel\n"
122 "             This option requires at least one branch type among any, any_call,\n"
123 "             any_ret, ind_call.\n"
124 "-m mmap_pages   Set the size of the buffer used to receiving sample data from\n"
125 "                the kernel. It should be a power of 2. If not set, the max\n"
126 "                possible value <= 1024 will be used.\n"
127 "--no-dump-kernel-symbols  Don't dump kernel symbols in perf.data. By default\n"
128 "                          kernel symbols will be dumped when needed.\n"
129 "--no-inherit  Don't record created child threads/processes.\n"
130 "--no-unwind   If `--call-graph dwarf` option is used, then the user's stack\n"
131 "              will be unwound by default. Use this option to disable the\n"
132 "              unwinding of the user's stack.\n"
133 "-o record_file_name    Set record file name, default is perf.data.\n"
134 "-p pid1,pid2,...       Record events on existing processes. Mutually exclusive\n"
135 "                       with -a.\n"
136 "--post-unwind  If `--call-graph dwarf` option is used, then the user's stack\n"
137 "               will be unwound while recording by default. But it may lose\n"
138 "               records as stacking unwinding can be time consuming. Use this\n"
139 "               option to unwind the user's stack after recording.\n"
140 "--symfs <dir>    Look for files with symbols relative to this directory.\n"
141 "                 This option is used to provide files with symbol table and\n"
142 "                 debug information, which are used by --dump-symbols and -g.\n"
143 "-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
144             // clang-format on
145             ),
146         use_sample_freq_(false),
147         sample_freq_(0),
148         use_sample_period_(false),
149         sample_period_(0),
150         system_wide_collection_(false),
151         branch_sampling_(0),
152         fp_callchain_sampling_(false),
153         dwarf_callchain_sampling_(false),
154         dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
155         unwind_dwarf_callchain_(true),
156         post_unwind_(false),
157         child_inherit_(true),
158         duration_in_sec_(0),
159         can_dump_kernel_symbols_(true),
160         dump_symbols_(false),
161         event_selection_set_(false),
162         mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
163         record_filename_("perf.data"),
164         start_sampling_time_in_ns_(0),
165         sample_record_count_(0),
166         lost_record_count_(0) {
167     // Stop profiling if parent exits.
168     prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
169   }
170 
171   bool Run(const std::vector<std::string>& args);
172 
173  private:
174   bool ParseOptions(const std::vector<std::string>& args,
175                     std::vector<std::string>* non_option_args);
176   bool SetEventSelectionFlags();
177   bool CreateAndInitRecordFile();
178   std::unique_ptr<RecordFileWriter> CreateRecordFile(
179       const std::string& filename);
180   bool DumpKernelSymbol();
181   bool DumpTracingData();
182   bool DumpKernelAndModuleMmaps(const perf_event_attr& attr, uint64_t event_id);
183   bool DumpThreadCommAndMmaps(const perf_event_attr& attr, uint64_t event_id);
184   bool ProcessRecord(Record* record);
185   void UpdateRecordForEmbeddedElfPath(Record* record);
186   bool UnwindRecord(Record* record);
187   bool PostUnwind(const std::vector<std::string>& args);
188   bool DumpAdditionalFeatures(const std::vector<std::string>& args);
189   bool DumpBuildIdFeature();
190   bool DumpFileFeature();
191   void CollectHitFileInfo(const SampleRecord& r);
192 
193   bool use_sample_freq_;
194   uint64_t sample_freq_;  // Sample 'sample_freq_' times per second.
195   bool use_sample_period_;
196   uint64_t sample_period_;  // Sample once when 'sample_period_' events occur.
197 
198   bool system_wide_collection_;
199   uint64_t branch_sampling_;
200   bool fp_callchain_sampling_;
201   bool dwarf_callchain_sampling_;
202   uint32_t dump_stack_size_in_dwarf_sampling_;
203   bool unwind_dwarf_callchain_;
204   bool post_unwind_;
205   bool child_inherit_;
206   double duration_in_sec_;
207   bool can_dump_kernel_symbols_;
208   bool dump_symbols_;
209   std::vector<int> cpus_;
210   EventSelectionSet event_selection_set_;
211 
212   std::pair<size_t, size_t> mmap_page_range_;
213 
214   ThreadTree thread_tree_;
215   std::string record_filename_;
216   std::unique_ptr<RecordFileWriter> record_file_writer_;
217 
218   uint64_t start_sampling_time_in_ns_;  // nanoseconds from machine starting
219 
220   uint64_t sample_record_count_;
221   uint64_t lost_record_count_;
222 };
223 
Run(const std::vector<std::string> & args)224 bool RecordCommand::Run(const std::vector<std::string>& args) {
225   if (!CheckPerfEventLimit()) {
226     return false;
227   }
228   if (!InitPerfClock()) {
229     return false;
230   }
231 
232   // 1. Parse options, and use default measured event type if not given.
233   std::vector<std::string> workload_args;
234   if (!ParseOptions(args, &workload_args)) {
235     return false;
236   }
237   if (event_selection_set_.empty()) {
238     if (!event_selection_set_.AddEventType(default_measured_event_type)) {
239       return false;
240     }
241   }
242   if (!SetEventSelectionFlags()) {
243     return false;
244   }
245   ScopedCurrentArch scoped_arch(GetMachineArch());
246 
247   // 2. Create workload.
248   std::unique_ptr<Workload> workload;
249   if (!workload_args.empty()) {
250     workload = Workload::CreateWorkload(workload_args);
251     if (workload == nullptr) {
252       return false;
253     }
254   }
255   bool need_to_check_targets = false;
256   if (system_wide_collection_) {
257     event_selection_set_.AddMonitoredThreads({-1});
258   } else if (!event_selection_set_.HasMonitoredTarget()) {
259     if (workload != nullptr) {
260       event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
261       event_selection_set_.SetEnableOnExec(true);
262       if (event_selection_set_.HasInplaceSampler()) {
263         // Start worker early, because the worker process has to setup inplace-sampler server
264         // before we try to connect it.
265         if (!workload->Start()) {
266           return false;
267         }
268       }
269     } else {
270       LOG(ERROR)
271           << "No threads to monitor. Try `simpleperf help record` for help";
272       return false;
273     }
274   } else {
275     need_to_check_targets = true;
276   }
277 
278   // 3. Open perf_event_files, create mapped buffers for perf_event_files.
279   if (!event_selection_set_.OpenEventFiles(cpus_)) {
280     return false;
281   }
282   if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first,
283                                            mmap_page_range_.second)) {
284     return false;
285   }
286 
287   // 4. Create perf.data.
288   if (!CreateAndInitRecordFile()) {
289     return false;
290   }
291 
292   // 5. Add read/signal/periodic Events.
293   auto callback =
294       std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
295   if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
296     return false;
297   }
298   if (!event_selection_set_.HandleCpuHotplugEvents(cpus_)) {
299     return false;
300   }
301   if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
302     return false;
303   }
304   IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
305   if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP},
306                              [&]() { return loop->ExitLoop(); })) {
307     return false;
308   }
309   if (duration_in_sec_ != 0) {
310     if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
311                                 [&]() { return loop->ExitLoop(); })) {
312       return false;
313     }
314   }
315 
316   // 6. Write records in mapped buffers of perf_event_files to output file while
317   //    workload is running.
318   start_sampling_time_in_ns_ = GetPerfClock();
319   LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_
320                << " ns";
321   if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
322     return false;
323   }
324   if (!loop->RunLoop()) {
325     return false;
326   }
327   if (!event_selection_set_.FinishReadMmapEventData()) {
328     return false;
329   }
330 
331   // 7. Dump additional features, and close record file.
332   if (!DumpAdditionalFeatures(args)) {
333     return false;
334   }
335   if (!record_file_writer_->Close()) {
336     return false;
337   }
338 
339   // 8. Unwind dwarf callchain.
340   if (post_unwind_) {
341     if (!PostUnwind(args)) {
342       return false;
343     }
344   }
345 
346   // 9. Show brief record result.
347   LOG(INFO) << "Samples recorded: " << sample_record_count_
348             << ". Samples lost: " << lost_record_count_ << ".";
349   if (sample_record_count_ + lost_record_count_ != 0) {
350     double lost_percent = static_cast<double>(lost_record_count_) /
351                           (lost_record_count_ + sample_record_count_);
352     constexpr double LOST_PERCENT_WARNING_BAR = 0.1;
353     if (lost_percent >= LOST_PERCENT_WARNING_BAR) {
354       LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, "
355                    << "consider increasing mmap_pages(-m), "
356                    << "or decreasing sample frequency(-f), "
357                    << "or increasing sample period(-c).";
358     }
359   }
360   return true;
361 }
362 
ParseOptions(const std::vector<std::string> & args,std::vector<std::string> * non_option_args)363 bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
364                                  std::vector<std::string>* non_option_args) {
365   size_t i;
366   for (i = 0; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
367     if (args[i] == "-a") {
368       system_wide_collection_ = true;
369     } else if (args[i] == "-b") {
370       branch_sampling_ = branch_sampling_type_map["any"];
371     } else if (args[i] == "-c") {
372       if (!NextArgumentOrError(args, &i)) {
373         return false;
374       }
375       char* endptr;
376       sample_period_ = strtoull(args[i].c_str(), &endptr, 0);
377       if (*endptr != '\0' || sample_period_ == 0) {
378         LOG(ERROR) << "Invalid sample period: '" << args[i] << "'";
379         return false;
380       }
381       use_sample_period_ = true;
382     } else if (args[i] == "--call-graph") {
383       if (!NextArgumentOrError(args, &i)) {
384         return false;
385       }
386       std::vector<std::string> strs = android::base::Split(args[i], ",");
387       if (strs[0] == "fp") {
388         fp_callchain_sampling_ = true;
389         dwarf_callchain_sampling_ = false;
390       } else if (strs[0] == "dwarf") {
391         fp_callchain_sampling_ = false;
392         dwarf_callchain_sampling_ = true;
393         if (strs.size() > 1) {
394           char* endptr;
395           uint64_t size = strtoull(strs[1].c_str(), &endptr, 0);
396           if (*endptr != '\0' || size > UINT_MAX) {
397             LOG(ERROR) << "invalid dump stack size in --call-graph option: "
398                        << strs[1];
399             return false;
400           }
401           if ((size & 7) != 0) {
402             LOG(ERROR) << "dump stack size " << size
403                        << " is not 8-byte aligned.";
404             return false;
405           }
406           if (size >= MAX_DUMP_STACK_SIZE) {
407             LOG(ERROR) << "dump stack size " << size
408                        << " is bigger than max allowed size "
409                        << MAX_DUMP_STACK_SIZE << ".";
410             return false;
411           }
412           dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
413         }
414       } else {
415         LOG(ERROR) << "unexpected argument for --call-graph option: "
416                    << args[i];
417         return false;
418       }
419     } else if (args[i] == "--cpu") {
420       if (!NextArgumentOrError(args, &i)) {
421         return false;
422       }
423       cpus_ = GetCpusFromString(args[i]);
424     } else if (args[i] == "--dump-symbols") {
425       dump_symbols_ = true;
426     } else if (args[i] == "--duration") {
427       if (!NextArgumentOrError(args, &i)) {
428         return false;
429       }
430       if (!android::base::ParseDouble(args[i].c_str(), &duration_in_sec_,
431                                       1e-9)) {
432         LOG(ERROR) << "Invalid duration: " << args[i].c_str();
433         return false;
434       }
435     } else if (args[i] == "-e") {
436       if (!NextArgumentOrError(args, &i)) {
437         return false;
438       }
439       std::vector<std::string> event_types = android::base::Split(args[i], ",");
440       for (auto& event_type : event_types) {
441         if (!event_selection_set_.AddEventType(event_type)) {
442           return false;
443         }
444       }
445     } else if (args[i] == "-f" || args[i] == "-F") {
446       if (!NextArgumentOrError(args, &i)) {
447         return false;
448       }
449       if (!android::base::ParseUint(args[i].c_str(), &sample_freq_)) {
450         LOG(ERROR) << "Invalid sample frequency: " << args[i];
451         return false;
452       }
453       if (!CheckSampleFrequency(sample_freq_)) {
454         return false;
455       }
456       use_sample_freq_ = true;
457     } else if (args[i] == "-g") {
458       fp_callchain_sampling_ = false;
459       dwarf_callchain_sampling_ = true;
460     } else if (args[i] == "--group") {
461       if (!NextArgumentOrError(args, &i)) {
462         return false;
463       }
464       std::vector<std::string> event_types = android::base::Split(args[i], ",");
465       if (!event_selection_set_.AddEventGroup(event_types)) {
466         return false;
467       }
468     } else if (args[i] == "-j") {
469       if (!NextArgumentOrError(args, &i)) {
470         return false;
471       }
472       std::vector<std::string> branch_sampling_types =
473           android::base::Split(args[i], ",");
474       for (auto& type : branch_sampling_types) {
475         auto it = branch_sampling_type_map.find(type);
476         if (it == branch_sampling_type_map.end()) {
477           LOG(ERROR) << "unrecognized branch sampling filter: " << type;
478           return false;
479         }
480         branch_sampling_ |= it->second;
481       }
482     } else if (args[i] == "-m") {
483       if (!NextArgumentOrError(args, &i)) {
484         return false;
485       }
486       char* endptr;
487       uint64_t pages = strtoull(args[i].c_str(), &endptr, 0);
488       if (*endptr != '\0' || !IsPowerOfTwo(pages)) {
489         LOG(ERROR) << "Invalid mmap_pages: '" << args[i] << "'";
490         return false;
491       }
492       mmap_page_range_.first = mmap_page_range_.second = pages;
493     } else if (args[i] == "--no-dump-kernel-symbols") {
494       can_dump_kernel_symbols_ = false;
495     } else if (args[i] == "--no-inherit") {
496       child_inherit_ = false;
497     } else if (args[i] == "--no-unwind") {
498       unwind_dwarf_callchain_ = false;
499     } else if (args[i] == "-o") {
500       if (!NextArgumentOrError(args, &i)) {
501         return false;
502       }
503       record_filename_ = args[i];
504     } else if (args[i] == "-p") {
505       if (!NextArgumentOrError(args, &i)) {
506         return false;
507       }
508       std::set<pid_t> pids;
509       if (!GetValidThreadsFromThreadString(args[i], &pids)) {
510         return false;
511       }
512       event_selection_set_.AddMonitoredProcesses(pids);
513     } else if (args[i] == "--post-unwind") {
514       post_unwind_ = true;
515     } else if (args[i] == "--symfs") {
516       if (!NextArgumentOrError(args, &i)) {
517         return false;
518       }
519       if (!Dso::SetSymFsDir(args[i])) {
520         return false;
521       }
522     } else if (args[i] == "-t") {
523       if (!NextArgumentOrError(args, &i)) {
524         return false;
525       }
526       std::set<pid_t> tids;
527       if (!GetValidThreadsFromThreadString(args[i], &tids)) {
528         return false;
529       }
530       event_selection_set_.AddMonitoredThreads(tids);
531     } else {
532       ReportUnknownOption(args, i);
533       return false;
534     }
535   }
536 
537   if (use_sample_freq_ && use_sample_period_) {
538     LOG(ERROR) << "-f option can't be used with -c option.";
539     return false;
540   }
541 
542   if (!dwarf_callchain_sampling_) {
543     if (!unwind_dwarf_callchain_) {
544       LOG(ERROR)
545           << "--no-unwind is only used with `--call-graph dwarf` option.";
546       return false;
547     }
548     unwind_dwarf_callchain_ = false;
549   }
550   if (post_unwind_) {
551     if (!dwarf_callchain_sampling_) {
552       LOG(ERROR)
553           << "--post-unwind is only used with `--call-graph dwarf` option.";
554       return false;
555     }
556     if (!unwind_dwarf_callchain_) {
557       LOG(ERROR) << "--post-unwind can't be used with `--no-unwind` option.";
558       return false;
559     }
560   }
561 
562   if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
563     LOG(ERROR) << "Record system wide and existing processes/threads can't be "
564                   "used at the same time.";
565     return false;
566   }
567 
568   if (system_wide_collection_ && !IsRoot()) {
569     LOG(ERROR) << "System wide profiling needs root privilege.";
570     return false;
571   }
572 
573   if (dump_symbols_ && can_dump_kernel_symbols_) {
574     // No need to dump kernel symbols as we will dump all required symbols.
575     can_dump_kernel_symbols_ = false;
576   }
577 
578   non_option_args->clear();
579   for (; i < args.size(); ++i) {
580     non_option_args->push_back(args[i]);
581   }
582   return true;
583 }
584 
SetEventSelectionFlags()585 bool RecordCommand::SetEventSelectionFlags() {
586   if (use_sample_freq_) {
587     event_selection_set_.SetSampleFreq(sample_freq_);
588   } else if (use_sample_period_) {
589     event_selection_set_.SetSamplePeriod(sample_period_);
590   } else {
591     event_selection_set_.UseDefaultSampleFreq();
592   }
593   event_selection_set_.SampleIdAll();
594   if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
595     return false;
596   }
597   if (fp_callchain_sampling_) {
598     event_selection_set_.EnableFpCallChainSampling();
599   } else if (dwarf_callchain_sampling_) {
600     if (!event_selection_set_.EnableDwarfCallChainSampling(
601             dump_stack_size_in_dwarf_sampling_)) {
602       return false;
603     }
604   }
605   event_selection_set_.SetInherit(child_inherit_);
606   return true;
607 }
608 
CreateAndInitRecordFile()609 bool RecordCommand::CreateAndInitRecordFile() {
610   record_file_writer_ = CreateRecordFile(record_filename_);
611   if (record_file_writer_ == nullptr) {
612     return false;
613   }
614   // Use first perf_event_attr and first event id to dump mmap and comm records.
615   EventAttrWithId attr_id = event_selection_set_.GetEventAttrWithId()[0];
616   if (!DumpKernelSymbol()) {
617     return false;
618   }
619   if (!DumpTracingData()) {
620     return false;
621   }
622   if (!DumpKernelAndModuleMmaps(*attr_id.attr, attr_id.ids[0])) {
623     return false;
624   }
625   if (!DumpThreadCommAndMmaps(*attr_id.attr, attr_id.ids[0])) {
626     return false;
627   }
628   return true;
629 }
630 
CreateRecordFile(const std::string & filename)631 std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(
632     const std::string& filename) {
633   std::unique_ptr<RecordFileWriter> writer =
634       RecordFileWriter::CreateInstance(filename);
635   if (writer == nullptr) {
636     return nullptr;
637   }
638 
639   if (!writer->WriteAttrSection(event_selection_set_.GetEventAttrWithId())) {
640     return nullptr;
641   }
642   return writer;
643 }
644 
DumpKernelSymbol()645 bool RecordCommand::DumpKernelSymbol() {
646   if (can_dump_kernel_symbols_) {
647     std::string kallsyms;
648     if (event_selection_set_.NeedKernelSymbol() &&
649         CheckKernelSymbolAddresses()) {
650       if (!android::base::ReadFileToString("/proc/kallsyms", &kallsyms)) {
651         PLOG(ERROR) << "failed to read /proc/kallsyms";
652         return false;
653       }
654       KernelSymbolRecord r(kallsyms);
655       if (!ProcessRecord(&r)) {
656         return false;
657       }
658     }
659   }
660   return true;
661 }
662 
DumpTracingData()663 bool RecordCommand::DumpTracingData() {
664   std::vector<const EventType*> tracepoint_event_types =
665       event_selection_set_.GetTracepointEvents();
666   if (tracepoint_event_types.empty()) {
667     return true;  // No need to dump tracing data.
668   }
669   std::vector<char> tracing_data;
670   if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
671     return false;
672   }
673   TracingDataRecord record(tracing_data);
674   if (!ProcessRecord(&record)) {
675     return false;
676   }
677   return true;
678 }
679 
DumpKernelAndModuleMmaps(const perf_event_attr & attr,uint64_t event_id)680 bool RecordCommand::DumpKernelAndModuleMmaps(const perf_event_attr& attr,
681                                              uint64_t event_id) {
682   KernelMmap kernel_mmap;
683   std::vector<KernelMmap> module_mmaps;
684   GetKernelAndModuleMmaps(&kernel_mmap, &module_mmaps);
685 
686   MmapRecord mmap_record(attr, true, UINT_MAX, 0, kernel_mmap.start_addr,
687                          kernel_mmap.len, 0, kernel_mmap.filepath, event_id);
688   if (!ProcessRecord(&mmap_record)) {
689     return false;
690   }
691   for (auto& module_mmap : module_mmaps) {
692     MmapRecord mmap_record(attr, true, UINT_MAX, 0, module_mmap.start_addr,
693                            module_mmap.len, 0, module_mmap.filepath, event_id);
694     if (!ProcessRecord(&mmap_record)) {
695       return false;
696     }
697   }
698   return true;
699 }
700 
DumpThreadCommAndMmaps(const perf_event_attr & attr,uint64_t event_id)701 bool RecordCommand::DumpThreadCommAndMmaps(const perf_event_attr& attr,
702                                            uint64_t event_id) {
703   // Decide which processes and threads to dump.
704   // For system_wide profiling, dump all threads.
705   // For non system wide profiling, build dump_threads.
706   bool all_threads = system_wide_collection_;
707   std::set<pid_t> dump_threads = event_selection_set_.GetMonitoredThreads();
708   for (const auto& pid : event_selection_set_.GetMonitoredProcesses()) {
709     std::vector<pid_t> tids = GetThreadsInProcess(pid);
710     dump_threads.insert(tids.begin(), tids.end());
711   }
712 
713   // Collect processes to dump.
714   std::vector<pid_t> processes;
715   if (all_threads) {
716     processes = GetAllProcesses();
717   } else {
718     std::set<pid_t> process_set;
719     for (const auto& tid : dump_threads) {
720       pid_t pid;
721       if (!GetProcessForThread(tid, &pid)) {
722         continue;
723       }
724       process_set.insert(pid);
725     }
726     processes.insert(processes.end(), process_set.begin(), process_set.end());
727   }
728 
729   // Dump each process and its threads.
730   for (auto& pid : processes) {
731     // Dump mmap records.
732     std::vector<ThreadMmap> thread_mmaps;
733     if (!GetThreadMmapsInProcess(pid, &thread_mmaps)) {
734       // The process may exit before we get its info.
735       continue;
736     }
737     for (const auto& map : thread_mmaps) {
738       if (map.executable == 0) {
739         continue;  // No need to dump non-executable mmap info.
740       }
741       MmapRecord record(attr, false, pid, pid, map.start_addr, map.len,
742                         map.pgoff, map.name, event_id);
743       if (!ProcessRecord(&record)) {
744         return false;
745       }
746     }
747     // Dump process name.
748     std::string name;
749     if (GetThreadName(pid, &name)) {
750       CommRecord record(attr, pid, pid, name, event_id, 0);
751       if (!ProcessRecord(&record)) {
752         return false;
753       }
754     }
755     // Dump thread info.
756     std::vector<pid_t> threads = GetThreadsInProcess(pid);
757     for (const auto& tid : threads) {
758       if (tid == pid) {
759         continue;
760       }
761       if (all_threads || dump_threads.find(tid) != dump_threads.end()) {
762         ForkRecord fork_record(attr, pid, tid, pid, pid, event_id);
763         if (!ProcessRecord(&fork_record)) {
764           return false;
765         }
766         if (GetThreadName(tid, &name)) {
767           CommRecord comm_record(attr, pid, tid, name, event_id, 0);
768           if (!ProcessRecord(&comm_record)) {
769             return false;
770           }
771         }
772       }
773     }
774   }
775   return true;
776 }
777 
ProcessRecord(Record * record)778 bool RecordCommand::ProcessRecord(Record* record) {
779   if (system_wide_collection_ && record->type() == PERF_RECORD_SAMPLE) {
780     auto& r = *static_cast<SampleRecord*>(record);
781     // Omit samples get before start sampling time.
782     if (r.time_data.time < start_sampling_time_in_ns_) {
783       return true;
784     }
785   }
786   UpdateRecordForEmbeddedElfPath(record);
787   if (unwind_dwarf_callchain_ && !post_unwind_) {
788     thread_tree_.Update(*record);
789     if (!UnwindRecord(record)) {
790       return false;
791     }
792   }
793   if (record->type() == PERF_RECORD_SAMPLE) {
794     sample_record_count_++;
795   } else if (record->type() == PERF_RECORD_LOST) {
796     lost_record_count_ += static_cast<LostRecord*>(record)->lost;
797   }
798   bool result = record_file_writer_->WriteRecord(*record);
799   return result;
800 }
801 
802 template <class RecordType>
UpdateMmapRecordForEmbeddedElfPath(RecordType * record)803 void UpdateMmapRecordForEmbeddedElfPath(RecordType* record) {
804   RecordType& r = *record;
805   if (!r.InKernel() && r.data->pgoff != 0) {
806     // For the case of a shared library "foobar.so" embedded
807     // inside an APK, we rewrite the original MMAP from
808     // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
809     // so as to make the library name explicit. This update is
810     // done here (as part of the record operation) as opposed to
811     // on the host during the report, since we want to report
812     // the correct library name even if the the APK in question
813     // is not present on the host. The new offset W is
814     // calculated to be with respect to the start of foobar.so,
815     // not to the start of path.apk.
816     EmbeddedElf* ee =
817         ApkInspector::FindElfInApkByOffset(r.filename, r.data->pgoff);
818     if (ee != nullptr) {
819       // Compute new offset relative to start of elf in APK.
820       auto data = *r.data;
821       data.pgoff -= ee->entry_offset();
822       r.SetDataAndFilename(data, GetUrlInApk(r.filename, ee->entry_name()));
823     }
824   }
825 }
826 
UpdateRecordForEmbeddedElfPath(Record * record)827 void RecordCommand::UpdateRecordForEmbeddedElfPath(Record* record) {
828   if (record->type() == PERF_RECORD_MMAP) {
829     UpdateMmapRecordForEmbeddedElfPath(static_cast<MmapRecord*>(record));
830   } else if (record->type() == PERF_RECORD_MMAP2) {
831     UpdateMmapRecordForEmbeddedElfPath(static_cast<Mmap2Record*>(record));
832   }
833 }
834 
UnwindRecord(Record * record)835 bool RecordCommand::UnwindRecord(Record* record) {
836   if (record->type() == PERF_RECORD_SAMPLE) {
837     SampleRecord& r = *static_cast<SampleRecord*>(record);
838     if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) &&
839         (r.sample_type & PERF_SAMPLE_REGS_USER) &&
840         (r.regs_user_data.reg_mask != 0) &&
841         (r.sample_type & PERF_SAMPLE_STACK_USER) &&
842         (r.GetValidStackSize() > 0)) {
843       ThreadEntry* thread =
844           thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
845       RegSet regs = CreateRegSet(r.regs_user_data.abi,
846                                  r.regs_user_data.reg_mask,
847                                  r.regs_user_data.regs);
848       // Normally do strict arch check when unwinding stack. But allow unwinding
849       // 32-bit processes on 64-bit devices for system wide profiling.
850       bool strict_arch_check = !system_wide_collection_;
851       std::vector<uint64_t> unwind_ips =
852           UnwindCallChain(r.regs_user_data.abi, *thread, regs,
853                           r.stack_user_data.data,
854                           r.GetValidStackSize(), strict_arch_check);
855       r.ReplaceRegAndStackWithCallChain(unwind_ips);
856     }
857   }
858   return true;
859 }
860 
PostUnwind(const std::vector<std::string> & args)861 bool RecordCommand::PostUnwind(const std::vector<std::string>& args) {
862   thread_tree_.ClearThreadAndMap();
863   std::unique_ptr<RecordFileReader> reader =
864       RecordFileReader::CreateInstance(record_filename_);
865   if (reader == nullptr) {
866     return false;
867   }
868   std::string tmp_filename = record_filename_ + ".tmp";
869   record_file_writer_ = CreateRecordFile(tmp_filename);
870   if (record_file_writer_ == nullptr) {
871     return false;
872   }
873   bool result = reader->ReadDataSection(
874       [this](std::unique_ptr<Record> record) {
875         thread_tree_.Update(*record);
876         if (!UnwindRecord(record.get())) {
877           return false;
878         }
879         return record_file_writer_->WriteRecord(*record);
880       },
881       false);
882   if (!result) {
883     return false;
884   }
885   if (!DumpAdditionalFeatures(args)) {
886     return false;
887   }
888   if (!record_file_writer_->Close()) {
889     return false;
890   }
891 
892   if (unlink(record_filename_.c_str()) != 0) {
893     PLOG(ERROR) << "failed to remove " << record_filename_;
894     return false;
895   }
896   if (rename(tmp_filename.c_str(), record_filename_.c_str()) != 0) {
897     PLOG(ERROR) << "failed to rename " << tmp_filename << " to "
898                 << record_filename_;
899     return false;
900   }
901   return true;
902 }
903 
DumpAdditionalFeatures(const std::vector<std::string> & args)904 bool RecordCommand::DumpAdditionalFeatures(
905     const std::vector<std::string>& args) {
906   // Read data section of perf.data to collect hit file information.
907   thread_tree_.ClearThreadAndMap();
908   Dso::ReadKernelSymbolsFromProc();
909   auto callback = [&](const Record* r) {
910     thread_tree_.Update(*r);
911     if (r->type() == PERF_RECORD_SAMPLE) {
912       CollectHitFileInfo(*reinterpret_cast<const SampleRecord*>(r));
913     }
914   };
915   if (!record_file_writer_->ReadDataSection(callback)) {
916     return false;
917   }
918 
919   size_t feature_count = 4;
920   if (branch_sampling_) {
921     feature_count++;
922   }
923   if (dump_symbols_) {
924     feature_count++;
925   }
926   if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
927     return false;
928   }
929   if (!DumpBuildIdFeature()) {
930     return false;
931   }
932   if (dump_symbols_ && !DumpFileFeature()) {
933     return false;
934   }
935   utsname uname_buf;
936   if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
937     PLOG(ERROR) << "uname() failed";
938     return false;
939   }
940   if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE,
941                                                uname_buf.release)) {
942     return false;
943   }
944   if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH,
945                                                uname_buf.machine)) {
946     return false;
947   }
948 
949   std::string exec_path = android::base::GetExecutablePath();
950   if (exec_path.empty()) exec_path = "simpleperf";
951   std::vector<std::string> cmdline;
952   cmdline.push_back(exec_path);
953   cmdline.push_back("record");
954   cmdline.insert(cmdline.end(), args.begin(), args.end());
955   if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
956     return false;
957   }
958   if (branch_sampling_ != 0 &&
959       !record_file_writer_->WriteBranchStackFeature()) {
960     return false;
961   }
962   if (!record_file_writer_->EndWriteFeatures()) {
963     return false;
964   }
965   return true;
966 }
967 
DumpBuildIdFeature()968 bool RecordCommand::DumpBuildIdFeature() {
969   std::vector<BuildIdRecord> build_id_records;
970   BuildId build_id;
971   std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
972   for (Dso* dso : dso_v) {
973     if (!dso->HasDumpId()) {
974       continue;
975     }
976     if (dso->type() == DSO_KERNEL) {
977       if (!GetKernelBuildId(&build_id)) {
978         continue;
979       }
980       build_id_records.push_back(
981           BuildIdRecord(true, UINT_MAX, build_id, dso->Path()));
982     } else if (dso->type() == DSO_KERNEL_MODULE) {
983       std::string path = dso->Path();
984       std::string module_name = basename(&path[0]);
985       if (android::base::EndsWith(module_name, ".ko")) {
986         module_name = module_name.substr(0, module_name.size() - 3);
987       }
988       if (!GetModuleBuildId(module_name, &build_id)) {
989         LOG(DEBUG) << "can't read build_id for module " << module_name;
990         continue;
991       }
992       build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, path));
993     } else {
994       if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP) {
995         continue;
996       }
997       auto tuple = SplitUrlInApk(dso->Path());
998       if (std::get<0>(tuple)) {
999         ElfStatus result = GetBuildIdFromApkFile(std::get<1>(tuple),
1000                                                  std::get<2>(tuple), &build_id);
1001         if (result != ElfStatus::NO_ERROR) {
1002           LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": "
1003                      << result;
1004           continue;
1005         }
1006       } else {
1007         ElfStatus result = GetBuildIdFromElfFile(dso->Path(), &build_id);
1008         if (result != ElfStatus::NO_ERROR) {
1009           LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": "
1010                      << result;
1011           continue;
1012         }
1013       }
1014       build_id_records.push_back(
1015           BuildIdRecord(false, UINT_MAX, build_id, dso->Path()));
1016     }
1017   }
1018   if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
1019     return false;
1020   }
1021   return true;
1022 }
1023 
DumpFileFeature()1024 bool RecordCommand::DumpFileFeature() {
1025   std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
1026   for (Dso* dso : dso_v) {
1027     if (!dso->HasDumpId()) {
1028       continue;
1029     }
1030     uint32_t dso_type = dso->type();
1031     uint64_t min_vaddr = dso->MinVirtualAddress();
1032 
1033     // Dumping all symbols in hit files takes too much space, so only dump
1034     // needed symbols.
1035     const std::vector<Symbol>& symbols = dso->GetSymbols();
1036     std::vector<const Symbol*> dump_symbols;
1037     for (const auto& sym : symbols) {
1038       if (sym.HasDumpId()) {
1039         dump_symbols.push_back(&sym);
1040       }
1041     }
1042     std::sort(dump_symbols.begin(), dump_symbols.end(), Symbol::CompareByAddr);
1043 
1044     if (!record_file_writer_->WriteFileFeature(dso->Path(), dso_type, min_vaddr,
1045                                                dump_symbols)) {
1046       return false;
1047     }
1048   }
1049   return true;
1050 }
1051 
CollectHitFileInfo(const SampleRecord & r)1052 void RecordCommand::CollectHitFileInfo(const SampleRecord& r) {
1053   const ThreadEntry* thread =
1054       thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
1055   const MapEntry* map =
1056       thread_tree_.FindMap(thread, r.ip_data.ip, r.InKernel());
1057   Dso* dso = map->dso;
1058   const Symbol* symbol;
1059   if (dump_symbols_) {
1060     symbol = thread_tree_.FindSymbol(map, r.ip_data.ip, nullptr, &dso);
1061     if (!symbol->HasDumpId()) {
1062       dso->CreateSymbolDumpId(symbol);
1063     }
1064   }
1065   if (!dso->HasDumpId()) {
1066     dso->CreateDumpId();
1067   }
1068   if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
1069     bool in_kernel = r.InKernel();
1070     bool first_ip = true;
1071     for (uint64_t i = 0; i < r.callchain_data.ip_nr; ++i) {
1072       uint64_t ip = r.callchain_data.ips[i];
1073       if (ip >= PERF_CONTEXT_MAX) {
1074         switch (ip) {
1075           case PERF_CONTEXT_KERNEL:
1076             in_kernel = true;
1077             break;
1078           case PERF_CONTEXT_USER:
1079             in_kernel = false;
1080             break;
1081           default:
1082             LOG(DEBUG) << "Unexpected perf_context in callchain: " << std::hex
1083                        << ip;
1084         }
1085       } else {
1086         if (first_ip) {
1087           first_ip = false;
1088           // Remove duplication with sample ip.
1089           if (ip == r.ip_data.ip) {
1090             continue;
1091           }
1092         }
1093         map = thread_tree_.FindMap(thread, ip, in_kernel);
1094         dso = map->dso;
1095         if (dump_symbols_) {
1096           symbol = thread_tree_.FindSymbol(map, ip, nullptr, &dso);
1097           if (!symbol->HasDumpId()) {
1098             dso->CreateSymbolDumpId(symbol);
1099           }
1100         }
1101         if (!dso->HasDumpId()) {
1102           dso->CreateDumpId();
1103         }
1104       }
1105     }
1106   }
1107 }
1108 
RegisterRecordCommand()1109 void RegisterRecordCommand() {
1110   RegisterCommand("record",
1111                   [] { return std::unique_ptr<Command>(new RecordCommand()); });
1112 }
1113