/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <inttypes.h>
#include <libgen.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/utsname.h>
#include <time.h>
#include <unistd.h>
#include <chrono>
#include <filesystem>
#include <optional>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/parseint.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunused-parameter"
#include <llvm/Support/MemoryBuffer.h>
#pragma clang diagnostic pop

#if defined(__ANDROID__)
#include <android-base/properties.h>
#endif
#include <unwindstack/Error.h>

#include "BranchListFile.h"
#include "CallChainJoiner.h"
#include "ETMRecorder.h"
#include "IOEventLoop.h"
#include "JITDebugReader.h"
#include "MapRecordReader.h"
#include "OfflineUnwinder.h"
#include "ProbeEvents.h"
#include "RecordFilter.h"
#include "cmd_record_impl.h"
#include "command.h"
#include "environment.h"
#include "event_selection_set.h"
#include "event_type.h"
#include "kallsyms.h"
#include "read_apk.h"
#include "read_elf.h"
#include "read_symbol_map.h"
#include "record.h"
#include "record_file.h"
#include "thread_tree.h"
#include "tracing.h"
#include "utils.h"
#include "workload.h"

namespace simpleperf {
namespace {

using android::base::ParseUint;
using android::base::Realpath;

static std::string default_measured_event_type = "cpu-cycles";

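// Branch filter names accepted by -b/-j, mapped to PERF_SAMPLE_BRANCH_* bits; the selected bits
// are OR'ed together into branch_sampling_ when parsing options.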
static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
    {"u", PERF_SAMPLE_BRANCH_USER},
    {"k", PERF_SAMPLE_BRANCH_KERNEL},
    {"any", PERF_SAMPLE_BRANCH_ANY},
    {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL},
    {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN},
    {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL},
};

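// Clock names accepted by --clockid, mapped to the POSIX clock ids used for sample timestamps.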
static std::unordered_map<std::string, int> clockid_map = {
    {"realtime", CLOCK_REALTIME},
    {"monotonic", CLOCK_MONOTONIC},
    {"monotonic_raw", CLOCK_MONOTONIC_RAW},
    {"boottime", CLOCK_BOOTTIME},
};

// The max size of records dumped by the kernel is 65535, and the dump stack size
// should be a multiple of 8, so MAX_DUMP_STACK_SIZE is 65528.
static constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;

// The max number of pages allowed in the mapped buffer is decided by rlimit(RLIMIT_MEMLOCK).
// Here 1024 is the desired number of pages in the mapped buffer. If mapped
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
static constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;

// Cache size used by CallChainJoiner to cache call chains in memory.
static constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * kMegabyte;

static constexpr size_t kDefaultAuxBufferSize = 4 * kMegabyte;

// On Pixel 3, it takes about 1ms to enable ETM, and 16-40ms to disable ETM and copy 4M of ETM
// data. So the default flush interval is set to 100ms.
static constexpr uint32_t kDefaultEtmDataFlushIntervalInMs = 100;

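// Timestamps below are in nanoseconds (from GetSystemClock()); they are used to report how long
// each recording phase took (see the LOG(DEBUG) at the end of PostProcessRecording()).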
struct TimeStat {
  uint64_t prepare_recording_time = 0;
  uint64_t start_recording_time = 0;
  uint64_t stop_recording_time = 0;
  uint64_t finish_recording_time = 0;
  uint64_t post_process_time = 0;
};

std::optional<size_t> GetDefaultRecordBufferSize(bool system_wide_recording) {
  // Currently, the record buffer size in user-space is set to match the kernel buffer size on an
  // 8 core system. For system-wide recording, it is 8K pages * 4K page_size * 8 cores = 256MB.
  // For non system-wide recording, it is 1K pages * 4K page_size * 8 cores = 64MB.
  // But on devices with memory >= 4GB, we increase the buffer size to 256MB. This reduces the
  // chance of cutting samples, which can cause broken callchains.
  static constexpr size_t kLowMemoryRecordBufferSize = 64 * kMegabyte;
  static constexpr size_t kHighMemoryRecordBufferSize = 256 * kMegabyte;
  static constexpr size_t kSystemWideRecordBufferSize = 256 * kMegabyte;
  // Ideally we could use >= 4GB here. But the memory size shown in /proc/meminfo is likely to be
  // 3.x GB on a device with 4GB memory. So we have to use <= 3GB.
  static constexpr uint64_t kLowMemoryLimit = 3 * kGigabyte;

  if (system_wide_recording) {
    return kSystemWideRecordBufferSize;
  }
  return GetMemorySize() <= kLowMemoryLimit ? kLowMemoryRecordBufferSize
                                            : kHighMemoryRecordBufferSize;
}

class RecordCommand : public Command {
 public:
  RecordCommand()
      : Command(
            "record", "record sampling info in perf.data",
            // clang-format off
"Usage: simpleperf record [options] [--] [command [command-args]]\n"
"       Gather sampling information of running [command]. The -a/-p/-t options\n"
"       can be used to change the target of sampling information.\n"
"       The default options are: -e cpu-cycles -f 4000 -o perf.data.\n"
"Select monitored threads:\n"
"-a     System-wide collection. Use with --exclude-perf to exclude samples for\n"
"       the simpleperf process.\n"
#if defined(__ANDROID__)
"--app package_name    Profile the process of an Android application.\n"
"                      On non-rooted devices, the app must be debuggable,\n"
"                      because we use run-as to switch to the app's context.\n"
#endif
"-p pid_or_process_name_regex1,pid_or_process_name_regex2,...\n"
"                      Record events on existing processes. Processes are searched either by pid\n"
"                      or process name regex. Mutually exclusive with -a.\n"
"-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
"\n"
"Select monitored event types:\n"
"-e event1[:modifier1],event2[:modifier2],...\n"
"             Select a list of events to record. An event can be:\n"
"               1) an event name listed in `simpleperf list`;\n"
"               2) a raw PMU event in rN format. N is a hex number.\n"
"                  For example, r1b selects event number 0x1b.\n"
"               3) a kprobe event added by --kprobe option.\n"
"               4) a uprobe event added by --uprobe option.\n"
"             Modifiers can be added to define how the event should be\n"
"             monitored. Possible modifiers are:\n"
"                u - monitor user space events only\n"
"                k - monitor kernel space events only\n"
"--group event1[:modifier],event2[:modifier2],...\n"
"             Similar to -e option. But events specified in the same --group\n"
"             option are monitored as a group, and scheduled in and out at the\n"
"             same time.\n"
"--trace-offcpu   Generate samples when threads are scheduled off cpu.\n"
"                 Similar to \"-c 1 -e sched:sched_switch\".\n"
"--kprobe kprobe_event1,kprobe_event2,...\n"
"             Add kprobe events during recording. The kprobe_event format is in\n"
"             Documentation/trace/kprobetrace.rst in the kernel. Examples:\n"
"               'p:myprobe do_sys_openat2 $arg2:string'   - add event kprobes:myprobe\n"
"               'r:myretprobe do_sys_openat2 $retval:s64' - add event kprobes:myretprobe\n"
"--uprobe uprobe_event1,uprobe_event2,...\n"
"             Add uprobe events during recording. The uprobe_event format is in\n"
"             Documentation/trace/uprobetracer.rst in the kernel. Examples:\n"
"               'p:myprobe /system/lib64/libc.so:0x1000'\n"
"                   - add event uprobes:myprobe\n"
"               'r:myretprobe /system/lib64/libc.so:0x1000'\n"
"                   - add event uprobes:myretprobe\n"
"--add-counter event1,event2,...     Add additional event counts in record samples. For example,\n"
"                                    we can use `-e cpu-cycles --add-counter instructions` to\n"
"                                    get samples for cpu-cycles event, while having instructions\n"
"                                    event count for each sample.\n"
"\n"
204 "Select monitoring options:\n"
205 "-f freq      Set event sample frequency. It means recording at most [freq]\n"
206 "             samples every second. For non-tracepoint events, the default\n"
207 "             option is -f 4000. A -f/-c option affects all event types\n"
208 "             following it until meeting another -f/-c option. For example,\n"
209 "             for \"-f 1000 -e cpu-cycles -c 1 -e sched:sched_switch\", cpu-cycles\n"
210 "             has sample freq 1000, sched:sched_switch event has sample period 1.\n"
211 "-c count     Set event sample period. It means recording one sample when\n"
212 "             [count] events happen. For tracepoint events, the default option\n"
213 "             is -c 1.\n"
214 "--call-graph fp | dwarf[,<dump_stack_size>]\n"
215 "             Enable call graph recording. Use frame pointer or dwarf debug\n"
216 "             frame as the method to parse call graph in stack.\n"
217 "             Default is no call graph. Default dump_stack_size with -g is 65528.\n"
218 "-g           Same as '--call-graph dwarf'.\n"
219 "--clockid clock_id      Generate timestamps of samples using selected clock.\n"
220 "                        Possible values are: realtime, monotonic,\n"
221 "                        monotonic_raw, boottime, perf. If supported, default\n"
222 "                        is monotonic, otherwise is perf.\n"
223 "--cpu cpu_item1,cpu_item2,...  Monitor events on selected cpus. cpu_item can be a number like\n"
224 "                               1, or a range like 0-3. A --cpu option affects all event types\n"
225 "                               following it until meeting another --cpu option.\n"
226 "--delay    time_in_ms   Wait time_in_ms milliseconds before recording samples.\n"
227 "--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
228 "                        [command]. Here time_in_sec may be any positive\n"
229 "                        floating point number.\n"
230 "-j branch_filter1,branch_filter2,...\n"
231 "             Enable taken branch stack sampling. Each sample captures a series\n"
232 "             of consecutive taken branches.\n"
233 "             The following filters are defined:\n"
234 "                any: any type of branch\n"
235 "                any_call: any function call or system call\n"
236 "                any_ret: any function return or system call return\n"
237 "                ind_call: any indirect branch\n"
238 "                u: only when the branch target is at the user level\n"
239 "                k: only when the branch target is in the kernel\n"
240 "             This option requires at least one branch type among any, any_call,\n"
241 "             any_ret, ind_call.\n"
242 "-b           Enable taken branch stack sampling. Same as '-j any'.\n"
243 "-m mmap_pages   Set pages used in the kernel to cache sample data for each cpu.\n"
244 "                It should be a power of 2. If not set, the max possible value <= 1024\n"
245 "                will be used.\n"
246 "--user-buffer-size <buffer_size> Set buffer size in userspace to cache sample data.\n"
247 "                                 By default, it is %s.\n"
248 "--no-inherit  Don't record created child threads/processes.\n"
249 "--cpu-percent <percent>  Set the max percent of cpu time used for recording.\n"
250 "                         percent is in range [1-100], default is 25.\n"
251 "\n"
252 "--tp-filter filter_string    Set filter_string for the previous tracepoint event.\n"
253 "                             Format is in Documentation/trace/events.rst in the kernel.\n"
254 "                             An example: 'prev_comm != \"simpleperf\" && (prev_pid > 1)'.\n"
255 "\n"
256 "Dwarf unwinding options:\n"
257 "--post-unwind=(yes|no) If `--call-graph dwarf` option is used, then the user's\n"
258 "                       stack will be recorded in perf.data and unwound while\n"
259 "                       recording by default. Use --post-unwind=yes to switch\n"
260 "                       to unwind after recording.\n"
261 "--no-unwind   If `--call-graph dwarf` option is used, then the user's stack\n"
262 "              will be unwound by default. Use this option to disable the\n"
263 "              unwinding of the user's stack.\n"
264 "--no-callchain-joiner  If `--call-graph dwarf` option is used, then by default\n"
265 "                       callchain joiner is used to break the 64k stack limit\n"
266 "                       and build more complete call graphs. However, the built\n"
267 "                       call graphs may not be correct in all cases.\n"
268 "--callchain-joiner-min-matching-nodes count\n"
269 "               When callchain joiner is used, set the matched nodes needed to join\n"
270 "               callchains. The count should be >= 1. By default it is 1.\n"
271 "--no-cut-samples   Simpleperf uses a record buffer to cache records received from the kernel.\n"
272 "                   When the available space in the buffer reaches low level, the stack data in\n"
273 "                   samples is truncated to 1KB. When the available space reaches critical level,\n"
274 "                   it drops all samples. This option makes simpleperf not truncate stack data\n"
275 "                   when the available space reaches low level.\n"
276 "--keep-failed-unwinding-result        Keep reasons for failed unwinding cases\n"
277 "--keep-failed-unwinding-debug-info    Keep debug info for failed unwinding cases\n"
278 "\n"
279 "Sample filter options:\n"
280 "--exclude-perf                Exclude samples for simpleperf process.\n"
281 RECORD_FILTER_OPTION_HELP_MSG_FOR_RECORDING
282 "\n"
283 "Recording file options:\n"
284 "--no-dump-build-id        Don't dump build ids in perf.data.\n"
285 "--no-dump-kernel-symbols  Don't dump kernel symbols in perf.data. By default\n"
286 "                          kernel symbols will be dumped when needed.\n"
287 "--no-dump-symbols       Don't dump symbols in perf.data. By default symbols are\n"
288 "                        dumped in perf.data, to support reporting in another\n"
289 "                        environment.\n"
290 "-o record_file_name    Set record file name, default is perf.data.\n"
291 "--size-limit SIZE[K|M|G]      Stop recording after SIZE bytes of records.\n"
292 "                              Default is unlimited.\n"
293 "--symfs <dir>    Look for files with symbols relative to this directory.\n"
294 "                 This option is used to provide files with symbol table and\n"
295 "                 debug information, which are used for unwinding and dumping symbols.\n"
296 "--add-meta-info key=value     Add extra meta info, which will be stored in the recording file.\n"
297 "-z[=<compression_level>]      Compress records using zstd. compression level: 1 is the fastest,\n"
298 "                              22 is the greatest, 3 is the default.\n"
299 "\n"
300 "ETM recording options:\n"
301 "--addr-filter filter_str1,filter_str2,...\n"
302 "                Provide address filters for cs-etm instruction tracing.\n"
303 "                filter_str accepts below formats:\n"
304 "                  'filter  <addr-range>'  -- trace instructions in a range\n"
305 "                  'start <addr>'          -- start tracing when ip is <addr>\n"
306 "                  'stop <addr>'           -- stop tracing when ip is <addr>\n"
307 "                <addr-range> accepts below formats:\n"
308 "                  <file_path>                            -- code sections in a binary file\n"
309 "                  <vaddr_start>-<vaddr_end>@<file_path>  -- part of a binary file\n"
310 "                  <kernel_addr_start>-<kernel_addr_end>  -- part of kernel space\n"
311 "                <addr> accepts below formats:\n"
312 "                  <vaddr>@<file_path>      -- virtual addr in a binary file\n"
313 "                  <kernel_addr>            -- a kernel address\n"
314 "                Examples:\n"
315 "                  'filter 0x456-0x480@/system/lib/libc.so'\n"
316 "                  'start 0x456@/system/lib/libc.so,stop 0x480@/system/lib/libc.so'\n"
317 "--aux-buffer-size <buffer_size>  Set aux buffer size, only used in cs-etm event type.\n"
318 "                                 Need to be power of 2 and page size aligned.\n"
319 "                                 Used memory size is (buffer_size * (cpu_count + 1).\n"
320 "                                 Default is 4M.\n"
321 "--decode-etm                     Convert ETM data into branch lists while recording.\n"
322 "--binary binary_name             Used with --decode-etm to only generate data for binaries\n"
323 "                                 matching binary_name regex.\n"
324 "--record-timestamp               Generate timestamp packets in ETM stream.\n"
325 "--record-cycles                  Generate cycle count packets in ETM stream.\n"
326 "--cycle-threshold <threshold>    Set cycle count counter threshold for ETM cycle count packets.\n"
327 "--etm-flush-interval <interval>  Set the interval between ETM data flushes from the ETR buffer\n"
328 "                                 to the perf event buffer (in milliseconds). Default is 100 ms.\n"
329 "\n"
330 "Other options:\n"
331 "--exit-with-parent            Stop recording when the thread starting simpleperf dies.\n"
332 "--use-cmd-exit-code           Exit with the same exit code as the monitored cmdline.\n"
333 "--start_profiling_fd fd_no    After starting profiling, write \"STARTED\" to\n"
334 "                              <fd_no>, then close <fd_no>.\n"
335 "--stdio-controls-profiling    Use stdin/stdout to pause/resume profiling.\n"
336 #if defined(__ANDROID__)
337 "--in-app                      We are already running in the app's context.\n"
338 "--tracepoint-events file_name   Read tracepoint events from [file_name] instead of tracefs.\n"
339 #endif
340 #if 0
341 // Below options are only used internally and shouldn't be visible to the public.
342 "--out-fd <fd>    Write perf.data to a file descriptor.\n"
343 "--stop-signal-fd <fd>  Stop recording when fd is readable.\n"
344 #endif
345             // clang-format on
346             ),
        system_wide_collection_(false),
        branch_sampling_(0),
        fp_callchain_sampling_(false),
        dwarf_callchain_sampling_(false),
        dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
        unwind_dwarf_callchain_(true),
        post_unwind_(false),
        child_inherit_(true),
        duration_in_sec_(0),
        can_dump_kernel_symbols_(true),
        dump_symbols_(true),
        event_selection_set_(false),
        mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)),
        record_filename_("perf.data"),
        sample_record_count_(0),
        in_app_context_(false),
        trace_offcpu_(false),
        exclude_kernel_callchain_(false),
        allow_callchain_joiner_(true),
        callchain_joiner_min_matching_nodes_(1u),
        last_record_timestamp_(0u),
        record_filter_(thread_tree_) {
    // If we run `adb shell simpleperf record xxx` and stop profiling with ctrl-c, adb closes the
    // sockets connected to simpleperf. After that, simpleperf will receive SIGPIPE when writing
    // to stdout/stderr, which is a problem when the '--app' option is used. So ignore SIGPIPE to
    // finish properly.
    signal(SIGPIPE, SIG_IGN);
  }

  std::string LongHelpString() const override;
  void Run(const std::vector<std::string>& args, int* exit_code) override;
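  // Adapter for the bool-returning Command::Run() interface; success means an exit code of 0.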
  bool Run(const std::vector<std::string>& args) override {
    int exit_code;
    Run(args, &exit_code);
    return exit_code == 0;
  }

 private:
  bool ParseOptions(const std::vector<std::string>& args, std::vector<std::string>* non_option_args,
                    ProbeEvents& probe_events);
  bool AdjustPerfEventLimit();
  bool PrepareRecording(Workload* workload);
  bool DoRecording(Workload* workload);
  bool PostProcessRecording(const std::vector<std::string>& args);
  // pre recording functions
  bool TraceOffCpu();
  bool SetEventSelectionFlags();
  bool CreateAndInitRecordFile();
  std::unique_ptr<RecordFileWriter> CreateRecordFile(const std::string& filename,
                                                     const EventAttrIds& attrs);
  bool DumpKernelSymbol();
  bool DumpTracingData();
  bool DumpMaps();
  bool DumpAuxTraceInfo();

  // recording functions
  bool ProcessRecord(Record* record);
  bool ShouldOmitRecord(Record* record);
  bool DumpMapsForRecord(Record* record);
  bool SaveRecordForPostUnwinding(Record* record);
  bool SaveRecordAfterUnwinding(Record* record);
  bool SaveRecordWithoutUnwinding(Record* record);
  bool ProcessJITDebugInfo(std::vector<JITDebugInfo> debug_info, bool sync_kernel_records);
  bool ProcessControlCmd(IOEventLoop* loop);
  void UpdateRecord(Record* record);
  bool UnwindRecord(SampleRecord& r);
  bool KeepFailedUnwindingResult(const SampleRecord& r, const std::vector<uint64_t>& ips,
                                 const std::vector<uint64_t>& sps);

  // post recording functions
  std::unique_ptr<RecordFileReader> MoveRecordFile(const std::string& old_filename);
  bool PostUnwindRecords();
  bool JoinCallChains();
  bool DumpAdditionalFeatures(const std::vector<std::string>& args);
  bool DumpBuildIdFeature();
  bool DumpFileFeature();
  bool DumpMetaInfoFeature(bool kernel_symbols_available);
  bool DumpDebugUnwindFeature(const std::unordered_set<Dso*>& dso_set);
  void CollectHitFileInfo(const SampleRecord& r, std::unordered_set<Dso*>* dso_set);
  bool DumpETMBranchListFeature();
  bool DumpInitMapFeature();

  bool system_wide_collection_;
  uint64_t branch_sampling_;
  bool fp_callchain_sampling_;
  bool dwarf_callchain_sampling_;
  uint32_t dump_stack_size_in_dwarf_sampling_;
  bool unwind_dwarf_callchain_;
  bool post_unwind_;
  bool keep_failed_unwinding_result_ = false;
  bool keep_failed_unwinding_debug_info_ = false;
  std::unique_ptr<OfflineUnwinder> offline_unwinder_;
  bool child_inherit_;
  uint64_t delay_in_ms_ = 0;
  double duration_in_sec_;
  bool dump_build_id_ = true;
  bool can_dump_kernel_symbols_;
  bool dump_symbols_;
  std::string clockid_;
  EventSelectionSet event_selection_set_;

  std::pair<size_t, size_t> mmap_page_range_;
  std::optional<size_t> user_buffer_size_;
  size_t aux_buffer_size_ = kDefaultAuxBufferSize;

  ThreadTree thread_tree_;
  std::string record_filename_;
  android::base::unique_fd out_fd_;
  std::unique_ptr<RecordFileWriter> record_file_writer_;
  android::base::unique_fd stop_signal_fd_;

  uint64_t sample_record_count_;
  android::base::unique_fd start_profiling_fd_;
  bool stdio_controls_profiling_ = false;

  std::string app_package_name_;
  bool in_app_context_;
  bool trace_offcpu_;
  bool exclude_kernel_callchain_;
  uint64_t size_limit_in_bytes_ = 0;
  uint64_t max_sample_freq_ = DEFAULT_SAMPLE_FREQ_FOR_NONTRACEPOINT_EVENT;
  size_t cpu_time_max_percent_ = 25;

  // For CallChainJoiner
  bool allow_callchain_joiner_;
  size_t callchain_joiner_min_matching_nodes_;
  std::unique_ptr<CallChainJoiner> callchain_joiner_;
  bool allow_truncating_samples_ = true;

  std::unique_ptr<JITDebugReader> jit_debug_reader_;
  uint64_t last_record_timestamp_;  // used to insert Mmap2Records for JIT debug info
  TimeStat time_stat_;
  EventAttrWithId dumping_attr_id_;
  // In system wide recording, record if we have dumped map info for a process.
  std::unordered_set<pid_t> dumped_processes_;
  bool exclude_perf_ = false;
  RecordFilter record_filter_;

  std::optional<MapRecordReader> map_record_reader_;
  std::optional<MapRecordThread> map_record_thread_;

  std::unordered_map<std::string, std::string> extra_meta_info_;
  bool use_cmd_exit_code_ = false;
  std::vector<std::string> add_counters_;

  std::unique_ptr<ETMBranchListGenerator> etm_branch_list_generator_;
  std::unique_ptr<RegEx> binary_name_regex_;
  std::chrono::milliseconds etm_flush_interval_{kDefaultEtmDataFlushIntervalInMs};

  size_t compression_level_ = 0;
};

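// The help text above contains a %s placeholder (in the --user-buffer-size description);
// LongHelpString() below fills it with the default record buffer size(s) computed at runtime.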
std::string RecordCommand::LongHelpString() const {
  uint64_t process_buffer_size = 0;
  uint64_t system_wide_buffer_size = 0;
  if (auto size = GetDefaultRecordBufferSize(false); size) {
    process_buffer_size = size.value() / kMegabyte;
  }
  if (auto size = GetDefaultRecordBufferSize(true); size) {
    system_wide_buffer_size = size.value() / kMegabyte;
  }
  std::string buffer_size_str;
  if (process_buffer_size == system_wide_buffer_size) {
    buffer_size_str = android::base::StringPrintf("%" PRIu64 "M", process_buffer_size);
  } else {
    buffer_size_str =
        android::base::StringPrintf("%" PRIu64 "M for process recording and %" PRIu64
                                    "M\n                                 for system wide recording",
                                    process_buffer_size, system_wide_buffer_size);
  }
  return android::base::StringPrintf(long_help_string_.c_str(), buffer_size_str.c_str());
}

void RecordCommand::Run(const std::vector<std::string>& args, int* exit_code) {
  *exit_code = 1;
  time_stat_.prepare_recording_time = GetSystemClock();
  ScopedCurrentArch scoped_arch(GetMachineArch());

  if (!CheckPerfEventLimit()) {
    return;
  }
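  // Recording may open one perf event file per event per cpu, so raise the limit on open files.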
  AllowMoreOpenedFiles();

  std::vector<std::string> workload_args;
  ProbeEvents probe_events(event_selection_set_);
  if (!ParseOptions(args, &workload_args, probe_events)) {
    return;
  }
  if (!AdjustPerfEventLimit()) {
    return;
  }
  std::unique_ptr<ScopedTempFiles> scoped_temp_files =
      ScopedTempFiles::Create(android::base::Dirname(record_filename_));
  if (!scoped_temp_files) {
    PLOG(ERROR) << "Can't create output file in directory "
                << android::base::Dirname(record_filename_);
    return;
  }
  if (!app_package_name_.empty() && !in_app_context_) {
    // Some users want to profile non-debuggable apps on rooted devices. Using run-as would make
    // that impossible with --app. So don't switch to the app's context when we are root.
    if (!IsRoot()) {
      // Running simpleperf in the app's context doesn't allow running a child command, so there
      // is no need to consider the exit code of a child command here.
      *exit_code = RunInAppContext(app_package_name_, "record", args, workload_args.size(),
                                   record_filename_, true)
                       ? 0
                       : 1;
      return;
    }
  }
  std::unique_ptr<Workload> workload;
  if (!workload_args.empty()) {
    workload = Workload::CreateWorkload(workload_args);
    if (workload == nullptr) {
      return;
    }
  }
  if (!PrepareRecording(workload.get())) {
    return;
  }
  time_stat_.start_recording_time = GetSystemClock();
  if (!DoRecording(workload.get()) || !PostProcessRecording(args)) {
    return;
  }
  if (use_cmd_exit_code_ && workload) {
    workload->WaitChildProcess(false, exit_code);
  } else {
    *exit_code = 0;
  }
}

bool RecordCommand::PrepareRecording(Workload* workload) {
  // 1. Prepare in other modules.
  PrepareVdsoFile();

  // 2. Add default event type.
  if (event_selection_set_.empty()) {
    std::string event_type = default_measured_event_type;
    if (GetTargetArch() == ARCH_X86_32 || GetTargetArch() == ARCH_X86_64 ||
        GetTargetArch() == ARCH_RISCV64) {
      // Emulators may not support hardware events. So switch to cpu-clock when cpu-cycles isn't
      // available.
      if (!IsHardwareEventSupported()) {
        event_type = "cpu-clock";
        LOG(INFO) << "Hardware events are not available, switch to cpu-clock.";
      }
    }
    if (!event_selection_set_.AddEventType(event_type)) {
      return false;
    }
  }

  // 3. Process options before opening perf event files.
  exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel();
#if defined(__ANDROID__)
  // Enforce removing kernel IP addresses to prevent KASLR disclosure.
  if (!IsRoot()) {
    exclude_kernel_callchain_ = true;
  }
#endif  // defined(__ANDROID__)
  if (trace_offcpu_ && !TraceOffCpu()) {
    return false;
  }
  if (!add_counters_.empty()) {
    if (child_inherit_) {
      LOG(ERROR) << "--no-inherit is needed when using --add-counter.";
      return false;
    }
    if (!event_selection_set_.AddCounters(add_counters_)) {
      return false;
    }
  }
  if (!SetEventSelectionFlags()) {
    return false;
  }
  if (unwind_dwarf_callchain_) {
    bool collect_stat = keep_failed_unwinding_result_;
    offline_unwinder_ = OfflineUnwinder::Create(collect_stat);
  }
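  // The callchain joiner caches call chains in memory and joins them to work around the 64k
  // stack dump limit (see the --no-callchain-joiner help text above).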
  if (unwind_dwarf_callchain_ && allow_callchain_joiner_) {
    callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
                                                callchain_joiner_min_matching_nodes_, false));
  }

  // 4. Add monitored targets.
  bool need_to_check_targets = false;
  if (system_wide_collection_) {
    event_selection_set_.AddMonitoredThreads({-1});
  } else if (!event_selection_set_.HasMonitoredTarget()) {
    if (workload != nullptr) {
      event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
      event_selection_set_.SetEnableCondition(false, true);
    } else if (!app_package_name_.empty()) {
      // If the app process is not created yet, wait for it. This allows simpleperf to start
      // before the app process, giving better support for profiling app start-up time.
      std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
      event_selection_set_.AddMonitoredProcesses(pids);
      need_to_check_targets = true;
    } else {
      LOG(ERROR) << "No threads to monitor. Try `simpleperf help record` for help";
      return false;
    }
  } else {
    need_to_check_targets = true;
  }
  if (delay_in_ms_ != 0 || event_selection_set_.HasAuxTrace()) {
    event_selection_set_.SetEnableCondition(false, false);
  }

  // Profiling JITed/interpreted Java code is supported starting from Android P. Profiling the
  // ART interpreter on host is also supported.
  if (GetAndroidVersion() >= kAndroidVersionP || GetAndroidVersion() == 0) {
    // JIT symfiles are stored in temporary files, and are deleted after recording. But if the
    // `-g --no-unwind` option is used, we want to keep symfiles to support unwinding in
    // the debug-unwind cmd.
    auto symfile_option = (dwarf_callchain_sampling_ && !unwind_dwarf_callchain_)
                              ? JITDebugReader::SymFileOption::kKeepSymFiles
                              : JITDebugReader::SymFileOption::kDropSymFiles;
    auto sync_option = (clockid_ == "monotonic") ? JITDebugReader::SyncOption::kSyncWithRecords
                                                 : JITDebugReader::SyncOption::kNoSync;
    jit_debug_reader_.reset(new JITDebugReader(record_filename_, symfile_option, sync_option));
    // To profile Java code, we need to dump maps containing vdex files, which are not executable.
    event_selection_set_.SetRecordNotExecutableMaps(true);
  }

  // 5. Open perf event files and create mapped buffers.
  if (!event_selection_set_.OpenEventFiles()) {
    return false;
  }
  size_t record_buffer_size = 0;
  if (user_buffer_size_.has_value()) {
    record_buffer_size = user_buffer_size_.value();
  } else {
    auto default_size = GetDefaultRecordBufferSize(system_wide_collection_);
    if (!default_size.has_value()) {
      return false;
    }
    record_buffer_size = default_size.value();
  }
  if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first, mmap_page_range_.second,
                                           aux_buffer_size_, record_buffer_size,
                                           allow_truncating_samples_, exclude_perf_)) {
    return false;
  }
  auto callback = std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
  if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
    return false;
  }

  // 6. Create perf.data.
  if (!CreateAndInitRecordFile()) {
    return false;
  }

  // 7. Add read/signal/periodic Events.
  if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
    return false;
  }
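  // Exit the recording loop when the child workload exits (SIGCHLD), or on SIGINT/SIGTERM.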
  IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
  auto exit_loop_callback = [loop]() { return loop->ExitLoop(); };
  if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM}, exit_loop_callback, IOEventHighPriority)) {
    return false;
  }

  // Only add an event for SIGHUP if we didn't inherit SIG_IGN (e.g. from nohup).
  if (!SignalIsIgnored(SIGHUP)) {
    if (!loop->AddSignalEvent(SIGHUP, exit_loop_callback, IOEventHighPriority)) {
      return false;
    }
  }
  if (stop_signal_fd_ != -1) {
    if (!loop->AddReadEvent(stop_signal_fd_, exit_loop_callback, IOEventHighPriority)) {
      return false;
    }
  }

  if (delay_in_ms_ != 0) {
    auto delay_callback = [this]() {
      if (!event_selection_set_.SetEnableEvents(true)) {
        return false;
      }
      if (!system_wide_collection_) {
        // Dump maps in case there are new maps created while delaying.
        return DumpMaps();
      }
      return true;
    };
    if (!loop->AddOneTimeEvent(SecondToTimeval(delay_in_ms_ / 1000.0), delay_callback)) {
      return false;
    }
  }
  if (duration_in_sec_ != 0) {
    if (!loop->AddPeriodicEvent(
            SecondToTimeval(duration_in_sec_), [loop]() { return loop->ExitLoop(); },
            IOEventHighPriority)) {
      return false;
    }
  }
  if (stdio_controls_profiling_) {
    if (!loop->AddReadEvent(0, [this, loop]() { return ProcessControlCmd(loop); })) {
      return false;
    }
  }
  if (jit_debug_reader_) {
    auto callback = [this](std::vector<JITDebugInfo> debug_info, bool sync_kernel_records) {
      return ProcessJITDebugInfo(std::move(debug_info), sync_kernel_records);
    };
    if (!jit_debug_reader_->RegisterDebugInfoCallback(loop, callback)) {
      return false;
    }
    if (!system_wide_collection_) {
      std::set<pid_t> pids = event_selection_set_.GetMonitoredProcesses();
      for (pid_t tid : event_selection_set_.GetMonitoredThreads()) {
        pid_t pid;
        if (GetProcessForThread(tid, &pid)) {
          pids.insert(pid);
        }
      }
      for (pid_t pid : pids) {
        if (!jit_debug_reader_->MonitorProcess(pid)) {
          return false;
        }
      }
      if (!jit_debug_reader_->ReadAllProcesses()) {
        return false;
      }
    }
  }
  if (event_selection_set_.HasAuxTrace()) {
    // ETM events can only be enabled successfully after MmapEventFiles().
    if (delay_in_ms_ == 0 && !event_selection_set_.IsEnabledOnExec()) {
      if (!event_selection_set_.EnableETMEvents()) {
        return false;
      }
    }
    // ETM data is dumped to the kernel buffer only when no thread is being traced by ETM. That
    // happens either when all monitored threads are scheduled off cpu, or when all ETM perf
    // events are disabled.
    // If ETM data isn't dumped to the kernel buffer in time, the overflowed part is dropped.
    // This results in less data than expected, especially in system wide recording. So add a
    // periodic event to flush ETM data by temporarily disabling all ETM perf events.
    auto etm_flush = [this]() {
      return event_selection_set_.DisableETMEvents() && event_selection_set_.EnableETMEvents();
    };
    if (!loop->AddPeriodicEvent(SecondToTimeval(etm_flush_interval_.count() / 1000.0), etm_flush)) {
      return false;
    }

    if (etm_branch_list_generator_) {
      if (exclude_perf_) {
        etm_branch_list_generator_->SetExcludePid(getpid());
      }
      if (binary_name_regex_) {
        etm_branch_list_generator_->SetBinaryFilter(binary_name_regex_.get());
      }
    }
  }
  return true;
}

bool RecordCommand::DoRecording(Workload* workload) {
  // Write records in mapped buffers of perf_event_files to output file while workload is running.
  if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
    return false;
  }
  if (start_profiling_fd_.get() != -1) {
    if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) {
      PLOG(ERROR) << "failed to write to start_profiling_fd_";
    }
    start_profiling_fd_.reset();
  }
  if (stdio_controls_profiling_) {
    printf("started\n");
    fflush(stdout);
  }
  if (!event_selection_set_.GetIOEventLoop()->RunLoop()) {
    return false;
  }
  time_stat_.stop_recording_time = GetSystemClock();
  if (event_selection_set_.HasAuxTrace()) {
    // Disable ETM events to flush the last ETM data.
    if (!event_selection_set_.DisableETMEvents()) {
      return false;
    }
  }
  if (!event_selection_set_.SyncKernelBuffer()) {
    return false;
  }
  event_selection_set_.CloseEventFiles();
  time_stat_.finish_recording_time = GetSystemClock();
  uint64_t recording_time = time_stat_.finish_recording_time - time_stat_.start_recording_time;
  LOG(INFO) << "Recorded for " << recording_time / 1e9 << " seconds. Start post processing.";
  return true;
}

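// Copies the completed record file to out_fd (used with the internal --out-fd option), then
// removes the file.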
static bool WriteRecordDataToOutFd(const std::string& in_filename,
                                   android::base::unique_fd out_fd) {
  android::base::unique_fd in_fd(FileHelper::OpenReadOnly(in_filename));
  if (in_fd == -1) {
    PLOG(ERROR) << "Failed to open " << in_filename;
    return false;
  }
  char buf[8192];
  while (true) {
    ssize_t n = TEMP_FAILURE_RETRY(read(in_fd, buf, sizeof(buf)));
    if (n < 0) {
      PLOG(ERROR) << "Failed to read " << in_filename;
      return false;
    }
    if (n == 0) {
      break;
    }
    if (!android::base::WriteFully(out_fd, buf, n)) {
      PLOG(ERROR) << "Failed to write to out_fd";
      return false;
    }
  }
  unlink(in_filename.c_str());
  return true;
}

bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
  // 1. Read records left in the buffer.
  if (!event_selection_set_.FinishReadMmapEventData()) {
    return false;
  }

  // 2. Post unwind dwarf callchain.
  if (unwind_dwarf_callchain_ && post_unwind_) {
    if (!PostUnwindRecords()) {
      return false;
    }
  }

  // 3. Optionally join Callchains.
  if (callchain_joiner_) {
    JoinCallChains();
  }

  // 4. Dump additional features, and close record file.
  if (!record_file_writer_->FinishWritingDataSection()) {
    return false;
  }
  if (!DumpAdditionalFeatures(args)) {
    return false;
  }
  if (!record_file_writer_->Close()) {
    return false;
  }
  if (out_fd_ != -1 && !WriteRecordDataToOutFd(record_filename_, std::move(out_fd_))) {
    return false;
  }
  time_stat_.post_process_time = GetSystemClock();

  // 5. Show brief record result.
  auto report_compression_stat = [&]() {
    if (auto compressor = record_file_writer_->GetCompressor(); compressor != nullptr) {
      uint64_t original_size = compressor->TotalInputSize();
      uint64_t compressed_size = compressor->TotalOutputSize();
      LOG(INFO) << "Record compressed: " << ReadableBytes(compressed_size) << " (original "
                << ReadableBytes(original_size) << ", ratio " << std::setprecision(2)
                << (static_cast<double>(original_size) / compressed_size) << ")";
    }
  };

  auto record_stat = event_selection_set_.GetRecordStat();
  if (event_selection_set_.HasAuxTrace()) {
    LOG(INFO) << "Aux data traced: " << ReadableCount(record_stat.aux_data_size);
    if (record_stat.lost_aux_data_size != 0) {
      LOG(INFO) << "Aux data lost in user space: " << ReadableCount(record_stat.lost_aux_data_size)
                << ", consider increasing userspace buffer size(--user-buffer-size).";
    }
    report_compression_stat();
  } else {
    // Here we report all lost records as samples. This isn't accurate, because records like
    // MmapRecords are not samples. But it's easier for users to understand.
    size_t userspace_lost_samples =
        record_stat.userspace_lost_samples + record_stat.userspace_lost_non_samples;
    size_t lost_samples = record_stat.kernelspace_lost_records + userspace_lost_samples;

    std::stringstream os;
    os << "Samples recorded: " << ReadableCount(sample_record_count_);
    if (record_stat.userspace_truncated_stack_samples > 0) {
      os << " (" << ReadableCount(record_stat.userspace_truncated_stack_samples)
         << " with truncated stacks)";
    }
    os << ". Samples lost: " << ReadableCount(lost_samples);
    if (lost_samples != 0) {
      os << " (kernelspace: " << ReadableCount(record_stat.kernelspace_lost_records)
         << ", userspace: " << ReadableCount(userspace_lost_samples) << ")";
    }
    os << ".";
    LOG(INFO) << os.str();
    report_compression_stat();

    LOG(DEBUG) << "Record stat: kernelspace_lost_records="
               << ReadableCount(record_stat.kernelspace_lost_records)
               << ", userspace_lost_samples=" << ReadableCount(record_stat.userspace_lost_samples)
               << ", userspace_lost_non_samples="
               << ReadableCount(record_stat.userspace_lost_non_samples)
               << ", userspace_truncated_stack_samples="
               << ReadableCount(record_stat.userspace_truncated_stack_samples);

    if (sample_record_count_ + record_stat.kernelspace_lost_records != 0) {
      double kernelspace_lost_percent =
          static_cast<double>(record_stat.kernelspace_lost_records) /
          (record_stat.kernelspace_lost_records + sample_record_count_);
      constexpr double KERNELSPACE_LOST_PERCENT_WARNING_BAR = 0.1;
      if (kernelspace_lost_percent >= KERNELSPACE_LOST_PERCENT_WARNING_BAR) {
        LOG(WARNING) << "Lost " << (kernelspace_lost_percent * 100)
                     << "% of samples in kernel space, "
                     << "consider increasing kernel buffer size(-m), "
                     << "or decreasing sample frequency(-f), "
                     << "or increasing sample period(-c).";
      }
    }
    size_t userspace_lost_truncated_samples =
        userspace_lost_samples + record_stat.userspace_truncated_stack_samples;
    size_t userspace_complete_samples =
        sample_record_count_ - record_stat.userspace_truncated_stack_samples;
    if (userspace_complete_samples + userspace_lost_truncated_samples != 0) {
      double userspace_lost_percent =
          static_cast<double>(userspace_lost_truncated_samples) /
          (userspace_complete_samples + userspace_lost_truncated_samples);
      constexpr double USERSPACE_LOST_PERCENT_WARNING_BAR = 0.1;
      if (userspace_lost_percent >= USERSPACE_LOST_PERCENT_WARNING_BAR) {
        LOG(WARNING) << "Lost/Truncated " << (userspace_lost_percent * 100)
                     << "% of samples in user space, "
                     << "consider increasing userspace buffer size(--user-buffer-size), "
                     << "or decreasing sample frequency(-f), "
                     << "or increasing sample period(-c).";
      }
    }
    if (callchain_joiner_) {
      callchain_joiner_->DumpStat();
    }
  }
  LOG(DEBUG) << "Prepare recording time "
             << (time_stat_.start_recording_time - time_stat_.prepare_recording_time) / 1e9
             << " s, recording time "
             << (time_stat_.stop_recording_time - time_stat_.start_recording_time) / 1e9
             << " s, stop recording time "
             << (time_stat_.finish_recording_time - time_stat_.stop_recording_time) / 1e9
             << " s, post process time "
             << (time_stat_.post_process_time - time_stat_.finish_recording_time) / 1e9 << " s.";
  return true;
}

bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
                                 std::vector<std::string>* non_option_args,
                                 ProbeEvents& probe_events) {
  OptionValueMap options;
  std::vector<std::pair<OptionName, OptionValue>> ordered_options;

  if (!PreprocessOptions(args, GetRecordCmdOptionFormats(), &options, &ordered_options,
                         non_option_args)) {
    return false;
  }

  // Process options.
  system_wide_collection_ = options.PullBoolValue("-a");

  if (auto value = options.PullValue("--add-counter"); value) {
    add_counters_ = android::base::Split(value->str_value, ",");
  }

  for (const OptionValue& value : options.PullValues("--add-meta-info")) {
    const std::string& s = value.str_value;
    auto split_pos = s.find('=');
    if (split_pos == std::string::npos || split_pos == 0 || split_pos + 1 == s.size()) {
      LOG(ERROR) << "invalid meta-info: " << s;
      return false;
    }
    extra_meta_info_[s.substr(0, split_pos)] = s.substr(split_pos + 1);
  }

  if (auto value = options.PullValue("--addr-filter"); value) {
    auto filters = ParseAddrFilterOption(value->str_value);
    if (filters.empty()) {
      return false;
    }
    event_selection_set_.SetAddrFilters(std::move(filters));
  }

  if (auto value = options.PullValue("--app"); value) {
    app_package_name_ = value->str_value;
  }

  if (auto value = options.PullValue("--aux-buffer-size"); value) {
    uint64_t v = value->uint_value;
    if (v > std::numeric_limits<size_t>::max() || !IsPowerOfTwo(v) || v % sysconf(_SC_PAGE_SIZE)) {
      LOG(ERROR) << "invalid aux buffer size: " << v;
      return false;
    }
    aux_buffer_size_ = static_cast<size_t>(v);
  }

  if (options.PullValue("-b")) {
    branch_sampling_ = branch_sampling_type_map["any"];
  }

  if (auto value = options.PullValue("--binary"); value) {
    binary_name_regex_ = RegEx::Create(value->str_value);
    if (binary_name_regex_ == nullptr) {
      return false;
    }
  }

  if (!options.PullUintValue("--callchain-joiner-min-matching-nodes",
                             &callchain_joiner_min_matching_nodes_, 1)) {
    return false;
  }

  if (auto value = options.PullValue("--clockid"); value) {
    clockid_ = value->str_value;
    if (clockid_ != "perf") {
      if (!IsSettingClockIdSupported()) {
        LOG(ERROR) << "Setting clockid is not supported by the kernel.";
        return false;
      }
      if (clockid_map.find(clockid_) == clockid_map.end()) {
        LOG(ERROR) << "Invalid clockid: " << clockid_;
        return false;
      }
    }
  }

  if (!options.PullUintValue("--cpu-percent", &cpu_time_max_percent_, 1, 100)) {
    return false;
  }

  if (options.PullBoolValue("--decode-etm")) {
    etm_branch_list_generator_ = ETMBranchListGenerator::Create(system_wide_collection_);
  }
  uint32_t interval = 0;
  if (options.PullUintValue("--etm-flush-interval", &interval) && interval != 0) {
    etm_flush_interval_ = std::chrono::milliseconds(interval);
  }

  if (options.PullBoolValue("--record-timestamp")) {
    ETMRecorder& recorder = ETMRecorder::GetInstance();
    recorder.SetRecordTimestamp(true);
  }

  if (options.PullBoolValue("--record-cycles")) {
    ETMRecorder& recorder = ETMRecorder::GetInstance();
    recorder.SetRecordCycles(true);
  }

  if (!options.PullUintValue("--delay", &delay_in_ms_)) {
    return false;
  }

  size_t cyc_threshold;
  if (options.PullUintValue("--cycle-threshold", &cyc_threshold)) {
    ETMRecorder& recorder = ETMRecorder::GetInstance();
    recorder.SetCycleThreshold(cyc_threshold);
  }

  if (!options.PullDoubleValue("--duration", &duration_in_sec_, 1e-9)) {
    return false;
  }

  exclude_perf_ = options.PullBoolValue("--exclude-perf");
  if (!record_filter_.ParseOptions(options)) {
    return false;
  }

  if (options.PullValue("--exit-with-parent")) {
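    // PR_SET_PDEATHSIG asks the kernel to deliver SIGHUP when the thread that started simpleperf
    // dies; the SIGHUP event registered in PrepareRecording() then stops recording.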
    prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
  }

  in_app_context_ = options.PullBoolValue("--in-app");

  for (const OptionValue& value : options.PullValues("-j")) {
    std::vector<std::string> branch_sampling_types = android::base::Split(value.str_value, ",");
    for (auto& type : branch_sampling_types) {
      auto it = branch_sampling_type_map.find(type);
      if (it == branch_sampling_type_map.end()) {
        LOG(ERROR) << "unrecognized branch sampling filter: " << type;
        return false;
      }
      branch_sampling_ |= it->second;
    }
  }
  keep_failed_unwinding_result_ = options.PullBoolValue("--keep-failed-unwinding-result");
  keep_failed_unwinding_debug_info_ = options.PullBoolValue("--keep-failed-unwinding-debug-info");
  if (keep_failed_unwinding_debug_info_) {
    keep_failed_unwinding_result_ = true;
  }

  for (const OptionValue& value : options.PullValues("--kprobe")) {
    std::vector<std::string> cmds = android::base::Split(value.str_value, ",");
    for (const auto& cmd : cmds) {
      if (!probe_events.AddProbe(ProbeEventType::kKprobe, cmd)) {
        return false;
      }
    }
  }
  for (const OptionValue& value : options.PullValues("--uprobe")) {
    std::vector<std::string> cmds = android::base::Split(value.str_value, ",");
    for (const auto& cmd : cmds) {
      if (!probe_events.AddProbe(ProbeEventType::kUprobe, cmd)) {
        return false;
      }
    }
  }

  if (auto value = options.PullValue("-m"); value) {
    if (!IsPowerOfTwo(value->uint_value) ||
        value->uint_value > std::numeric_limits<size_t>::max()) {
      LOG(ERROR) << "Invalid mmap_pages: '" << value->uint_value << "'";
      return false;
    }
    mmap_page_range_.first = mmap_page_range_.second = value->uint_value;
  }

  allow_callchain_joiner_ = !options.PullBoolValue("--no-callchain-joiner");
  allow_truncating_samples_ = !options.PullBoolValue("--no-cut-samples");
  dump_build_id_ = !options.PullBoolValue("--no-dump-build-id");
  can_dump_kernel_symbols_ = !options.PullBoolValue("--no-dump-kernel-symbols");
  dump_symbols_ = !options.PullBoolValue("--no-dump-symbols");
  if (auto value = options.PullValue("--no-inherit"); value) {
    child_inherit_ = false;
  } else if (system_wide_collection_) {
    // child_inherit is used to monitor newly created threads. It isn't useful in system wide
    // collection, which monitors all threads running on selected cpus.
    child_inherit_ = false;
  }
  unwind_dwarf_callchain_ = !options.PullBoolValue("--no-unwind");

  if (auto value = options.PullValue("-o"); value) {
    record_filename_ = value->str_value;
  }

  if (auto value = options.PullValue("--out-fd"); value) {
    out_fd_.reset(static_cast<int>(value->uint_value));
  }

  if (auto strs = options.PullStringValues("-p"); !strs.empty()) {
    if (auto pids = GetPidsFromStrings(strs, true, true); pids) {
      event_selection_set_.AddMonitoredProcesses(pids.value());
    } else {
      return false;
    }
  }

  // Use explicit if statements instead of logical operators to avoid short-circuit evaluation.
  if (options.PullValue("--post-unwind")) {
    post_unwind_ = true;
  }
  if (options.PullValue("--post-unwind=yes")) {
    post_unwind_ = true;
  }
  if (options.PullValue("--post-unwind=no")) {
    post_unwind_ = false;
  }

  if (auto value = options.PullValue("--user-buffer-size"); value) {
    uint64_t v = value->uint_value;
    if (v > std::numeric_limits<size_t>::max() || v == 0) {
      LOG(ERROR) << "invalid user buffer size: " << v;
      return false;
    }
    user_buffer_size_ = static_cast<size_t>(v);
  }

  if (!options.PullUintValue("--size-limit", &size_limit_in_bytes_, 1)) {
    return false;
  }

  if (auto value = options.PullValue("--start_profiling_fd"); value) {
    start_profiling_fd_.reset(static_cast<int>(value->uint_value));
  }

  stdio_controls_profiling_ = options.PullBoolValue("--stdio-controls-profiling");

  if (auto value = options.PullValue("--stop-signal-fd"); value) {
    stop_signal_fd_.reset(static_cast<int>(value->uint_value));
  }

  if (auto value = options.PullValue("--symfs"); value) {
    if (!Dso::SetSymFsDir(value->str_value)) {
      return false;
    }
  }

  for (const OptionValue& value : options.PullValues("-t")) {
    if (auto tids = GetTidsFromString(value.str_value, true); tids) {
      event_selection_set_.AddMonitoredThreads(tids.value());
    } else {
      return false;
    }
  }

  trace_offcpu_ = options.PullBoolValue("--trace-offcpu");

  if (auto value = options.PullValue("--tracepoint-events"); value) {
    if (!EventTypeManager::Instance().ReadTracepointsFromFile(value->str_value)) {
      return false;
    }
  }
  use_cmd_exit_code_ = options.PullBoolValue("--use-cmd-exit-code");

  if (auto value = options.PullValue("-z"); value) {
    if (value->str_value.empty()) {
1255       // 3 is the default compression level of zstd library, in ZSTD_defaultCLevel().
1256       constexpr size_t DEFAULT_COMPRESSION_LEVEL = 3;
1257       compression_level_ = DEFAULT_COMPRESSION_LEVEL;
1258     } else {
1259       if (!android::base::ParseUint(value->str_value, &compression_level_) ||
1260           compression_level_ < 1 || compression_level_ > 22) {
1261         LOG(ERROR) << "invalid compression level for -z: " << value->str_value;
1262         return false;
1263       }
1264     }
1265   }
1266 
1267   CHECK(options.values.empty());
1268 
  bool check_event_type = true;
  if (!app_package_name_.empty() && !in_app_context_ && !IsRoot()) {
    // Defer event type checking until RunInAppContext() is called.
    check_event_type = false;
  }

  // Process ordered options.
  for (const auto& pair : ordered_options) {
    const OptionName& name = pair.first;
    const OptionValue& value = pair.second;

    if (name == "-c" || name == "-f") {
      if (value.uint_value < 1) {
        LOG(ERROR) << "invalid " << name << ": " << value.uint_value;
        return false;
      }
      SampleRate rate;
      if (name == "-c") {
        rate.sample_period = value.uint_value;
      } else {
        if (value.uint_value >= INT_MAX) {
          LOG(ERROR) << "sample freq can't be bigger than INT_MAX: " << value.uint_value;
          return false;
        }
        rate.sample_freq = value.uint_value;
        max_sample_freq_ = std::max(max_sample_freq_, rate.sample_freq);
      }
      event_selection_set_.SetSampleRateForNewEvents(rate);

    } else if (name == "--call-graph") {
      std::vector<std::string> strs = android::base::Split(value.str_value, ",");
      if (strs[0] == "fp") {
        fp_callchain_sampling_ = true;
        dwarf_callchain_sampling_ = false;
      } else if (strs[0] == "dwarf") {
        fp_callchain_sampling_ = false;
        dwarf_callchain_sampling_ = true;
        if (strs.size() > 1) {
          uint64_t size;
          if (!ParseUint(strs[1], &size)) {
            LOG(ERROR) << "invalid dump stack size in --call-graph option: " << strs[1];
            return false;
          }
          if ((size & 7) != 0) {
            LOG(ERROR) << "dump stack size " << size << " is not 8-byte aligned.";
            return false;
          }
          if (size >= MAX_DUMP_STACK_SIZE) {
            LOG(ERROR) << "dump stack size " << size << " is bigger than max allowed size "
                       << MAX_DUMP_STACK_SIZE << ".";
            return false;
          }
          dump_stack_size_in_dwarf_sampling_ = static_cast<uint32_t>(size);
        }
      }

    } else if (name == "--cpu") {
      if (auto cpus = GetCpusFromString(value.str_value); cpus) {
        event_selection_set_.SetCpusForNewEvents(
            std::vector<int>(cpus.value().begin(), cpus.value().end()));
      } else {
        return false;
      }
    } else if (name == "-e") {
      std::vector<std::string> event_types = android::base::Split(value.str_value, ",");
      for (auto& event_type : event_types) {
        if (!probe_events.CreateProbeEventIfNotExist(event_type)) {
          return false;
        }
        if (!event_selection_set_.AddEventType(event_type, check_event_type)) {
          return false;
        }
      }
    } else if (name == "-g") {
      fp_callchain_sampling_ = false;
      dwarf_callchain_sampling_ = true;
    } else if (name == "--group") {
      std::vector<std::string> event_types = android::base::Split(value.str_value, ",");
      for (const auto& event_type : event_types) {
        if (!probe_events.CreateProbeEventIfNotExist(event_type)) {
          return false;
        }
      }
      if (!event_selection_set_.AddEventGroup(event_types, check_event_type)) {
        return false;
      }
    } else if (name == "--tp-filter") {
      if (!event_selection_set_.SetTracepointFilter(value.str_value)) {
        return false;
      }
    } else {
      LOG(ERROR) << "unprocessed option: " << name;
      return false;
    }
  }

  if (!dwarf_callchain_sampling_) {
    if (!unwind_dwarf_callchain_) {
      LOG(ERROR) << "--no-unwind is only used with the `--call-graph dwarf` option.";
      return false;
    }
    unwind_dwarf_callchain_ = false;
  }
  if (post_unwind_) {
    if (!dwarf_callchain_sampling_ || !unwind_dwarf_callchain_) {
      post_unwind_ = false;
    }
  }

  if (fp_callchain_sampling_) {
    if (GetTargetArch() == ARCH_ARM) {
      LOG(WARNING) << "`--call-graph fp` option doesn't work well on the arm architecture; "
                   << "consider using the `-g` option or profiling on the aarch64 architecture.";
    }
  }

  if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
    LOG(ERROR) << "Recording system wide and recording existing processes/threads can't be "
                  "used at the same time.";
    return false;
  }

  if (system_wide_collection_ && !IsRoot()) {
    LOG(ERROR) << "System wide profiling needs root privilege.";
    return false;
  }

  if (dump_symbols_ && can_dump_kernel_symbols_) {
    // No need to dump kernel symbols separately, as we will dump all required symbols.
    can_dump_kernel_symbols_ = false;
  }
  if (clockid_.empty()) {
    clockid_ = IsSettingClockIdSupported() ? "monotonic" : "perf";
  }
  return true;
}

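// As a rough worked example of the mlock budget computed in
// AdjustPerfEventLimit() below (values are illustrative): with 8 online CPUs
// and 1024 mapped pages per buffer, mlock_kb = 8 * (1024 + 1) * 4 = 32800 KB,
// before any aux buffer space is added.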
bool RecordCommand::AdjustPerfEventLimit() {
  bool set_prop = false;
  // 1. Adjust max_sample_rate.
  uint64_t cur_max_freq;
  if (GetMaxSampleFrequency(&cur_max_freq) && cur_max_freq < max_sample_freq_ &&
      !SetMaxSampleFrequency(max_sample_freq_)) {
    set_prop = true;
  }
  // 2. Adjust perf_cpu_time_max_percent.
  size_t cur_percent;
  if (GetCpuTimeMaxPercent(&cur_percent) && cur_percent != cpu_time_max_percent_ &&
      !SetCpuTimeMaxPercent(cpu_time_max_percent_)) {
    set_prop = true;
  }
  // 3. Adjust perf_event_mlock_kb.
  long cpus = sysconf(_SC_NPROCESSORS_CONF);
  uint64_t mlock_kb = cpus * (mmap_page_range_.second + 1) * 4;
  if (event_selection_set_.HasAuxTrace()) {
    mlock_kb += cpus * aux_buffer_size_ / 1024;
  }
  uint64_t cur_mlock_kb;
  if (GetPerfEventMlockKb(&cur_mlock_kb) && cur_mlock_kb < mlock_kb &&
      !SetPerfEventMlockKb(mlock_kb)) {
    set_prop = true;
  }

  if (GetAndroidVersion() >= kAndroidVersionQ && set_prop && !in_app_context_) {
    return SetPerfEventLimits(std::max(max_sample_freq_, cur_max_freq), cpu_time_max_percent_,
                              std::max(mlock_kb, cur_mlock_kb));
  }
  return true;
}

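// TraceOffCpu() implements --trace-offcpu by pairing the selected clock event
// with sched:sched_switch samples. An illustrative invocation:
//   simpleperf record --trace-offcpu -e task-clock -p <pid>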
bool RecordCommand::TraceOffCpu() {
  if (FindEventTypeByName("sched:sched_switch") == nullptr) {
    LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available";
    return false;
  }
  for (auto& event_type : event_selection_set_.GetTracepointEvents()) {
    if (event_type->name == "sched:sched_switch") {
      LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event";
      return false;
    }
  }
  if (!IsDumpingRegsForTracepointEventsSupported()) {
    LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel";
    return false;
  }
  // The --trace-offcpu option requires exactly one selected event, which must be one of the
  // accepted event types below.
  std::set<std::string> accepted_events = {"cpu-clock", "task-clock"};
  std::vector<const EventType*> events = event_selection_set_.GetEvents();
  if (events.size() != 1 || accepted_events.find(events[0]->name) == accepted_events.end()) {
    LOG(ERROR) << "--trace-offcpu option only works with one of events "
               << android::base::Join(accepted_events, ' ');
    return false;
  }
  if (!event_selection_set_.AddEventType("sched:sched_switch", SampleRate(0, 1))) {
    return false;
  }
  if (IsSwitchRecordSupported()) {
    event_selection_set_.EnableSwitchRecord();
  }
  return true;
}

bool RecordCommand::SetEventSelectionFlags() {
  event_selection_set_.SampleIdAll();
  if (!event_selection_set_.SetBranchSampling(branch_sampling_)) {
    return false;
  }
  if (fp_callchain_sampling_) {
    event_selection_set_.EnableFpCallChainSampling();
  } else if (dwarf_callchain_sampling_) {
    if (!event_selection_set_.EnableDwarfCallChainSampling(dump_stack_size_in_dwarf_sampling_)) {
      return false;
    }
  }
  event_selection_set_.SetInherit(child_inherit_);
  if (clockid_ != "perf") {
    event_selection_set_.SetClockId(clockid_map[clockid_]);
  }
  return true;
}

bool RecordCommand::CreateAndInitRecordFile() {
  EventAttrIds attrs = event_selection_set_.GetEventAttrWithId();
  bool remove_regs_and_stacks = unwind_dwarf_callchain_ && !post_unwind_;
  if (remove_regs_and_stacks) {
    for (auto& attr : attrs) {
      ReplaceRegAndStackWithCallChain(attr.attr);
    }
  }
  record_file_writer_ = CreateRecordFile(record_filename_, attrs);
  if (record_file_writer_ == nullptr) {
    return false;
  }
  // Use the first perf_event_attr and the first event id to dump mmap and comm records.
  CHECK(!attrs.empty());
  dumping_attr_id_ = attrs[0];
  CHECK(!dumping_attr_id_.ids.empty());
  map_record_reader_.emplace(dumping_attr_id_.attr, dumping_attr_id_.ids[0],
                             event_selection_set_.RecordNotExecutableMaps());
  map_record_reader_->SetCallback([this](Record* r) { return ProcessRecord(r); });

  return DumpKernelSymbol() && DumpTracingData() && DumpMaps() && DumpAuxTraceInfo();
}

std::unique_ptr<RecordFileWriter> RecordCommand::CreateRecordFile(const std::string& filename,
                                                                  const EventAttrIds& attrs) {
  std::unique_ptr<RecordFileWriter> writer = RecordFileWriter::CreateInstance(filename);
  if (!writer) {
    return nullptr;
  }
  if (compression_level_ != 0 && !writer->SetCompressionLevel(compression_level_)) {
    return nullptr;
  }
  if (!writer->WriteAttrSection(attrs)) {
    return nullptr;
  }
  return writer;
}

bool RecordCommand::DumpKernelSymbol() {
  if (can_dump_kernel_symbols_) {
    if (event_selection_set_.NeedKernelSymbol()) {
      std::string kallsyms;
      if (!LoadKernelSymbols(&kallsyms)) {
        // Symbol loading may have failed due to the lack of permissions. This is not fatal;
        // the symbols will appear as "unknown".
        return true;
      }
      KernelSymbolRecord r(kallsyms);
      if (!ProcessRecord(&r)) {
        return false;
      }
    }
  }
  return true;
}

bool RecordCommand::DumpTracingData() {
  std::vector<const EventType*> tracepoint_event_types = event_selection_set_.GetTracepointEvents();
  if (tracepoint_event_types.empty() || !CanRecordRawData() || in_app_context_) {
    return true;  // No need to dump tracing data, or we can't do it.
  }
  std::vector<char> tracing_data;
  if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
    return false;
  }
  TracingDataRecord record(tracing_data);
  if (!ProcessRecord(&record)) {
    return false;
  }
  return true;
}

bool RecordCommand::DumpMaps() {
  if (system_wide_collection_) {
    // For system wide recording:
    //   If not aux tracing, only dump kernel maps. The maps of a process are dumped when needed
    //   (the first time a sample hits that process).
    //   If aux tracing and decoding ETM data, the maps are dumped by etm_branch_list_generator_.
    //   If aux tracing without decoding ETM data, we don't know which maps will be needed, so dump
    //   all process maps. To reduce pre-recording time, we dump process maps in the map record
    //   thread while recording.
    if (event_selection_set_.HasAuxTrace() && !etm_branch_list_generator_) {
      map_record_thread_.emplace(*map_record_reader_);
      return true;
    }
    if (!event_selection_set_.ExcludeKernel()) {
      return map_record_reader_->ReadKernelMaps();
    }
    return true;
  }
  if (!event_selection_set_.ExcludeKernel() && !map_record_reader_->ReadKernelMaps()) {
    return false;
  }
  // Map from process id to a set of thread ids in that process.
  std::unordered_map<pid_t, std::unordered_set<pid_t>> process_map;
  for (pid_t pid : event_selection_set_.GetMonitoredProcesses()) {
    std::vector<pid_t> tids = GetThreadsInProcess(pid);
    process_map[pid].insert(tids.begin(), tids.end());
  }
  for (pid_t tid : event_selection_set_.GetMonitoredThreads()) {
    pid_t pid;
    if (GetProcessForThread(tid, &pid)) {
      process_map[pid].insert(tid);
    }
  }

  // Dump each process.
  for (const auto& [pid, tids] : process_map) {
    if (!map_record_reader_->ReadProcessMaps(pid, tids, 0)) {
      return false;
    }
  }
  return true;
}

bool RecordCommand::ProcessRecord(Record* record) {
  UpdateRecord(record);
  if (ShouldOmitRecord(record)) {
    return true;
  }
  if (size_limit_in_bytes_ > 0u) {
    if (size_limit_in_bytes_ < record_file_writer_->GetDataSectionSize()) {
      return event_selection_set_.GetIOEventLoop()->ExitLoop();
    }
  }
  if (jit_debug_reader_ && !jit_debug_reader_->UpdateRecord(record)) {
    return false;
  }
  last_record_timestamp_ = std::max(last_record_timestamp_, record->Timestamp());
  // In system wide recording, maps are dumped when they are needed by records.
  if (system_wide_collection_ && !DumpMapsForRecord(record)) {
    return false;
  }
  // Record filter check should go after DumpMapsForRecord(). Otherwise, process/thread name
  // filters don't work in system wide collection.
  if (record->type() == PERF_RECORD_SAMPLE) {
    if (!record_filter_.Check(static_cast<SampleRecord&>(*record))) {
      return true;
    }
  }
  if (etm_branch_list_generator_) {
    bool consumed = false;
    if (!etm_branch_list_generator_->ProcessRecord(*record, consumed)) {
      return false;
    }
    if (consumed) {
      return true;
    }
  }
  if (unwind_dwarf_callchain_) {
    if (post_unwind_) {
      return SaveRecordForPostUnwinding(record);
    }
    return SaveRecordAfterUnwinding(record);
  }
  return SaveRecordWithoutUnwinding(record);
}

bool RecordCommand::DumpAuxTraceInfo() {
  if (event_selection_set_.HasAuxTrace()) {
    AuxTraceInfoRecord auxtrace_info = ETMRecorder::GetInstance().CreateAuxTraceInfoRecord();
    return ProcessRecord(&auxtrace_info);
  }
  return true;
}

template <typename MmapRecordType>
bool MapOnlyExistInMemory(MmapRecordType* record) {
  return !record->InKernel() && MappedFileOnlyExistInMemory(record->filename);
}

bool RecordCommand::ShouldOmitRecord(Record* record) {
  if (jit_debug_reader_) {
    // To profile jitted Java code, we need PROT_JIT_SYMFILE_MAP maps not overlapped by maps for
    // [anon:dalvik-jit-code-cache]. To profile interpreted Java code, we record maps that
    // are not executable. Some non-exec maps (like those for stack, heap) provide misleading map
    // entries for unwinding, as in http://b/77236599. So it is better to remove
    // dalvik-jit-code-cache and other maps that only exist in memory.
    switch (record->type()) {
      case PERF_RECORD_MMAP:
        return MapOnlyExistInMemory(static_cast<MmapRecord*>(record));
      case PERF_RECORD_MMAP2:
        return MapOnlyExistInMemory(static_cast<Mmap2Record*>(record));
    }
  }
  return false;
}

bool RecordCommand::DumpMapsForRecord(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    pid_t pid = static_cast<SampleRecord*>(record)->tid_data.pid;
    if (dumped_processes_.find(pid) == dumped_processes_.end()) {
      // Dump map info and all thread names for that process.
      if (!map_record_reader_->ReadProcessMaps(pid, last_record_timestamp_)) {
        return false;
      }
      dumped_processes_.insert(pid);
    }
  }
  return true;
}

bool RecordCommand::SaveRecordForPostUnwinding(Record* record) {
  if (!record_file_writer_->WriteRecord(*record)) {
    LOG(ERROR) << "If there isn't enough space for storing profiling data, consider using "
               << "--no-post-unwind option.";
    return false;
  }
  return true;
}

bool RecordCommand::SaveRecordAfterUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    auto& r = *static_cast<SampleRecord*>(record);
    // AdjustCallChainGeneratedByKernel() should go before UnwindRecord(), because we don't want
    // to adjust callchains generated by the dwarf unwinder.
    r.AdjustCallChainGeneratedByKernel();
    if (!UnwindRecord(r)) {
      return false;
    }
    // ExcludeKernelCallChain() should go after UnwindRecord(), so that it sees the generated user
    // call chain.
    if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) {
      // If the current record contains no user callchain, skip it.
      return true;
    }
    sample_record_count_++;
  } else {
    thread_tree_.Update(*record);
  }
  return record_file_writer_->WriteRecord(*record);
}

bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) {
  if (record->type() == PERF_RECORD_SAMPLE) {
    auto& r = *static_cast<SampleRecord*>(record);
    if (fp_callchain_sampling_ || dwarf_callchain_sampling_) {
      r.AdjustCallChainGeneratedByKernel();
    }
    if (r.InKernel() && exclude_kernel_callchain_ && !r.ExcludeKernelCallChain()) {
      // If current record contains no user callchain, skip it.
      return true;
    }
    sample_record_count_++;
  }
  return record_file_writer_->WriteRecord(*record);
}

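// JIT debug info is translated into synthetic Mmap2Records so that samples in
// jitted code can be attributed to the dumped symfiles. For example (addresses
// and sizes are illustrative), jitted code at [0x70000000, 0x70001000) becomes
// an Mmap2Record with prot = map_flags::PROT_JIT_SYMFILE_MAP whose filename
// points at the symfile dumped by JITDebugReader.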
bool RecordCommand::ProcessJITDebugInfo(std::vector<JITDebugInfo> debug_info,
                                        bool sync_kernel_records) {
  for (auto& info : debug_info) {
    if (info.type == JITDebugInfo::JIT_DEBUG_JIT_CODE) {
      uint64_t timestamp =
          jit_debug_reader_->SyncWithRecords() ? info.timestamp : last_record_timestamp_;
      Mmap2Record record(dumping_attr_id_.attr, false, info.pid, info.pid, info.jit_code_addr,
                         info.jit_code_len, info.file_offset, map_flags::PROT_JIT_SYMFILE_MAP,
                         info.file_path, dumping_attr_id_.ids[0], timestamp);
      if (!ProcessRecord(&record)) {
        return false;
      }
    } else {
      if (!info.symbols.empty()) {
        Dso* dso = thread_tree_.FindUserDsoOrNew(info.file_path, 0, DSO_DEX_FILE);
        dso->SetSymbols(&info.symbols);
      }
      if (info.dex_file_map) {
        ThreadMmap& map = *info.dex_file_map;
        uint64_t timestamp =
            jit_debug_reader_->SyncWithRecords() ? info.timestamp : last_record_timestamp_;
        Mmap2Record record(dumping_attr_id_.attr, false, info.pid, info.pid, map.start_addr,
                           map.len, map.pgoff, map.prot, map.name, dumping_attr_id_.ids[0],
                           timestamp);
        if (!ProcessRecord(&record)) {
          return false;
        }
      }
      thread_tree_.AddDexFileOffset(info.file_path, info.dex_file_offset);
    }
  }
  // We want to let samples see the most recent JIT maps generated before them, but no JIT maps
  // generated after them. So we process existing samples each time we generate new JIT maps, and
  // we prefer to process samples after processing JIT maps, because some of the samples may hit
  // the new JIT maps and we want to report them properly.
  if (sync_kernel_records && !event_selection_set_.SyncKernelBuffer()) {
    return false;
  }
  return true;
}

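// Control protocol used with --stdio-controls-profiling: the controlling
// process writes one command per line ("pause" or "resume") to our stdin and
// reads back "ok" or "error" on our stdout. A controller-side sketch, with
// hypothetical pipe fds:
//   write(to_simpleperf_fd, "pause\n", 6);    // stop collecting samples
//   // ... read "ok\n" from from_simpleperf_fd, then later send "resume\n".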
bool RecordCommand::ProcessControlCmd(IOEventLoop* loop) {
  char* line = nullptr;
  size_t line_length = 0;
  if (getline(&line, &line_length, stdin) == -1) {
    free(line);
    // When the simpleperf Java API destroys the simpleperf process, it also closes the stdin pipe.
    // So we may see EOF of stdin.
    return loop->ExitLoop();
  }
  std::string cmd = android::base::Trim(line);
  free(line);
  LOG(DEBUG) << "process control cmd: " << cmd;
  bool result = false;
  if (cmd == "pause") {
    result = event_selection_set_.SetEnableEvents(false);
  } else if (cmd == "resume") {
    result = event_selection_set_.SetEnableEvents(true);
  } else {
    LOG(ERROR) << "unknown control cmd: " << cmd;
  }
  printf("%s\n", result ? "ok" : "error");
  fflush(stdout);
  return result;
}

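// For example (hypothetical paths), an MMAP record of
//   ["/data/app/base.apk" pgoff=0x10000]
// whose offset falls inside an embedded libfoo.so is rewritten to
//   ["/data/app/base.apk!/lib/arm64-v8a/libfoo.so" pgoff=0x0],
// where the new offset is relative to the start of the embedded ELF file.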
template <class RecordType>
void UpdateMmapRecordForEmbeddedPath(RecordType& r, bool has_prot, uint32_t prot) {
  if (r.InKernel()) {
    return;
  }
  std::string filename = r.filename;
  bool name_changed = false;
  // Some vdex files in map files are marked with the deleted flag, but they exist in the file
  // system. It may be because a new file was used to replace the old one, but it is still worth
  // trying.
  if (android::base::EndsWith(filename, " (deleted)")) {
    filename.resize(filename.size() - 10);
    name_changed = true;
  }
  if (r.data->pgoff != 0 && (!has_prot || (prot & PROT_EXEC))) {
    // For the case of a shared library "foobar.so" embedded
    // inside an APK, we rewrite the original MMAP from
    // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W]
    // so as to make the library name explicit. This update is
    // done here (as part of the record operation) as opposed to
    // on the host during the report, since we want to report
    // the correct library name even if the APK in question
    // is not present on the host. The new offset W is
    // calculated to be with respect to the start of foobar.so,
    // not to the start of path.apk.
    EmbeddedElf* ee = ApkInspector::FindElfInApkByOffset(filename, r.data->pgoff);
    if (ee != nullptr) {
      // Compute the new offset relative to the start of the elf in the APK.
      auto data = *r.data;
      data.pgoff -= ee->entry_offset();
      r.SetDataAndFilename(data, GetUrlInApk(filename, ee->entry_name()));
      return;
    }
  }
  std::string zip_path;
  std::string entry_name;
  if (ParseExtractedInMemoryPath(filename, &zip_path, &entry_name)) {
    filename = GetUrlInApk(zip_path, entry_name);
    name_changed = true;
  }
  if (name_changed) {
    auto data = *r.data;
    r.SetDataAndFilename(data, filename);
  }
}

void RecordCommand::UpdateRecord(Record* record) {
  if (record->type() == PERF_RECORD_MMAP) {
    UpdateMmapRecordForEmbeddedPath(*static_cast<MmapRecord*>(record), false, 0);
  } else if (record->type() == PERF_RECORD_MMAP2) {
    auto r = static_cast<Mmap2Record*>(record);
    UpdateMmapRecordForEmbeddedPath(*r, true, r->data->prot);
  } else if (record->type() == PERF_RECORD_COMM) {
    auto r = static_cast<CommRecord*>(record);
    if (r->data->pid == r->data->tid) {
      std::string s = GetCompleteProcessName(r->data->pid);
      if (!s.empty()) {
        r->SetCommandName(s);
      }
    }
  }
}

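// A sample can be dwarf-unwound only if it carries a callchain plus user
// registers and user stack data; UnwindRecord() passes any other sample
// through unchanged.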
bool RecordCommand::UnwindRecord(SampleRecord& r) {
  if (!((r.sample_type & PERF_SAMPLE_CALLCHAIN) && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
        (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER))) {
    return true;
  }
  if (r.GetValidStackSize() > 0) {
    ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
    RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                            r.GetValidStackSize(), &ips, &sps)) {
      return false;
    }
    // The unwinding may fail if the JIT debug info isn't the latest. In this case, read JIT debug
    // info from the process and retry unwinding.
    if (jit_debug_reader_ && !post_unwind_ &&
        offline_unwinder_->IsCallChainBrokenForIncompleteJITDebugInfo()) {
      jit_debug_reader_->ReadProcess(r.tid_data.pid);
      jit_debug_reader_->FlushDebugInfo(r.Timestamp());
      if (!offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
                                              r.GetValidStackSize(), &ips, &sps)) {
        return false;
      }
    }
    if (keep_failed_unwinding_result_ && !KeepFailedUnwindingResult(r, ips, sps)) {
      return false;
    }
    r.ReplaceRegAndStackWithCallChain(ips);
    if (callchain_joiner_ &&
        !callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
                                         CallChainJoiner::ORIGINAL_OFFLINE, ips, sps)) {
      return false;
    }
  } else {
    // For kernel samples, we still need to remove the user stack and register fields.
    r.ReplaceRegAndStackWithCallChain({});
  }
  return true;
}

bool RecordCommand::KeepFailedUnwindingResult(const SampleRecord& r,
                                              const std::vector<uint64_t>& ips,
                                              const std::vector<uint64_t>& sps) {
  auto& result = offline_unwinder_->GetUnwindingResult();
  if (result.error_code != unwindstack::ERROR_NONE) {
    if (keep_failed_unwinding_debug_info_) {
      return record_file_writer_->WriteRecord(UnwindingResultRecord(
          r.time_data.time, result, r.regs_user_data, r.stack_user_data, ips, sps));
    }
    return record_file_writer_->WriteRecord(
        UnwindingResultRecord(r.time_data.time, result, {}, {}, {}, {}));
  }
  return true;
}

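// Post-unwinding and callchain joining both need a second pass over records
// already written to disk. MoveRecordFile() finishes and closes the current
// output file, moves it aside to be re-read, and reopens record_filename_ for
// the rewritten records.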
std::unique_ptr<RecordFileReader> RecordCommand::MoveRecordFile(const std::string& old_filename) {
  if (!record_file_writer_->FinishWritingDataSection() || !record_file_writer_->Close()) {
    return nullptr;
  }
  record_file_writer_.reset();
  std::error_code ec;
  std::filesystem::rename(record_filename_, old_filename, ec);
  if (ec) {
    LOG(DEBUG) << "Failed to rename: " << ec.message();
    // rename() fails on the Android N x86 emulator, which uses kernel 3.10: rename() in bionic
    // uses the renameat2 syscall, which isn't supported on kernels < 3.15. So add a fallback to
    // the mv command. The mv command also works in other situations where rename() doesn't, so
    // we'd like to keep it as a fallback to rename().
    if (!Workload::RunCmd({"mv", record_filename_, old_filename})) {
      return nullptr;
    }
  }

  auto reader = RecordFileReader::CreateInstance(old_filename);
  if (!reader) {
    return nullptr;
  }

  record_file_writer_ = CreateRecordFile(record_filename_, reader->AttrSection());
  if (!record_file_writer_) {
    return nullptr;
  }
  return reader;
}

bool RecordCommand::PostUnwindRecords() {
  auto tmp_file = ScopedTempFiles::CreateTempFile();
  auto reader = MoveRecordFile(tmp_file->path);
  if (!reader) {
    return false;
  }
  // Write new event attrs without regs and stacks fields.
  EventAttrIds attrs = reader->AttrSection();
  for (auto& attr : attrs) {
    ReplaceRegAndStackWithCallChain(attr.attr);
  }
  if (!record_file_writer_->WriteAttrSection(attrs)) {
    return false;
  }

  sample_record_count_ = 0;
  auto callback = [this](std::unique_ptr<Record> record) {
    return SaveRecordAfterUnwinding(record.get());
  };
  return reader->ReadDataSection(callback);
}

bool RecordCommand::JoinCallChains() {
  // 1. Prepare joined callchains.
  if (!callchain_joiner_->JoinCallChains()) {
    return false;
  }
  // 2. Move records from record_filename_ to a temporary file.
  auto tmp_file = ScopedTempFiles::CreateTempFile();
  auto reader = MoveRecordFile(tmp_file->path);
  if (!reader) {
    return false;
  }

  // 3. Read records from the temporary file, and write records with joined call chains back
  // to record_filename_.
  auto record_callback = [&](std::unique_ptr<Record> r) {
    if (r->type() != PERF_RECORD_SAMPLE) {
      return record_file_writer_->WriteRecord(*r);
    }
    SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
    if (!sr.HasUserCallChain()) {
      return record_file_writer_->WriteRecord(sr);
    }
    pid_t pid;
    pid_t tid;
    CallChainJoiner::ChainType type;
    std::vector<uint64_t> ips;
    std::vector<uint64_t> sps;
    if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
      return false;
    }
    CHECK_EQ(type, CallChainJoiner::JOINED_OFFLINE);
    CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
    CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
    sr.UpdateUserCallChain(ips);
    return record_file_writer_->WriteRecord(sr);
  };
  return reader->ReadDataSection(record_callback);
}

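// Symbol map files use the perf map text format: one symbol per line, written
// as "<start_addr> <size> <name>" with hex addresses. An illustrative line:
//   7f1234560000 180 java.lang.StringBuilder.toString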
static void LoadSymbolMapFile(int pid, const std::string& package, ThreadTree* thread_tree) {
  // On Linux, symbol map files usually go to /tmp/perf-<pid>.map.
  // On Android, use /tmp/perf-<pid>.map and /data/local/tmp/perf-<pid>.map, which work for
  // standalone programs, and /data/data/<package>/perf-<pid>.map, which works for apps.
  std::vector<Symbol> symbols;
  std::string filename = android::base::StringPrintf("perf-%d.map", pid);
  if (package.empty()) {
    symbols = ReadSymbolMapFromFile("/tmp/" + filename);
    if (symbols.empty()) {
      symbols = ReadSymbolMapFromFile("/data/local/tmp/" + filename);
    }
  } else {
    symbols = ReadSymbolMapFromFile("/data/data/" + package + "/" + filename);
  }
  if (!symbols.empty()) {
    thread_tree->AddSymbolsForProcess(pid, &symbols);
  }
}

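// Note: feature_count in DumpAdditionalFeatures() is computed up front and
// must stay in sync with the number of feature sections actually written
// between BeginWriteFeatures() and EndWriteFeatures().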
bool RecordCommand::DumpAdditionalFeatures(const std::vector<std::string>& args) {
  // Read the data section of perf.data to collect hit file information.
  thread_tree_.ClearThreadAndMap();
  bool kernel_symbols_available = false;
  std::string kallsyms;
  if (event_selection_set_.NeedKernelSymbol() && LoadKernelSymbols(&kallsyms)) {
    Dso::SetKallsyms(kallsyms);
    kernel_symbols_available = true;
  }
  std::unordered_set<int> loaded_symbol_maps;
  const std::vector<uint64_t>& auxtrace_offset = record_file_writer_->AuxTraceRecordOffsets();
  std::unordered_set<Dso*> debug_unwinding_files;
  bool failed_unwinding_sample = false;

  auto callback = [&](const Record* r) {
    thread_tree_.Update(*r);
    if (r->type() == PERF_RECORD_SAMPLE) {
      auto sample = reinterpret_cast<const SampleRecord*>(r);
      // Symbol map files are available after recording. Load one for the process.
      if (loaded_symbol_maps.insert(sample->tid_data.pid).second) {
        LoadSymbolMapFile(sample->tid_data.pid, app_package_name_, &thread_tree_);
      }
      if (failed_unwinding_sample) {
        failed_unwinding_sample = false;
        CollectHitFileInfo(*sample, &debug_unwinding_files);
      } else {
        CollectHitFileInfo(*sample, nullptr);
      }
    } else if (r->type() == SIMPLE_PERF_RECORD_UNWINDING_RESULT) {
      failed_unwinding_sample = true;
    }
  };

  if (map_record_thread_) {
    if (!map_record_thread_->Join()) {
      return false;
    }
    // If not dumping build ids, we only need to read kernel maps, to dump kernel module addresses
    // in the file feature section.
    if (!map_record_thread_->ReadMapRecords(callback, !dump_build_id_)) {
      return false;
    }
  }

  // We don't need to read the data section when recording ETM data and not dumping build ids.
  bool read_data_section = true;
  if (event_selection_set_.HasAuxTrace() && !dump_build_id_) {
    read_data_section = false;
  }

  if (read_data_section && !record_file_writer_->ReadDataSection(callback)) {
    return false;
  }

  size_t feature_count = 5;
  if (dump_build_id_) {
    feature_count++;
  }
  if (branch_sampling_) {
    feature_count++;
  }
  if (!auxtrace_offset.empty()) {
    feature_count++;
  }
  if (keep_failed_unwinding_debug_info_) {
    feature_count += 2;
  }
  if (etm_branch_list_generator_) {
    feature_count++;
  }
  if (map_record_thread_) {
    feature_count++;
  }
  if (!record_file_writer_->BeginWriteFeatures(feature_count)) {
    return false;
  }
  if (dump_build_id_ && !DumpBuildIdFeature()) {
    return false;
  }
  if (!DumpFileFeature()) {
    return false;
  }
  utsname uname_buf;
  if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) {
    PLOG(ERROR) << "uname() failed";
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE, uname_buf.release)) {
    return false;
  }
  if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH, uname_buf.machine)) {
    return false;
  }

  std::string exec_path = android::base::GetExecutablePath();
  if (exec_path.empty()) exec_path = "simpleperf";
  std::vector<std::string> cmdline;
  cmdline.push_back(exec_path);
  cmdline.push_back("record");
  cmdline.insert(cmdline.end(), args.begin(), args.end());
  if (!record_file_writer_->WriteCmdlineFeature(cmdline)) {
    return false;
  }
  if (branch_sampling_ != 0 && !record_file_writer_->WriteBranchStackFeature()) {
    return false;
  }
  if (!DumpMetaInfoFeature(kernel_symbols_available)) {
    return false;
  }
  if (!auxtrace_offset.empty() && !record_file_writer_->WriteAuxTraceFeature(auxtrace_offset)) {
    return false;
  }
  if (keep_failed_unwinding_debug_info_ && !DumpDebugUnwindFeature(debug_unwinding_files)) {
    return false;
  }
  if (etm_branch_list_generator_ && !DumpETMBranchListFeature()) {
    return false;
  }
  if (map_record_thread_ && !DumpInitMapFeature()) {
    return false;
  }

  if (!record_file_writer_->EndWriteFeatures()) {
    return false;
  }
  return true;
}

bool RecordCommand::DumpBuildIdFeature() {
  std::vector<BuildIdRecord> build_id_records;
  BuildId build_id;
  std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
  for (Dso* dso : dso_v) {
    // For aux tracing, we don't know which binaries are traced.
    // So dump build ids for all binaries.
    if (!dso->HasDumpId() && !event_selection_set_.HasAuxTrace()) {
      continue;
    }
    if (GetBuildId(*dso, build_id)) {
      bool in_kernel = dso->type() == DSO_KERNEL || dso->type() == DSO_KERNEL_MODULE;
      build_id_records.emplace_back(in_kernel, UINT_MAX, build_id, dso->Path());
    }
  }
  if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) {
    return false;
  }
  return true;
}

bool RecordCommand::DumpFileFeature() {
  std::vector<Dso*> dso_v = thread_tree_.GetAllDsos();
  // To parse ETM data for kernel modules, we need to dump memory addresses for kernel modules.
  if (event_selection_set_.HasAuxTrace() && !event_selection_set_.ExcludeKernel()) {
    for (Dso* dso : dso_v) {
      if (dso->type() == DSO_KERNEL_MODULE) {
        dso->CreateDumpId();
      }
    }
  }
  return record_file_writer_->WriteFileFeatures(dso_v);
}

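// The meta info feature is a flat string map read back by the report commands.
// Illustrative entries (actual values depend on the device and invocation):
//   simpleperf_version=<version>
//   clockid=monotonic
//   trace_offcpu=false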
bool RecordCommand::DumpMetaInfoFeature(bool kernel_symbols_available) {
  std::unordered_map<std::string, std::string> info_map = extra_meta_info_;
  info_map["simpleperf_version"] = GetSimpleperfVersion();
  info_map["system_wide_collection"] = system_wide_collection_ ? "true" : "false";
  info_map["trace_offcpu"] = trace_offcpu_ ? "true" : "false";
  // By storing event types information in perf.data, the readers of perf.data have the same
  // understanding of event types, even if they are on another machine.
  info_map["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents());
#if defined(__ANDROID__)
  info_map["product_props"] = android::base::StringPrintf(
      "%s:%s:%s", android::base::GetProperty("ro.product.manufacturer", "").c_str(),
      android::base::GetProperty("ro.product.model", "").c_str(),
      android::base::GetProperty("ro.product.name", "").c_str());
  info_map["android_version"] = android::base::GetProperty("ro.build.version.release", "");
  info_map["android_sdk_version"] = android::base::GetProperty("ro.build.version.sdk", "");
  info_map["android_build_type"] = android::base::GetProperty("ro.build.type", "");
  info_map["android_build_fingerprint"] = android::base::GetProperty("ro.build.fingerprint", "");
  utsname un;
  if (uname(&un) == 0) {
    info_map["kernel_version"] = un.release;
  }
  if (!app_package_name_.empty()) {
    info_map["app_package_name"] = app_package_name_;
    if (IsRoot()) {
      info_map["app_type"] = GetAppType(app_package_name_);
    }
  }
  if (event_selection_set_.HasAuxTrace()) {
    // used by --exclude-perf in cmd_inject.cpp
    info_map["recording_process"] = std::to_string(getpid());
  }
#endif
  info_map["clockid"] = clockid_;
  info_map["timestamp"] = std::to_string(time(nullptr));
  info_map["kernel_symbols_available"] = kernel_symbols_available ? "true" : "false";
  if (dwarf_callchain_sampling_ && !unwind_dwarf_callchain_) {
    OfflineUnwinder::CollectMetaInfo(&info_map);
  }
  auto record_stat = event_selection_set_.GetRecordStat();
  info_map["record_stat"] = android::base::StringPrintf(
      "sample_record_count=%" PRIu64
      ",kernelspace_lost_records=%zu,userspace_lost_samples=%zu,"
      "userspace_lost_non_samples=%zu,userspace_truncated_stack_samples=%zu",
      sample_record_count_, record_stat.kernelspace_lost_records,
      record_stat.userspace_lost_samples, record_stat.userspace_lost_non_samples,
      record_stat.userspace_truncated_stack_samples);

  return record_file_writer_->WriteMetaInfoFeature(info_map);
}

bool RecordCommand::DumpDebugUnwindFeature(const std::unordered_set<Dso*>& dso_set) {
  DebugUnwindFeature debug_unwind_feature;
  debug_unwind_feature.reserve(dso_set.size());
  for (const Dso* dso : dso_set) {
    if (dso->type() != DSO_ELF_FILE) {
      continue;
    }
    const std::string& filename = dso->GetDebugFilePath();
    std::unique_ptr<ElfFile> elf = ElfFile::Open(filename);
    if (elf) {
      llvm::MemoryBuffer* buffer = elf->GetMemoryBuffer();
      debug_unwind_feature.resize(debug_unwind_feature.size() + 1);
      auto& debug_unwind_file = debug_unwind_feature.back();
      debug_unwind_file.path = filename;
      debug_unwind_file.size = buffer->getBufferSize();
      if (!record_file_writer_->WriteFeature(PerfFileFormat::FEAT_DEBUG_UNWIND_FILE,
                                             buffer->getBufferStart(), buffer->getBufferSize())) {
        return false;
      }
    } else {
      LOG(WARNING) << "failed to keep " << filename << " in debug_unwind_feature section";
    }
  }
  return record_file_writer_->WriteDebugUnwindFeature(debug_unwind_feature);
}

void RecordCommand::CollectHitFileInfo(const SampleRecord& r, std::unordered_set<Dso*>* dso_set) {
  const ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
  size_t kernel_ip_count;
  std::vector<uint64_t> ips = r.GetCallChain(&kernel_ip_count);
  if ((r.sample_type & PERF_SAMPLE_BRANCH_STACK) != 0) {
    for (uint64_t i = 0; i < r.branch_stack_data.stack_nr; ++i) {
      const auto& item = r.branch_stack_data.stack[i];
      ips.push_back(item.from);
      ips.push_back(item.to);
    }
  }
  for (size_t i = 0; i < ips.size(); i++) {
    const MapEntry* map = thread_tree_.FindMap(thread, ips[i], i < kernel_ip_count);
    Dso* dso = map->dso;
    if (dump_symbols_) {
      const Symbol* symbol = thread_tree_.FindSymbol(map, ips[i], nullptr, &dso);
      if (!symbol->HasDumpId()) {
        dso->CreateSymbolDumpId(symbol);
      }
    }
    if (!dso->HasDumpId() && dso->type() != DSO_UNKNOWN_FILE) {
      dso->CreateDumpId();
    }
    if (dso_set != nullptr) {
      dso_set->insert(dso);
    }
  }
}

bool RecordCommand::DumpETMBranchListFeature() {
  ETMBinaryMap binary_map = etm_branch_list_generator_->GetETMBinaryMap();
  std::string s;
  if (!ETMBinaryMapToString(binary_map, s)) {
    return false;
  }
  return record_file_writer_->WriteFeature(PerfFileFormat::FEAT_ETM_BRANCH_LIST, s.data(),
                                           s.size());
}

bool RecordCommand::DumpInitMapFeature() {
  if (!map_record_thread_->Join()) {
    return false;
  }
  auto callback = [&](const char* data, size_t size) {
    return record_file_writer_->WriteInitMapFeature(data, size);
  };
  return map_record_thread_->ReadMapRecordData(callback) &&
         record_file_writer_->FinishWritingInitMapFeature();
}

}  // namespace

static bool ConsumeStr(const char*& p, const char* s) {
  if (strncmp(p, s, strlen(s)) == 0) {
    p += strlen(s);
    return true;
  }
  return false;
}

static bool ConsumeAddr(const char*& p, uint64_t* addr) {
  errno = 0;
  char* end;
  *addr = strtoull(p, &end, 0);
  if (errno == 0 && p != end) {
    p = end;
    return true;
  }
  return false;
}

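// Accepted --addr-filter formats, as parsed below (addresses and paths are
// illustrative):
//   start 0xffffffc000080000                    // kernel start address
//   stop 0x6789@/system/lib64/libc.so           // stop at a file vaddr
//   filter 0x6789-0x6800@/system/lib64/libc.so  // trace a file address range
//   filter /system/lib64/libc.so                // trace all executable segments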
// To reduce function length, not all format errors are checked.
static bool ParseOneAddrFilter(const std::string& s, std::vector<AddrFilter>* filters) {
  std::vector<std::string> args = android::base::Split(s, " ");
  if (args.size() != 2) {
    return false;
  }

  uint64_t addr1;
  uint64_t addr2;
  uint64_t off1;
  uint64_t off2;
  std::string path;

  if (auto p = s.data(); ConsumeStr(p, "start") && ConsumeAddr(p, &addr1)) {
    if (*p == '\0') {
      // start <kernel_addr>
      filters->emplace_back(AddrFilter::KERNEL_START, addr1, 0, "");
      return true;
    }
    if (ConsumeStr(p, "@") && *p != '\0') {
      // start <vaddr>@<file_path>
      if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) && Realpath(p, &path)) {
        filters->emplace_back(AddrFilter::FILE_START, off1, 0, path);
        return true;
      }
    }
  }
  if (auto p = s.data(); ConsumeStr(p, "stop") && ConsumeAddr(p, &addr1)) {
    if (*p == '\0') {
      // stop <kernel_addr>
      filters->emplace_back(AddrFilter::KERNEL_STOP, addr1, 0, "");
      return true;
    }
    if (ConsumeStr(p, "@") && *p != '\0') {
      // stop <vaddr>@<file_path>
      if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) && Realpath(p, &path)) {
        filters->emplace_back(AddrFilter::FILE_STOP, off1, 0, path);
        return true;
      }
    }
  }
  if (auto p = s.data(); ConsumeStr(p, "filter") && ConsumeAddr(p, &addr1) && ConsumeStr(p, "-") &&
                         ConsumeAddr(p, &addr2)) {
    if (*p == '\0') {
      // filter <kernel_addr_start>-<kernel_addr_end>
      filters->emplace_back(AddrFilter::KERNEL_RANGE, addr1, addr2 - addr1, "");
      return true;
    }
    if (ConsumeStr(p, "@") && *p != '\0') {
      // filter <vaddr_start>-<vaddr_end>@<file_path>
      if (auto elf = ElfFile::Open(p); elf && elf->VaddrToOff(addr1, &off1) &&
                                       elf->VaddrToOff(addr2, &off2) && Realpath(p, &path)) {
        filters->emplace_back(AddrFilter::FILE_RANGE, off1, off2 - off1, path);
        return true;
      }
    }
  }
  if (auto p = s.data(); ConsumeStr(p, "filter") && *p != '\0') {
    // filter <file_path>
    path = android::base::Trim(p);
    if (auto elf = ElfFile::Open(path); elf) {
      for (const ElfSegment& seg : elf->GetProgramHeader()) {
        if (seg.is_executable) {
          filters->emplace_back(AddrFilter::FILE_RANGE, seg.file_offset, seg.file_size, path);
        }
      }
      return true;
    }
  }
  return false;
}

std::vector<AddrFilter> ParseAddrFilterOption(const std::string& s) {
  std::vector<AddrFilter> filters;
  for (const auto& str : android::base::Split(s, ",")) {
    if (!ParseOneAddrFilter(str, &filters)) {
      LOG(ERROR) << "failed to parse addr filter: " << str;
      return {};
    }
  }
  return filters;
}

void RegisterRecordCommand() {
  RegisterCommand("record", [] { return std::unique_ptr<Command>(new RecordCommand()); });
}

}  // namespace simpleperf