• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #ifndef SUBCOMMAND_RECORD_H
16 #define SUBCOMMAND_RECORD_H
17 
// Compile-time switches for the record subcommand. Every HIDEBUG_* switch is
// defined as 0 here and none of them is referenced anywhere in this header.
// NOTE(review): presumably they are tested with #if in the implementation file
// to skip individual processing stages while debugging — confirm there.
18 // some debug features
19 #define HIDEBUG_RECORD_NOT_PROCESS       0
20 #define HIDEBUG_RECORD_NOT_PROCESS_VM    0
21 #define HIDEBUG_RECORD_NOT_SAVE          0
22 #define HIDEBUG_SKIP_PROCESS_SYMBOLS     0
23 #define HIDEBUG_SKIP_MATCH_SYMBOLS       0
24 #define HIDEBUG_SKIP_LOAD_KERNEL_SYMBOLS 0
25 #define HIDEBUG_SKIP_SAVE_SYMBOLS        0
// Defined as 1. Within this header it is tested with #if to compile in the
// kernelSymbolsHits_ / userSymbolsHits_ members and SymbolicHits() of
// SubCommandRecord, so it has to stay a preprocessor macro.
26 #define USE_COLLECT_SYMBOLIC             1
27 
28 #include <functional>
29 #include <thread>
30 #include <unordered_map>
31 #include <unordered_set>
32 #include <chrono>
33 #include "perf_event_record.h"
34 #include "perf_events.h"
35 #include "perf_file_writer.h"
36 #include "subcommand.h"
37 #include "virtual_runtime.h"
38 
39 namespace OHOS {
40 namespace Developtools {
41 namespace HiPerf {
/**
 * Declaration of the hiperf "record" subcommand.
 *
 * The constructor hands the subcommand name ("record"), a one-line description
 * and the complete usage/help text to the SubCommand base class. The private
 * members hold option state, output-file state, client/FIFO control state and
 * symbol bookkeeping. All member functions except the constructor are only
 * declared here; their definitions live outside this header.
 *
 * NOTE(review): the order of the data members determines their construction and
 * destruction order, so do not reorder them without checking the implementation.
 */
42 class SubCommandRecord : public SubCommand {
43 public:
    // --cpu-limit: the help text documents the range [1-100] and a default of 25.
44     static constexpr int DEFAULT_CPU_PERCENT = 25;
45     static constexpr int MIN_CPU_PERCENT = 1;
46     static constexpr int MAX_CPU_PERCENT = 100;
    // Sampling frequency bounds. NOTE(review): presumably applied to -f; the help
    // text does not state this range — confirm against the option checks.
47     static constexpr int MIN_SAMPLE_FREQUENCY = 1;
48     static constexpr int MAX_SAMPLE_FREQUENCY = 100000;
    // -m: the help text documents the range [2,1024].
    // NOTE(review): DEFAULT_MMAP_PAGES (256) is not used in this header; mmapPages_
    // below is initialized with MAX_PERF_MMAP_PAGE and the help text says the
    // default is 1024 — confirm which default is intended.
49     static constexpr int DEFAULT_MMAP_PAGES = 256;
50     static constexpr int MIN_PERF_MMAP_PAGE = 2;
51     static constexpr int MAX_PERF_MMAP_PAGE = 1024;
    // --chkms: the help text documents the range [1-200] ms and a default of 10.
52     static constexpr int DEFAULT_CHECK_APP_MS = 10;
53     static constexpr int MIN_CHECK_APP_MS = 1;
54     static constexpr int MAX_CHECK_APP_MS = 200;
    // -d: the help text documents the range [0.100-10000.0] seconds.
55     static constexpr float MIN_STOP_SECONDS = 0.100;
56     static constexpr float MAX_STOP_SECONDS = 10000.0;
57 
    // Passes the name "record", the brief description and the usage text to the
    // SubCommand base constructor; the constructor body itself is empty.
    // The string literals below are user-visible help output.
SubCommandRecord()58     SubCommandRecord()
59         // clang-format off
60         : SubCommand("record", "Collect performance sample information",
61         "Usage: hiperf record [options] [command [command-args]]\n"
62         "       Collect performance sampling information of running [command].\n"
63         "       The default options are: -c <all cpu> --cpu-limit 25 -d 10000.0 -e hw-cpu-cycles\n"
64         "       -f 4000 -m 1024 -o /data/local/tmp/perf.data.\n"
65         "   -a\n"
66         "         Collect system-wide information.\n"
67         "         for measures all processes/threads\n"
68         "         This requires CAP_PERFMON (since Linux 5.8) or CAP_SYS_ADMIN capability or a\n"
69         "         /proc/sys/kernel/perf_event_paranoid value of less than 1.\n"
70         "   --exclude-hiperf\n"
71         "         Don't record events issued by hiperf itself.\n"
72         "   -c <cpuid>[<,cpuid>]...\n"
73         "         cpuid should be 0,1,2...\n"
74         "         Limit the CPU that collects data.\n"
75         "         0 means cpu0, 1 means cpu1 ...\n"
76         "   --cpu-limit <percent>\n"
77         "         Set the max percent of cpu time used for recording.\n"
78         "         percent is in range [1-100], default is 25.\n"
79         "   -d <sec>\n"
80         "         stop in <sec> seconds. floating point number. seconds is in range [0.100-10000.0]\n"
81         "         default is 10000.0\n"
82         "   -f <freq>\n"
83         "         Set event sampling frequency. default is 4000 samples every second.\n"
84         "         check /proc/sys/kernel/perf_event_max_sample_rate for maximum allowed frequency\n"
85         "   --period <num>\n"
86         "         Set event sampling period for tracepoint events. recording one sample when <num> events happen.\n"
87         "         The default <num> is 1\n"
88         "   -e <event1[:<u|k>]>[,event1[:<u|k>]]...\n"
89         "         Customize the name of the event that needs to be sampled.\n"
90         "         The name can use the names listed in the list parameter.\n"
91         "         It can also be represented by the value of 0x<hex>.\n"
92         "           u - monitor user space events only\n"
93         "           k - monitor kernel space events only\n"
94         "   -g <event1[:<u|k>]>[,event1[:<u|k>]]...\n"
95         "         Put the events into a group, can set multiple groups by multiple -g\n"
96         "         PMU is required to report data in designated groups\n"
97         "         limited by HW capability, too many events cannot be reported in the same sampling)\n"
98         "   --no-inherit\n"
99         "         Don't trace child processes.\n"
100         "   -p <pid1>[,pid2]...\n"
101         "         Limit the process id of the collection target. Conflicts with the -a option.\n"
102         "   -t <tid1>[,tid2]...\n"
103         "         Limit the thread id of the collection target. Conflicts with the -a option.\n"
104         "   --exclude-thread <tname1>[,tname2]...\n"
105         "         Exclude threads of the collection target by thread names. Conflicts with the -a option.\n"
106         "   --offcpu\n"
107         "         Trace when threads are scheduled off cpu.\n"
108         "   -j <branch_filter1>[,branch_filter2]...\n"
109         "         taken branch stack sampling, filter can be:\n"
110         "           any: any type of branch\n"
111         "           any_call: any function call or system call\n"
112         "           any_ret: any function return or system call return\n"
113         "           ind_call: any indirect branch\n"
114         "           call: direct calls, including far (to/from kernel) calls\n"
115         "           u: only when the branch target is at the user level\n"
116         "           k: only when the branch target is in the kernel\n"
117         "         requires at least one of any, any_call, any_ret, ind_call\n"
118         "   -s / --call-stack <fp|dwarf[,size]>\n"
119         "         Setup and enable call stack (stack chain/backtrace) recording, Default is 'fp'.\n"
120         "           the value can be:\n"
121         "             fp: frame pointer\n"
122         "             dwarf: DWARF's CFI - Call Frame Information\n"
123         "               'dwarf,size' set sample stack size, size should be in 8~65528 and 8 byte aligned. \n"
124         "           as the method to collect the information used to show the call stacks.\n"
125         "   --delay-unwind\n"
126         "         If '-s dwarf' used, stack will be unwind while recording, use this option to switch\n"
127         "         to unwind after recording.\n"
128         "   --disable-unwind\n"
129         "         If '-s dwarf' is used, stack will be unwind while recording by default\n"
130         "         use this option to disable unwinding.\n"
131         "   --disable-callstack-expand\n"
132         "         If '-s dwarf' is used, to break the 64k stack limit, callstack is merged by default\n"
133         "         to build more complete call stack. that may not be correct sometimes.\n"
134         "   --clockid <clock_id>\n"
135         "         Set the clock id to use for the various time fields in the perf_event_type records.\n"
136         "         monotonic and monotonic_raw are supported,\n"
137         "         some events might also allow boottime, realtime and clock_tai.\n"
138         "   --symbol-dir <dir>\n"
139         "         Set directory to look for symbol files, used for unwinding. \n"
140         "   -m <mmap_pages>\n"
141         "         Number of the mmap pages, used to receiving record data from kernel,\n"
142         "         must be a power of two, rang[2,1024], default is 1024.\n"
143         "   --app <package_name>\n"
144         "         Collect profile info for an OHOS app, the app must be debuggable.\n"
145         "         Record will exit if the process is not started within 10 seconds.\n"
146         "   --chkms <millisec>\n"
147         "         Set the interval of querying the <package_name>.\n"
148         "         <millisec> is in range [1-200], default is 10.\n"
149         "   --data-limit <SIZE[K|M|G]>\n"
150         "         Stop recording after SIZE bytes of records. Default is unlimited.\n"
151         "   -o <output_file_name>\n"
152         "         Set output file name, default is /data/local/tmp/perf.data.\n"
153         "   -z\n"
154         "         Compress record data.\n"
155         "   --verbose\n"
156         "         Show more detailed reports.\n"
157         "   --control <command>\n"
158         "         Control sampling by <command>, the <command> can be:\n"
159         "           prepare: set arguments and prepare sampling\n"
160         "           start: start sampling\n"
161         "           pause: pause sampling\n"
162         "           resume: resume sampling\n"
163         "           stop: stop sampling\n"
164         )
165     // clang-format on
166     {
167     }
168 
    // Destructor is user-declared; its definition is not visible in this header.
169     ~SubCommandRecord();
    // Overrides of the SubCommand interface (marked override); each receives the
    // argument list as a mutable vector of strings.
170     bool OnSubCommand(std::vector<std::string> &args) override;
171     bool ParseOption(std::vector<std::string> &args) override;
172     void DumpOptions(void) const override;
173 
    // Static entry point. NOTE(review): presumably registers this subcommand with
    // the command dispatcher — the definition is not visible here.
174     static bool RegisterSubCommandRecord(void);
175 
176 private:
    // PerfEvents instance held by value; it is the first data member and is
    // therefore constructed before all other members.
177     PerfEvents perfEvents_;
178 
    // Option state. NOTE(review): the option names given in this section are
    // inferred from the member names and the help text above; confirm the mapping
    // against the option parsing code in the implementation file.
    //   targetSystemWide_ (-a), compressData_ (-z), noInherit_ (--no-inherit),
    //   excludeHiperf_ (--exclude-hiperf), offCPU_ (--offcpu),
    //   delayUnwind_ (--delay-unwind), disableUnwind_ (--disable-unwind),
    //   disableCallstackExpend_ (--disable-callstack-expand; the member name
    //   spells it "Expend"), verboseReport_ (--verbose).
179     bool targetSystemWide_ = false;
180     bool compressData_ = false;
181     bool noInherit_ = false;
182     bool excludeHiperf_ = false;
183     bool offCPU_ = false;
184     bool delayUnwind_ = false;
185     bool disableUnwind_ = false;
186     bool disableCallstackExpend_ = false;
187     bool verboseReport_ = false;
    // Stop time in seconds (presumably -d); initialized from PerfEvents::DEFAULT_TIMEOUT.
188     float timeStopSec_ = PerfEvents::DEFAULT_TIMEOUT;
    // Both start at 0 although the help text documents defaults of 4000 (-f) and
    // 1 (--period). NOTE(review): presumably 0 means "not given on the command
    // line" and the documented defaults are applied elsewhere — TODO confirm.
189     int frequency_ = 0;
190     int period_ = 0;
191     int cpuPercent_ = DEFAULT_CPU_PERCENT;
    // Initialized with MAX_PERF_MMAP_PAGE (1024), not DEFAULT_MMAP_PAGES (256).
192     int mmapPages_ = MAX_PERF_MMAP_PAGE;
193     std::vector<std::string> symbolDir_ = {};
    // Same path as the -o default quoted in the help text.
194     std::string outputFilename_ = "/data/local/tmp/perf.data";
195     std::string appPackage_ = {};
196     int checkAppMs_ = DEFAULT_CHECK_APP_MS;
197     std::string clockId_ = {};
    // NOTE(review): presumably the raw --data-limit argument, parsed into
    // dataSizeLimit_ below — confirm.
198     std::string strLimit_ = {};
    // NOTE(review): selectCpus_ is declared with element type pid_t like the
    // pid/tid lists, although its name suggests CPU ids — verify this is intended.
199     std::vector<pid_t> selectCpus_ = {};
200     std::vector<pid_t> selectPids_ = {};
201     std::vector<pid_t> selectTids_ = {};
202     std::vector<std::string> selectEvents_ = {};
    // One inner vector per group; the help text says -g may be given several times.
203     std::vector<std::vector<std::string>> selectGroups_ = {};
204     std::vector<std::string> callStackType_ = {};
205     std::vector<std::string> vecBranchFilters_ = {};
206     std::vector<std::string> trackedCommand_ = {};
207     std::vector<std::string> excludeThreadNames_ = {};
208 
    // Option retrieval and validation helpers; each reports its outcome as bool.
    // Definitions are not visible in this header.
209     bool GetOptions(std::vector<std::string> &args);
210     bool CheckOptions();
211     bool CheckDataLimitOption();
212     bool CheckSelectCpuPidOption();
213     bool GetOptionFrequencyAndPeriod(std::vector<std::string> &args);
214 
    // Derived option state. callStackDwarfSize_ starts at MAX_SAMPLE_STACK_SIZE,
    // a constant declared outside this header; the other values start at
    // false / 0.
215     bool isCallStackDwarf_ = false;
216     bool isCallStackFp_ = false;
217     uint32_t callStackDwarfSize_ = MAX_SAMPLE_STACK_SIZE;
218     uint64_t branchSampleType_ = 0;
219     uint64_t dataSizeLimit_ = 0;
220     bool isDataSizeLimitStop_ = false;
221 
    // Owning pointer to the output file writer; null until one is assigned.
222     std::unique_ptr<PerfFileWriter> fileWriter_ = nullptr;
223 
    // Client control state: three file descriptors (initialized to -1), a
    // handler thread object and an exit flag.
    // NOTE(review): clientExit_ is a plain bool declared next to a std::thread
    // member; if it is accessed from more than one thread it needs
    // synchronization — confirm in the implementation.
    // NOTE(review): "clientCommandHanle_" is spelled without the 'd'.
224     // for client
225     int clientPipeInput_ = -1;
226     int clientPipeOutput_ = -1;
227     int nullFd_ = -1;
228     std::thread clientCommandHanle_;
229     bool clientExit_ = false;
230     void ClientCommandHandle();
231     bool ClientCommandResponse(bool OK);
232     bool IsSamplingRunning();
    // Command-line control state (presumably for --control — confirm) plus FIFO
    // server/client helpers. The two timeOut parameters are std::chrono::milliseconds.
233     // for cmdline client
234     std::string controlCmd_ = {};
235     bool isFifoServer_ = false;
236     bool isFifoClient_ = false;
237     bool ProcessControl();
238     bool CreateFifoServer();
239     bool SendFifoAndWaitReply(const std::string &cmd, const std::chrono::milliseconds &timeOut);
240     bool WaitFifoReply(int fd, const std::chrono::milliseconds &timeOut);
241     void CloseClientThread();
242 
    // Preparation steps; each reports its outcome as bool.
243     bool PreparePerfEvent();
244     bool PrepareSysKernel();
245     bool PrepareVirtualRuntime();
246 
    // Record counters, both starting at 0.
247     size_t recordSamples_ = 0;
248     size_t recordNoSamples_ = 0;
249 
250     bool isNeedSetPerfHarden_ = false;
251 
    // Both functions take the record as std::unique_ptr by value, so the caller
    // hands over ownership of the record.
252     // callback to process record
253     bool ProcessRecord(std::unique_ptr<PerfEventRecord>);
254     bool SaveRecord(std::unique_ptr<PerfEventRecord>);
255 
    // Helpers that add feature sections to the record file.
    // NOTE(review): the format comment below presumably describes the file read
    // by GetCountFromFile() — confirm.
256     // expected file content looks like: 0,1-3,4-6,7,8
257     uint32_t GetCountFromFile(const std::string &fileName);
258     std::string GetCpuDescFromFile();
259     bool AddCpuFeature();
260     void AddMemTotalFeature();
261     void AddEventDescFeature();
262     void AddRecordTimeFeature();
263     void AddWorkloadCmdFeature();
264     void AddCommandLineFeature();
265     void AddCpuOffFeature();
266     bool AddFeatureRecordFile();
267 
    // Record file lifecycle. CreateInitRecordFile() defaults compressData to false.
268     bool CreateInitRecordFile(bool compressData = false);
269     bool FinishWriteRecordFile();
270     bool PostProcessRecordFile();
271     bool RecordCompleted();
    // Only declared when HIPERF_DEBUG_TIME is defined.
272 #ifdef HIPERF_DEBUG_TIME
273     void ReportTime();
274 #endif
275 
    // Takes ownership of the record (std::unique_ptr passed by value).
276     bool CollectionSymbol(std::unique_ptr<PerfEventRecord> record);
277 
    // Perf limit setters. SetPerfLimit() takes a file name, an int value, a
    // bool(int, int) callable and a parameter name.
    // NOTE(review): the roles of these arguments are not documented here — see
    // the definition.
278     bool SetPerfLimit(const std::string& file, int value, std::function<bool (int, int)> const& cmd,
279         const std::string& param);
280     bool SetPerfCpuMaxPercent();
281     bool SetPerfMaxSampleRate();
282     bool SetPerfEventMlock();
283     bool SetPerfHarden();
284 
    // Option parsers and target checks.
    // NOTE(review): ParseControlCmd() takes its std::string argument by const
    // value, which copies the string; a const reference would avoid the copy but
    // has to be changed together with the definition.
285     bool TraceOffCpu();
286     bool ParseCallStackOption(const std::vector<std::string> &callStackType);
287     bool ParseDataLimitOption(const std::string &str);
288     bool ParseBranchSampleType(const std::vector<std::string> &vecBranchSampleTypes);
289     bool ParseControlCmd(const std::string cmd);
290     bool CheckTargetProcessOptions();
291     bool CheckTargetPids();
292 
    // selectTids is a non-const reference, so the function can modify the
    // caller's list in place.
293     void ExcludeThreadsFromSelectTids(const std::vector<std::string> &excludeThreadNames,
294         std::vector<pid_t> &selectTids);
    // Returns a pid_t for the given package name (parameter is spelled "appPackge").
295     pid_t GetAppPackagePid(const std::string &appPackge);
296 
    // VirtualRuntime instance held by value.
297     VirtualRuntime virtualRuntime_;
    // Compiled in because USE_COLLECT_SYMBOLIC is defined as 1 at the top of this
    // header: a set of uint64_t values for the kernel and one such set per pid_t
    // for user space. NOTE(review): presumably the values are sampled addresses —
    // confirm in SymbolicHits().
298 #if USE_COLLECT_SYMBOLIC
299     std::unordered_set<uint64_t> kernelSymbolsHits_;
300     std::unordered_map<pid_t, std::unordered_set<uint64_t>> userSymbolsHits_;
301     void SymbolicHits();
302 #endif
303 
    // Accumulated durations in microseconds, zero-initialized; only present when
    // HIPERF_DEBUG_TIME is defined ("prcessRecordTimes_" is spelled without the 'o').
304 #ifdef HIPERF_DEBUG_TIME
305     std::chrono::microseconds prcessRecordTimes_ = std::chrono::microseconds::zero();
306     std::chrono::microseconds saveRecordTimes_ = std::chrono::microseconds::zero();
307     std::chrono::microseconds saveFeatureTimes_ = std::chrono::microseconds::zero();
308 #endif
    // steady_clock time point; declared outside the #ifdef, so it is always present.
309     std::chrono::time_point<std::chrono::steady_clock> startSaveFileTimes_;
310 };
311 } // namespace HiPerf
312 } // namespace Developtools
313 } // namespace OHOS
314 #endif // SUBCOMMAND_RECORD_H
315