• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #ifndef SUBCOMMAND_RECORD_H
16 #define SUBCOMMAND_RECORD_H
17 
18 // some debug features
// Compile-time switches, each defined here as 0 or 1.
// NOTE(review): none of the HIDEBUG_* switches is tested inside this header, so
// their exact effect has to be confirmed where they are used (presumably the
// matching .cpp); the names suggest they skip individual record/symbol steps.
19 #define HIDEBUG_RECORD_NOT_PROCESS       0
20 #define HIDEBUG_RECORD_NOT_PROCESS_VM    0
21 #define HIDEBUG_RECORD_NOT_SAVE          0
22 #define HIDEBUG_SKIP_PROCESS_SYMBOLS     0
23 #define HIDEBUG_SKIP_MATCH_SYMBOLS       0
24 #define HIDEBUG_SKIP_LOAD_KERNEL_SYMBOLS 0
25 #define HIDEBUG_SKIP_SAVE_SYMBOLS        0
// USE_COLLECT_SYMBOLIC is tested further down in this header: when non-zero the
// class declares the symbol-hit containers and SymbolicHits().
26 #define USE_COLLECT_SYMBOLIC             1
27 
28 #include <functional>
29 #include <thread>
30 #include <unordered_map>
31 #include <unordered_set>
32 #include <chrono>
33 #include "perf_event_record.h"
34 #include "perf_events.h"
35 #include "perf_file_writer.h"
36 #include "perf_pipe.h"
37 #include "subcommand.h"
38 #include "virtual_runtime.h"
39 
40 namespace OHOS {
41 namespace Developtools {
42 namespace HiPerf {
// SubCommandRecord declares the hiperf "record" sub-command.
//
// The constructor hands the command name, a one-line summary and the complete
// usage/help text to the SubCommand base class. Every other member function is
// only declared here; the definitions are outside this header.
//
// NOTE(review): each line of this copy starts with a number and the constructor
// line carries a duplicated label; both look like code-browser export residue
// and have to be stripped before the header can compile.
// NOTE(review): the class uses std::map, std::set, std::atomic_bool,
// std::unique_ptr, std::string and std::vector, but this header does not
// include <map>, <set>, <atomic>, <memory>, <string> or <vector> itself; it
// presumably relies on the project headers pulling them in - confirm.
43 class SubCommandRecord : public SubCommand {
44 public:
    // Bounds and defaults for the numeric options. Where the help text below
    // states a range or a default (--cpu-limit, -d, -m, --chkms, --cmdline-size,
    // --backtrack-sec) the numbers agree with these constants.
45     static constexpr int DEFAULT_CPU_PERCENT = 25;
46     static constexpr int MIN_CPU_PERCENT = 1;
47     static constexpr int MAX_CPU_PERCENT = 100;
48     static constexpr int MIN_SAMPLE_FREQUENCY = 1;
49     static constexpr int MAX_SAMPLE_FREQUENCY = 100000;
    // NOTE(review): DEFAULT_MMAP_PAGES (256) is not what mmapPages_ starts with;
    // that member is initialized from MAX_PERF_MMAP_PAGE (1024), which is also
    // the default quoted by the -m help text. Confirm whether 256 is still used.
50     static constexpr int DEFAULT_MMAP_PAGES = 256;
51     static constexpr int MIN_PERF_MMAP_PAGE = 2;
52     static constexpr int MAX_PERF_MMAP_PAGE = 1024;
53     static constexpr int DEFAULT_CHECK_APP_MS = 10;
54     static constexpr int MIN_CHECK_APP_MS = 1;
55     static constexpr int MAX_CHECK_APP_MS = 200;
56     static constexpr float MIN_STOP_SECONDS = 0.100;
57     static constexpr float MAX_STOP_SECONDS = 10000.0;
58     static constexpr int MIN_SAVED_CMDLINES_SIZE = 512;
59     static constexpr int DEFAULT_SAVED_CMDLINES_SIZE = 2048;
60     static constexpr int MAX_SAVED_CMDLINES_SIZE = 4096;
61     static constexpr uint64_t MIN_BACKTRACK_TIME_SEC = 5;
62     static constexpr uint64_t DEFAULT_BACKTRACK_TIME_SEC = 10;
63     static constexpr uint64_t MAX_BACKTRACK_TIME_SEC = 30;
64 
    // Constructor: passes three string literals to the SubCommand base - the
    // name "record", a short description and the usage text assembled from the
    // adjacent literals below. The constructor body itself is empty.
    // The help text is emitted at runtime, so its wording must not be touched
    // casually.
    // NOTE(review): the text in front of the constructor name on the next line
    // ("SubCommandRecord()65") looks like export residue, not source code.
SubCommandRecord()65     SubCommandRecord()
66         // clang-format off
67         : SubCommand("record", "Collect performance sample information",
68         "Usage: hiperf record [options] [command [command-args]]\n"
69         "       Collect performance sampling information of running [command].\n"
70         "       The default options are: -c <all cpu> --cpu-limit 25 -d 10000.0 -e hw-cpu-cycles\n"
71         "       -f 4000 -m 1024 -o /data/local/tmp/perf.data.\n"
72         "   -a\n"
73         "         Collect system-wide information.\n"
74         "         for measures all processes/threads\n"
75         "         This requires CAP_PERFMON (since Linux 5.8) or CAP_SYS_ADMIN capability or a\n"
76         "         /proc/sys/kernel/perf_event_paranoid value of less than 1.\n"
77         "   --exclude-hiperf\n"
78         "         Don't record events issued by hiperf itself.\n"
79         "   -c <cpuid>[<,cpuid>]...\n"
80         "         cpuid should be 0,1,2...\n"
81         "         Limit the CPU that collects data.\n"
82         "         0 means cpu0, 1 means cpu1 ...\n"
83         "   --cpu-limit <percent>\n"
84         "         Set the max percent of cpu time used for recording.\n"
85         "         percent is in range [1-100], default is 25.\n"
86         "   -d <sec>\n"
87         "         stop in <sec> seconds. floating point number. seconds is in range [0.100-10000.0]\n"
88         "         default is 10000.0\n"
89         "   -f <freq>\n"
90         "         Set event sampling frequency. default is 4000 samples every second.\n"
91         "         check /proc/sys/kernel/perf_event_max_sample_rate for maximum allowed frequency\n"
92         "   --period <num>\n"
93         "         Set event sampling period for tracepoint events. recording one sample when <num> events happen.\n"
94         "         The default <num> is 1\n"
95         "   -e <event1[:<u|k>]>[,event1[:<u|k>]]...\n"
96         "         Customize the name of the event that needs to be sampled.\n"
97         "         The name can use the names listed in the list parameter.\n"
98         "         It can also be represented by the value of 0x<hex>.\n"
99         "           u - monitor user space events only\n"
100         "           k - monitor kernel space events only\n"
101         "   -g <event1[:<u|k>]>[,event1[:<u|k>]]...\n"
102         "         Put the events into a group, can set multiple groups by multiple -g\n"
103         "         PMU is required to report data in designated groups\n"
104         "         limited by HW capability, too many events cannot be reported in the same sampling)\n"
105         "   --no-inherit\n"
106         "         Don't trace child processes.\n"
107         "   -p <pid1>[,pid2]...\n"
108         "         Limit the process id of the collection target. Conflicts with the -a option.\n"
109         "   -t <tid1>[,tid2]...\n"
110         "         Limit the thread id of the collection target. Conflicts with the -a option.\n"
111         "   --exclude-tid <tid1>[,tid2]...\n"
112         "         Exclude threads of the collection target by thread ids. Conflicts with the -a option.\n"
113         "   --exclude-thread <tname1>[,tname2]...\n"
114         "         Exclude threads of the collection target by thread names. Conflicts with the -a option.\n"
115         "   --exclude-process <pname1>[,pname2]...\n"
116         "         Exclude processes by process names. Must be used with -a.\n"
117         "   --offcpu\n"
118         "         Trace when threads are scheduled off cpu.\n"
119         "   -j <branch_filter1>[,branch_filter2]...\n"
120         "         taken branch stack sampling, filter can be:\n"
121         "           any: any type of branch\n"
122         "           any_call: any function call or system call\n"
123         "           any_ret: any function return or system call return\n"
124         "           ind_call: any indirect branch\n"
125         "           ind_jmp: any indirect jump\n"
126         "           cond: conditional branches\n"
127         "           call: direct calls, including far (to/from kernel) calls\n"
128         "           u: only when the branch target is at the user level\n"
129         "           k: only when the branch target is in the kernel\n"
130         "         requires at least one of any, any_call, any_ret, ind_call, ind_jmp, cond, call\n"
131         "   -s / --call-stack <fp|dwarf[,size]>\n"
132         "         Setup and enable call stack (stack chain/backtrace) recording, Default is 'fp'.\n"
133         "           the value can be:\n"
134         "             fp: frame pointer\n"
135         "             dwarf: DWARF's CFI - Call Frame Information\n"
136         "               'dwarf,size' set sample stack size, size should be in 8~65528 and 8 byte aligned. \n"
137         "           as the method to collect the information used to show the call stacks.\n"
138         "   --kernel-callchain\n"
139         "         collect kernel callchain, must used with -s fp/dwarf simultaneously.\n"
140         "   --callchain-useronly\n"
141         "         collect only user callchain.\n"
142         "   --delay-unwind\n"
143         "         If '-s dwarf' used, stack will be unwind while recording, use this option to switch\n"
144         "         to unwind after recording.\n"
145         "   --disable-unwind\n"
146         "         If '-s dwarf' is used, stack will be unwind while recording by default\n"
147         "         use this option to disable unwinding.\n"
148         "   --disable-callstack-expand\n"
149         "         If '-s dwarf' is used, to break the 64k stack limit, callstack is merged by default\n"
150         "         to build more complete call stack. that may not be correct sometimes.\n"
151         "   --enable-debuginfo-symbolic\n"
152         "         If '-s fp/dwarf' is used, symbols in .gnu_debugdata section of an elf, also called minidebuginfo\n"
153         "         will be parsed, if not use this option, we will not parse minidebuginfo by default.\n"
154         "   --clockid <clock_id>\n"
155         "         Set the clock id to use for the various time fields in the perf_event_type records.\n"
156         "         monotonic and monotonic_raw are supported,\n"
157         "         some events might also allow boottime, realtime and clock_tai.\n"
158         "   --pipe_input\n"
159         "         Enable anonymous pipe for the client (calling process) to send control\n"
160         "         commands to the server (hiperf executable program).\n"
161         "         This parameter is designed for system-side C++ interface calls.\n"
162         "         Application developers do not need to pay attention to this parameter\n"
163         "         when using the hiperf command tool.\n"
164         "   --pipe_output\n"
165         "         Enable anonymous pipe for the server (hiperf executable program) to send\n"
166         "         response to the client (calling process).\n"
167         "         This parameter is designed for system-side C++ interface calls.\n"
168         "         Application developers do not need to pay attention to this parameter\n"
169         "         when using the hiperf command tool.\n"
170         "   --symbol-dir <dir>\n"
171         "         Set directory to look for symbol files, used for unwinding. \n"
172         "   -m <mmap_pages>\n"
173         "         Number of the mmap pages, used to receiving record data from kernel,\n"
174         "         must be a power of two, rang[2,1024], default is 1024.\n"
175         "   --app <package_name>\n"
176         "         Collect profile info for an OHOS app, the app must be debuggable.\n"
177         "         Record will exit if the process is not started within 20 seconds.\n"
178         "   --chkms <millisec>\n"
179         "         Set the interval of querying the <package_name>.\n"
180         "         <millisec> is in range [1-200], default is 10.\n"
181         "   --data-limit <SIZE[K|M|G]>\n"
182         "         Stop recording after SIZE bytes of records. Default is unlimited.\n"
183         "   -o <output_file_name>\n"
184         "         Set output file name, default is /data/local/tmp/perf.data.\n"
185         "   -z\n"
186         "         Compress record data.\n"
187         "   --restart\n"
188         "         Collect performance counter information of application startup.\n"
189         "         Record will exit if the process is not started within 30 seconds.\n"
190         "   --verbose\n"
191         "         Show more detailed reports.\n"
192         "   --control <command>\n"
193         "         Control sampling by <command>, the <command> can be:\n"
194         "           prepare: set arguments and prepare sampling\n"
195         "           start: start sampling\n"
196         "           pause: pause sampling\n"
197         "           resume: resume sampling\n"
198         "           output: output sampling data\n"
199         "           stop: stop sampling\n"
200         "   --dedup_stack\n"
201         "         Remove duplicated stacks in perf record, conflicts with -a, only restrain using with -p\n"
202         "   --cmdline-size <size>\n"
203         "         set value to /sys/kernel/tracing/saved_cmdlines_size\n"
204         "         the value should be between 512 and 4096\n"
205         "   --report\n"
206         "         Report with callstack after record. Conflicts with the -a option.\n"
207         "   --backtrack\n"
208         "         Collect data of the previous period. only restrain using with --control.\n"
209         "   --backtrack-sec\n"
210         "         If '--backtrack' is used, stop in <sec> seconds. seconds is in range [5-30]\n"
211         "         default is 10\n"
212         "   --dumpoptions\n"
213         "         Dump command options.\n"
214         )
215     // clang-format on
216     {
217     }
218 
    // Destructor is declared here and defined elsewhere.
219     ~SubCommandRecord();
    // Overrides of the SubCommand interface (all marked `override`).
220     HiperfError OnSubCommand(std::vector<std::string>& args) override;
221     bool ParseOption(std::vector<std::string> &args) override;
222     void DumpOptions(void) const override;
223 
224     // add args for hisysevent
225     void AddReportArgs(CommandReporter& reporter) override;
226 
227     static bool RegisterSubCommandRecord(void);
    // Public table of named options, every value starting at 0.
    // NOTE(review): "spe" presumably refers to Arm SPE event options and the
    // values are presumably filled by GetSpeOptions() - confirm in the .cpp.
228     std::map<const std::string, uint64_t> speOptMap_ = {
229         {"branch_filter", 0},   {"load_filter", 0},
230         {"store_filter", 0},    {"ts_enable", 0},
231         {"pa_enable", 0},       {"jitter", 0},
232         {"min_latency", 0},      {"event_filter", 0},
233         {"pct_enable", 0},
234     };
235 
    // Returns a SubCommand reference; which object it refers to is decided by
    // the out-of-view definition.
236     static SubCommand& GetInstance();
237 
238 private:
239     PerfEvents perfEvents_;
240     PerfPipe perfPipe_;
241 
    // Option state. The initializers are the values each member holds before
    // any argument parsing; the parsing code itself is not in this header.
    // Note that kernelCallChain_ is the only flag of this group that starts true.
242     bool targetSystemWide_ = false;
243     bool compressData_ = false;
244     bool noInherit_ = false;
245     bool excludeHiperf_ = false;
246     bool offCPU_ = false;
247     bool delayUnwind_ = false;
248     bool disableUnwind_ = false;
249     bool disableCallstackExpend_ = false;
250     bool enableDebugInfoSymbolic_ = false;
251     bool verboseReport_ = false;
252     bool kernelCallChain_ = true;
253     bool callChainUserOnly_ = false;
254     bool report_ = false;
255     float timeStopSec_ = PerfEvents::DEFAULT_TIMEOUT;
256     int frequency_ = 0;
257     int period_ = 0;
258     int cpuPercent_ = DEFAULT_CPU_PERCENT;
    // Starts at the maximum page count (1024), not at DEFAULT_MMAP_PAGES.
259     int mmapPages_ = MAX_PERF_MMAP_PAGE;
260     int cmdlinesSize_ = DEFAULT_SAVED_CMDLINES_SIZE;
261     int oldCmdlinesSize_ = 0;
262     std::vector<std::string> symbolDir_ = {};
263     std::string outputFilename_ = "/data/local/tmp/perf.data";
264     std::string appPackage_ = {};
265     int checkAppMs_ = DEFAULT_CHECK_APP_MS;
266     std::string clockId_ = {};
267     std::string strLimit_ = {};
268     std::string fifoFileC2S_;
269     std::string fifoFileS2C_;
    // NOTE(review): CPU ids are stored in a vector of pid_t, the same element
    // type as the pid/tid lists below - confirm this is intentional.
270     std::vector<pid_t> selectCpus_ = {};
271     std::vector<pid_t> selectPids_ = {};
272     std::vector<pid_t> selectTids_ = {};
273     std::vector<pid_t> inputPidTidArgs_ = {};
274     bool restart_ = false;
275     std::vector<std::string> selectEvents_ = {};
276     std::vector<std::vector<std::string>> selectGroups_ = {};
277     std::vector<std::string> callStackType_ = {};
278     std::vector<std::string> vecBranchFilters_ = {};
279     std::vector<std::string> trackedCommand_ = {};
280 
281     // for exclude process and thread
282     std::vector<pid_t> excludeTidArgs_ = {};
283     std::vector<std::string> excludeThreadNameArgs_ = {};
284     std::vector<std::string> excludeProcessNameArgs_ = {};
285     std::set<pid_t> excludePids_ = {};
286     std::set<pid_t> excludeTids_ = {};
287     void CollectExcludeThread();
288     void SetExcludeHiperf();
289     bool IsThreadExcluded(const pid_t pid, const pid_t tid);
290 
291     // for background track
292     bool backtrack_ = false;
293     uint64_t backtrackTime_ = DEFAULT_BACKTRACK_TIME_SEC;   // 10 seconds
294     bool outputEnd_ = false;
295     bool PreOutputRecordFile();
296     void OutputRecordFile();
297     bool PostOutputRecordFile(const bool output);
298 
    // Only compiled when CONFIG_HAS_CCM is defined: a relative config file path
    // and the "MmapPages" key.
    // NOTE(review): GetMmapPagesCfg() presumably reads that key from the file to
    // override mmapPages_ - confirm in the definition.
299 #ifdef CONFIG_HAS_CCM
300     static constexpr char PRODUCT_CONFIG_PATH[] = "etc/hiperf/hiperf_cfg.json";
301     static constexpr char CFG_MAP_PAGES[] = "MmapPages";
302     void GetMmapPagesCfg();
303 #endif
304 
    // Option retrieval and validation helpers; each returns bool (presumably
    // true on success - the definitions are not visible here).
305     bool GetOptions(std::vector<std::string> &args);
306     bool CheckArgsRange();
307     bool CheckExcludeArgs();
308     bool CheckOptions();
309     bool GetSpeOptions();
310     bool CheckDataLimitOption();
311     bool CheckSelectCpuPidOption();
312     bool GetOptionFrequencyAndPeriod(std::vector<std::string> &args);
313 
    // Call-stack and data-limit state. callStackDwarfSize_ starts at
    // MAX_SAMPLE_STACK_SIZE, a constant declared outside this header.
314     bool isCallStackDwarf_ = false;
315     bool isCallStackFp_ = false;
316     uint32_t callStackDwarfSize_ = MAX_SAMPLE_STACK_SIZE;
317     uint64_t branchSampleType_ = 0;
318     uint64_t dataSizeLimit_ = 0;
319     bool isDataSizeLimitStop_ = false;
320 
    // Owning pointer to the output writer; null until assigned elsewhere.
321     std::unique_ptr<PerfFileWriter> fileWriter_ = nullptr;
322 
323     // for client
    // All descriptors below start at -1.
324     int clientPipeInput_ = -1;
325     int clientPipeOutput_ = -1;
326     int readFd_ = -1;
327     int writeFd_ = -1;
328     int nullFd_ = -1;
    // Default-constructed thread objects (no thread attached yet).
329     std::thread clientCommandHandle_;
330     std::thread replyCommandHandle_;
    // Atomic flag, initially true.
    // NOTE(review): presumably polled by the two threads above - confirm.
331     std::atomic_bool clientRunning_ = true;
332     bool isHiperfClient_ = false;
    // Pair of callbacks attached to one control command. By default preProcess
    // returns false and postProcess does nothing with its bool argument.
333     struct ControlCommandHandler {
334         std::function<bool()> preProcess = []() -> bool {
335             return false;
336         };
337         std::function<void(bool)> postProcess = [](bool) {};
338     };
    // Handlers keyed by a string (presumably the --control command name);
    // starts empty.
339     std::unordered_map<std::string, ControlCommandHandler> controlCommandHandlerMap_ = {};
    // NOTE(review): the next two functions are declared `inline` but no
    // definition appears in this header, so they can only be used from a
    // translation unit that also contains their definition.
340     inline void CreateClientThread();
341     inline void CreateReplyThread();
342     void ClientCommandHandle();
343     void ReplyCommandHandle();
344     void InitControlCommandHandlerMap();
345     void DispatchControlCommand(const std::string& command);
    // Response helpers come in two overloads each: a bool result or a string.
346     bool ClientCommandResponse(const bool response);
347     bool ClientCommandResponse(const std::string& str);
348     bool ChildResponseToMain(const bool response);
349     bool ChildResponseToMain(const std::string& str);
350     bool IsSamplingRunning();
351 
352     // for cmdline client
353     bool allowIpc_ = true;
354     std::string controlCmd_ = {};
355     bool isFifoServer_ = false;
356     bool isFifoClient_ = false;
357     bool dedupStack_ = false;
    // Maps one pid_t to a list of pid_t.
    // NOTE(review): presumably process id -> thread ids; confirm.
358     std::map<pid_t, std::vector<pid_t>> mapPids_;
359     bool ProcessControl();
360     bool CreateFifoServer();
361     bool MainRecvFromChild(const int fd, std::string& reply);
362     void CloseClientThread();
363     void CloseReplyThread();
364 
365     bool PreparePerfEvent();
366     bool PrepareSysKernel();
367     void PrepareKernelMaps();
368     bool PrepareVirtualRuntime();
369 
    // Counters, both starting at 0.
370     size_t recordSamples_ = 0;
371     size_t recordNoSamples_ = 0;
372 
373     bool isNeedSetPerfHarden_ = false;
374     bool isSpe_ = false;
375 
    // Evaluated once, when the object is constructed, by calling IsRoot()
    // (declared outside this header).
376     const bool isRoot_ = IsRoot();
377     uint32_t offset_ = 0;
    // UINT32_MAX is the initial value.
    // NOTE(review): presumably meaning "no devhost pid known" - confirm.
378     uint32_t devhostPid_ = UINT32_MAX;
379 
380     // callback to process record
381     bool ProcessRecord(PerfEventRecord& record);
382     bool SaveRecord(const PerfEventRecord& record);
383     uint32_t GetOffsetNum();
384     void UpdateDevHostMaps(PerfEventRecord& record);
385     void UpdateDevHostCallChains(PerfEventRecord& record);
386     void UpdateDevHostMapsAndIPs(PerfEventRecord& record);
387 
388     // file format like as 0,1-3,4-6,7,8
389     uint32_t GetCountFromFile(const std::string &fileName);
390     std::string GetCpuDescFromFile();
    // Add*Feature helpers.
    // NOTE(review): presumably each appends one feature section to the output
    // file - confirm in the definitions.
391     bool AddCpuFeature();
392     void AddMemTotalFeature();
393     void AddEventDescFeature();
394     void AddRecordTimeFeature();
395     void AddWorkloadCmdFeature();
396     void AddCommandLineFeature();
397     void AddCpuOffFeature();
398     void AddDevhostFeature();
399     bool AddFeatureRecordFile();
400 
    // compressData defaults to false when the argument is omitted.
401     bool CreateInitRecordFile(const bool compressData = false);
402     bool FinishWriteRecordFile();
403     bool PostProcessRecordFile();
404     bool RecordCompleted();
    // Declared only when HIPERF_DEBUG_TIME is defined.
405 #ifdef HIPERF_DEBUG_TIME
406     void ReportTime();
407 #endif
408 
409     bool CollectionSymbol(PerfEventRecord& record);
410     void CollectSymbol(PerfRecordSample *sample);
    // Takes a file path, an int value, a two-int predicate and a parameter
    // name; how they are combined is defined outside this header.
411     bool SetPerfLimit(const std::string& file, const int value, std::function<bool (int, int)> const& cmd,
412         const std::string& param);
413     bool SetPerfCpuMaxPercent();
414     bool SetPerfMaxSampleRate();
415     bool SetPerfEventMlock();
416     bool SetPerfHarden();
417 
418     bool TraceOffCpu();
419     bool ParseCallStackOption(const std::vector<std::string> &callStackType);
420     bool ParseDataLimitOption(const std::string &str);
421     bool ParseBranchSampleType(const std::vector<std::string> &vecBranchSampleTypes);
    // NOTE(review): unlike the sibling Parse* functions this one takes the
    // string by value, so each call copies the argument.
422     bool ParseControlCmd(const std::string cmd);
423     bool CheckTargetProcessOptions();
424     bool CheckTargetPids();
425     bool CheckReportOption();
426     bool CheckBacktrackOption();
427     bool CheckSpeOption();
428     bool IsAppRestarted();
429     bool CheckAppRestart();
430     pid_t GetPidFromAppPackage(const pid_t oldPid, const uint64_t waitAppTimeOut);
431     bool IsAppRunning();
432     bool IsPidAndTidExist();
433     void MsgPrintAndTrans(const bool isTrans, const std::string& msg);
434     void WriteCommEventBeforeSampling();
435     void RemoveVdsoTmpFile();
436     void RemoveFifoFile();
437 
438     VirtualRuntime virtualRuntime_;
    // Symbol-hit bookkeeping, present only when USE_COLLECT_SYMBOLIC is
    // non-zero (this header defines it as 1).
439 #if USE_COLLECT_SYMBOLIC
440     std::unordered_map<pid_t, std::unordered_set<uint64_t>> kernelThreadSymbolsHits_;
441     kSymbolsHits kernelSymbolsHits_;
442     uSymbolsHits userSymbolsHits_;
443     void SymbolicHits();
444 #endif
445 
    // Microsecond accumulators, zero-initialized, present only when
    // HIPERF_DEBUG_TIME is defined.
446 #ifdef HIPERF_DEBUG_TIME
447     std::chrono::microseconds prcessRecordTimes_ = std::chrono::microseconds::zero();
448     std::chrono::microseconds saveRecordTimes_ = std::chrono::microseconds::zero();
449     std::chrono::microseconds saveFeatureTimes_ = std::chrono::microseconds::zero();
450 #endif
    // steady_clock time point; declared regardless of HIPERF_DEBUG_TIME.
451     std::chrono::time_point<std::chrono::steady_clock> startSaveFileTimes_;
452 
453     void SetHM();
454     void SetSavedCmdlinesSize();
455     void RecoverSavedCmdlinesSize();
456     bool OnlineReportData();
457 
458     // only used in UT
    // Callback type receiving a const PerfEventRecord reference; the stored
    // callback starts out null.
459     using CheckRecordCallBack = std::function<void(const PerfEventRecord&)>;
460     void SetCheckRecordCallback(CheckRecordCallBack callback);
461     CheckRecordCallBack checkCallback_ = nullptr;
462 };
463 } // namespace HiPerf
464 } // namespace Developtools
465 } // namespace OHOS
466 #endif // SUBCOMMAND_RECORD_H
467