1 /* 2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd. 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS, 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 */ 15 #ifndef SUBCOMMAND_RECORD_H 16 #define SUBCOMMAND_RECORD_H 17 18 // some debug feaure 19 #define HIDEBUG_RECORD_NOT_PROCESS 0 20 #define HIDEBUG_RECORD_NOT_PROCESS_VM 0 21 #define HIDEBUG_RECORD_NOT_SAVE 0 22 #define HIDEBUG_SKIP_PROCESS_SYMBOLS 0 23 #define HIDEBUG_SKIP_MATCH_SYMBOLS 0 24 #define HIDEBUG_SKIP_LOAD_KERNEL_SYMBOLS 0 25 #define HIDEBUG_SKIP_SAVE_SYMBOLS 0 26 #define USE_COLLECT_SYMBOLIC 1 27 28 #include <thread> 29 #include <unordered_map> 30 #include <unordered_set> 31 32 #include "perf_event_record.h" 33 #include "perf_events.h" 34 #include "perf_file_writer.h" 35 #include "subcommand.h" 36 #include "virtual_runtime.h" 37 38 namespace OHOS { 39 namespace Developtools { 40 namespace HiPerf { 41 class SubCommandRecord : public SubCommand { 42 public: 43 static constexpr int DEFAULT_CPU_PERCENT = 25; 44 static constexpr int MIN_CPU_PERCENT = 1; 45 static constexpr int MAX_CPU_PERCENT = 100; 46 static constexpr int MIN_SAMPLE_FREQUENCY = 1; 47 static constexpr int MAX_SAMPLE_FREQUENCY = 100000; 48 static constexpr int DEFAULT_MMAP_PAGES = 256; 49 static constexpr int MIN_PERF_MMAP_PAGE = 2; 50 static constexpr int MAX_PERF_MMAP_PAGE = 1024; 51 static constexpr float MIN_STOP_SECONDS = 0.100; 52 static constexpr float MAX_STOP_SECONDS = 10000.0; 53 SubCommandRecord()54 SubCommandRecord() 55 // clang-format off 56 : SubCommand("record", "Collect performance sample information", 57 "Usage: hiperf record [options] [command [command-args]]\n" 58 " Collect performance sampling information of running [command].\n" 59 " The default options are: -c <all cpu> --cpu-limit 25 -d 10000.0 -e hw-cpu-cycles\n" 60 " -f 4000 -m 1024 -o /data/local/tmp/perf.data.\n" 61 " -a\n" 62 " Collect system-wide information.\n" 63 " for measures all processes/threads\n" 64 " This requires CAP_PERFMON (since Linux 5.8) or CAP_SYS_ADMIN capability or a\n" 65 " /proc/sys/kernel/perf_event_paranoid value of less than 1.\n" 66 " --exclude-hiperf\n" 67 " Don't record events issued by hiperf itself.\n" 68 " -c <cpuid>[<,cpuid>]...\n" 69 " cpuid should be 0,1,2...\n" 70 " Limit the CPU that collects data.\n" 71 " 0 means cpu0, 1 means cpu1 ...\n" 72 " --cpu-limit <percent>\n" 73 " Set the max percent of cpu time used for recording.\n" 74 " percent is in range [1-100], default is 25.\n" 75 " -d <sec>\n" 76 " stop in <sec> seconds. floating point number. seconds is in range [0.100-10000.0]\n" 77 " default is 10000.0\n" 78 " -f <freq>\n" 79 " Set event sampling frequency. default is 4000 samples every second.\n" 80 " check /proc/sys/kernel/perf_event_max_sample_rate for maximum allowed frequency\n" 81 " --period <num>\n" 82 " Set event sampling period for tracepoint events. recording one sample when <num> events happen.\n" 83 " The default <num> is 1\n" 84 " -e <event1[:<u|k>]>[,event1[:<u|k>]]...\n" 85 " Customize the name of the event that needs to be sampled.\n" 86 " The name can use the names listed in the list parameter.\n" 87 " It can also be represented by the value of 0x<hex>.\n" 88 " u - monitor user space events only\n" 89 " k - monitor kernel space events only\n" 90 " -g <event1[:<u|k>]>[,event1[:<u|k>]]...\n" 91 " Put the events into a group, can set multiple groups by multiple -g\n" 92 " PMU is required to report data in designated groups\n" 93 " limited by HW capability, too many events cannot be reported in the same sampling)\n" 94 " --no-inherit\n" 95 " Don't trace child processes.\n" 96 " -p <pid1>[,pid2]...\n" 97 " Limit the process id of the collection target. Conflicts with the -a option.\n" 98 " -t <tid1>[,tid2]...\n" 99 " Limit the thread id of the collection target. Conflicts with the -a option.\n" 100 " --offcpu\n" 101 " Trace when threads are scheduled off cpu.\n" 102 " -j <branch_filter1>[,branch_filter2]...\n" 103 " taken branch stack sampling, filter can be:\n" 104 " any: any type of branch\n" 105 " any_call: any function call or system call\n" 106 " any_ret: any function return or system call return\n" 107 " ind_call: any indirect branch\n" 108 " call: direct calls, including far (to/from kernel) calls\n" 109 " u: only when the branch target is at the user level\n" 110 " k: only when the branch target is in the kernel\n" 111 " requires at least one of any, any_call, any_ret, ind_call\n" 112 " -s / --call-stack <fp|dwarf[,size]>\n" 113 " Setup and enable call stack (stack chain/backtrace) recording, Default is 'fp'.\n" 114 " the value can be:\n" 115 " fp: frame pointer\n" 116 " dwarf: DWARF's CFI - Call Frame Information\n" 117 " 'dwarf,size' set sample stack size, size should be in 8~65528 and 8 byte aligned. \n" 118 " as the method to collect the information used to show the call stacks.\n" 119 " --delay-unwind\n" 120 " If '-s dwarf' used, stack will be unwind while recording, use this option to switch\n" 121 " to unwind after recording.\n" 122 " --disable-unwind\n" 123 " If '-s dwarf' is used, stack will be unwind while recording by default\n" 124 " use this option to disable unwinding.\n" 125 " --disable-callstack-expand\n" 126 " If '-s dwarf' is used, to break the 64k stack limit, callstack is merged by default\n" 127 " to build more complete call stack. that may not be correct sometimes.\n" 128 " --clockid <clock_id>\n" 129 " Set the clock id to use for the various time fields in the perf_event_type records.\n" 130 " monotonic and monotonic_raw are supported,\n" 131 " some events might also allow boottime, realtime and clock_tai.\n" 132 " --symbol-dir <dir>\n" 133 " Set directory to look for symbol files, used for unwinding. \n" 134 " -m <mmap_pages>\n" 135 " Number of the mmap pages, used to receiving record data from kernel,\n" 136 " must be a power of two, rang[2,1024], default is 1024.\n" 137 " --app <package_name>\n" 138 " Collect profile info for an OHOS app, the app must be debuggable.\n" 139 " Record will exit if the process is not started within 10 seconds.\n" 140 " --data-limit <SIZE[K|M|G]>\n" 141 " Stop recording after SIZE bytes of records. Default is unlimited.\n" 142 " -o <output_file_name>\n" 143 " Set output file name, default is /data/local/tmp/perf.data.\n" 144 " -z\n" 145 " Compress record data.\n" 146 " --verbose\n" 147 " Show more detailed reports.\n" 148 " --control <command>\n" 149 " Control sampling by <command>, the <command> can be:\n" 150 " prepare: set arguments and prepare sampling\n" 151 " start: start sampling\n" 152 " pause: pause sampling\n" 153 " resume: resume sampling\n" 154 " stop: stop sampling\n" 155 ) 156 // clang-format on 157 { 158 } 159 160 ~SubCommandRecord(); 161 bool OnSubCommand(std::vector<std::string> &args) override; 162 bool ParseOption(std::vector<std::string> &args) override; 163 void DumpOptions(void) const override; 164 165 static bool RegisterSubCommandRecord(void); 166 167 private: 168 PerfEvents perfEvents_; 169 170 bool targetSystemWide_ = false; 171 bool compressData_ = false; 172 bool noInherit_ = false; 173 bool excludeHiperf_ = false; 174 bool offCPU_ = false; 175 bool delayUnwind_ = false; 176 bool disableUnwind_ = false; 177 bool disableCallstackExpend_ = false; 178 bool verboseReport_ = false; 179 float timeStopSec_ = PerfEvents::DEFAULT_TIMEOUT; 180 int frequency_ = 0; 181 int period_ = 0; 182 int cpuPercent_ = DEFAULT_CPU_PERCENT; 183 int mmapPages_ = MAX_PERF_MMAP_PAGE; 184 std::vector<std::string> symbolDir_ = {}; 185 std::string outputFilename_ = "/data/local/tmp/perf.data"; 186 std::string appPackage_ = {}; 187 std::string clockId_ = {}; 188 std::string strLimit_ = {}; 189 std::vector<pid_t> selectCpus_ = {}; 190 std::vector<pid_t> selectPids_ = {}; 191 std::vector<pid_t> selectTids_ = {}; 192 std::vector<std::string> selectEvents_ = {}; 193 std::vector<std::vector<std::string>> selectGroups_ = {}; 194 std::vector<std::string> callStackType_ = {}; 195 std::vector<std::string> vecBranchFilters_ = {}; 196 std::vector<std::string> trackedCommand_ = {}; 197 198 bool GetOptions(std::vector<std::string> &args); 199 bool CheckOptions(); 200 bool CheckDataLimitOption(); 201 bool CheckSelectCpuPidOption(); 202 bool GetOptionFrequencyAndPeriod(std::vector<std::string> &args); 203 204 bool isCallStackDwarf_ = false; 205 bool isCallStackFp_ = false; 206 uint32_t callStackDwarfSize_ = MAX_SAMPLE_STACK_SIZE; 207 uint64_t branchSampleType_ = 0; 208 uint64_t dataSizeLimit_ = 0; 209 bool isDataSizeLimitStop_ = false; 210 211 std::unique_ptr<PerfFileWriter> fileWriter_ = nullptr; 212 213 // for client 214 int clientPipeInput_ = -1; 215 int clientPipeOutput_ = -1; 216 std::thread clientCommandHanle_; 217 bool clientExit_ = false; 218 void ClientCommandHandle(); 219 bool ClientCommandResponse(bool OK); 220 bool IsSamplingRunning(); 221 // for cmdline client 222 std::string controlCmd_ = {}; 223 bool isFifoServer_ = false; 224 bool isFifoClient_ = false; 225 bool ProcessControl(); 226 bool CreateFifoServer(); 227 bool SendFifoAndWaitReply(const std::string &cmd); 228 bool WaitFifoReply(int fd); 229 void CloseClientThread(); 230 231 bool PreparePerfEvent(); 232 bool PrepareSysKernel(); 233 bool PrepareVirtualRuntime(); 234 235 size_t recordSamples_ = 0; 236 size_t recordNoSamples_ = 0; 237 // callback to process record 238 bool ProcessRecord(std::unique_ptr<PerfEventRecord>); 239 bool SaveRecord(std::unique_ptr<PerfEventRecord>); 240 241 // file format like as 0,1-3,4-6,7,8 242 uint32_t GetCountFromFile(const std::string &fileName); 243 std::string GetCpuDescFromFile(); 244 bool AddCpuFeature(); 245 void AddMemTotalFeature(); 246 void AddEventDescFeature(); 247 void AddRecordTimeFeature(); 248 void AddWorkloadCmdFeature(); 249 void AddCommandLineFeature(); 250 void AddCpuOffFeature(); 251 bool AddFeatureRecordFile(); 252 253 bool CreateInitRecordFile(bool compressData = false); 254 bool FinishWriteRecordFile(); 255 bool PostProcessRecordFile(); 256 bool RecordCompleted(); 257 #ifdef HIPERF_DEBUG_TIME 258 void ReportTime(); 259 #endif 260 261 bool CollectionSymbol(std::unique_ptr<PerfEventRecord> record); 262 263 bool SetPerfCpuMaxPercent(); 264 bool SetPerfMaxSampleRate(); 265 266 bool TraceOffCpu(); 267 bool ParseCallStackOption(const std::vector<std::string> &callStackType); 268 bool ParseDataLimitOption(const std::string &str); 269 bool ParseBranchSampleType(const std::vector<std::string> &vecBranchSampleTypes); 270 bool ParseControlCmd(const std::string cmd); 271 bool CheckTargetProcessOptions(); 272 bool CheckTargetPids(); 273 274 pid_t GetAppPackagePid(const std::string &appPackge); 275 276 VirtualRuntime virtualRuntime_; 277 #if USE_COLLECT_SYMBOLIC 278 std::unordered_set<uint64_t> kernelSymbolsHits_; 279 std::unordered_map<pid_t, std::unordered_set<uint64_t>> userSymbolsHits_; 280 void SymbolicHits(); 281 #endif 282 283 #ifdef HIPERF_DEBUG_TIME 284 std::chrono::microseconds prcessRecordTimes_ = std::chrono::microseconds::zero(); 285 std::chrono::microseconds saveRecordTimes_ = std::chrono::microseconds::zero(); 286 std::chrono::microseconds saveFeatureTimes_ = std::chrono::microseconds::zero(); 287 #endif 288 std::chrono::time_point<std::chrono::steady_clock> startSaveFileTimes_; 289 }; 290 } // namespace HiPerf 291 } // namespace Developtools 292 } // namespace OHOS 293 #endif 294