• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <inttypes.h>
18 #include <signal.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <sys/prctl.h>
22 
23 #include <algorithm>
24 #include <chrono>
25 #include <optional>
26 #include <set>
27 #include <string>
28 #include <string_view>
29 #include <vector>
30 
31 #include <android-base/file.h>
32 #include <android-base/logging.h>
33 #include <android-base/strings.h>
34 #include <android-base/unique_fd.h>
35 
36 #include "IOEventLoop.h"
37 #include "ProbeEvents.h"
38 #include "cmd_stat_impl.h"
39 #include "command.h"
40 #include "environment.h"
41 #include "event_attr.h"
42 #include "event_fd.h"
43 #include "event_selection_set.h"
44 #include "event_type.h"
45 #include "utils.h"
46 #include "workload.h"
47 
48 namespace simpleperf {
49 
50 using android::base::Split;
51 
// Event types measured when the user does not select any event explicitly.
static std::vector<std::string> default_measured_event_types = {
    "cpu-cycles",
    "stalled-cycles-frontend",
    "stalled-cycles-backend",
    "instructions",
    "branch-instructions",
    "branch-misses",
    "task-clock",
    "context-switches",
    "page-faults",
};
57 
// Architecture-independent rate table.
// Key: name of the "miss" event. Value: {name of the event it is divided by, description of the
// resulting ratio}.
static const std::unordered_map<std::string_view, std::pair<std::string_view, std::string_view>>
    COMMON_EVENT_RATE_MAP{
        {"cache-misses",
         {"cache-references", "miss rate"}},
        {"branch-misses",
         {"branch-instructions", "miss rate"}},
    };
63 
// ARM-specific rate table, consulted only when the target architecture is ARM/ARM64.
// Key: name of the refill/miss/access event. Value: {name of the event it is divided by,
// description of the resulting ratio}.
static const std::unordered_map<std::string_view, std::pair<std::string_view, std::string_view>>
    ARM_EVENT_RATE_MAP = {
        // Refer to "D6.10.5 Meaningful ratios between common microarchitectural events" in ARMv8
        // specification.
        {"raw-l1i-cache-refill", {"raw-l1i-cache", "level 1 instruction cache refill rate"}},
        {"raw-l1i-tlb-refill", {"raw-l1i-tlb", "level 1 instruction TLB refill rate"}},
        {"raw-l1d-cache-refill", {"raw-l1d-cache", "level 1 data or unified cache refill rate"}},
        {"raw-l1d-tlb-refill", {"raw-l1d-tlb", "level 1 data or unified TLB refill rate"}},
        {"raw-l2d-cache-refill", {"raw-l2d-cache", "level 2 data or unified cache refill rate"}},
        {"raw-l2i-cache-refill", {"raw-l2i-cache", "level 2 instruction cache refill rate"}},
        {"raw-l3d-cache-refill", {"raw-l3d-cache", "level 3 data or unified cache refill rate"}},
        {"raw-l2d-tlb-refill", {"raw-l2d-tlb", "level 2 data or unified TLB refill rate"}},
        {"raw-l2i-tlb-refill", {"raw-l2i-tlb", "level 2 instruction TLB refill rate"}},
        {"raw-bus-access", {"raw-bus-cycles", "bus accesses per cycle"}},
        {"raw-ll-cache-miss", {"raw-ll-cache", "last level data or unified cache refill rate"}},
        {"raw-dtlb-walk", {"raw-l1d-tlb", "data TLB miss rate"}},
        {"raw-itlb-walk", {"raw-l1i-tlb", "instruction TLB miss rate"}},
        {"raw-ll-cache-miss-rd", {"raw-ll-cache-rd", "memory read operation miss rate"}},
        {"raw-remote-access-rd",
         {"raw-remote-access", "read accesses to another socket in a multi-socket system"}},
        // Refer to "Table K3-2 Relationship between REFILL events and associated access events" in
        // ARMv8 specification.
        {"raw-l1d-cache-refill-rd", {"raw-l1d-cache-rd", "level 1 cache refill rate, read"}},
        {"raw-l1d-cache-refill-wr", {"raw-l1d-cache-wr", "level 1 cache refill rate, write"}},
        {"raw-l1d-tlb-refill-rd", {"raw-l1d-tlb-rd", "level 1 TLB refill rate, read"}},
        {"raw-l1d-tlb-refill-wr", {"raw-l1d-tlb-wr", "level 1 TLB refill rate, write"}},
        {"raw-l2d-cache-refill-rd", {"raw-l2d-cache-rd", "level 2 data cache refill rate, read"}},
        {"raw-l2d-cache-refill-wr", {"raw-l2d-cache-wr", "level 2 data cache refill rate, write"}},
        {"raw-l2d-tlb-refill-rd", {"raw-l2d-tlb-rd", "level 2 data TLB refill rate, read"}},
        // Write counterpart of the entry above; every other refill event in this group has both
        // a read and a write entry.
        {"raw-l2d-tlb-refill-wr", {"raw-l2d-tlb-wr", "level 2 data TLB refill rate, write"}},
};
94 
ReadableCountValue(bool csv)95 std::string CounterSummary::ReadableCountValue(bool csv) {
96   if (type_name == "cpu-clock" || type_name == "task-clock") {
97     // Convert nanoseconds to milliseconds.
98     double value = count / 1e6;
99     return android::base::StringPrintf("%lf(ms)", value);
100   }
101   if (csv) {
102     return android::base::StringPrintf("%" PRIu64, count);
103   }
104   return ReadableCount(count);
105 }
106 
FindSummary(const std::string & type_name,const std::string & modifier,const ThreadInfo * thread,int cpu)107 const CounterSummary* CounterSummaries::FindSummary(const std::string& type_name,
108                                                     const std::string& modifier,
109                                                     const ThreadInfo* thread, int cpu) {
110   for (const auto& s : summaries_) {
111     if (s.type_name == type_name && s.modifier == modifier && s.thread == thread && s.cpu == cpu) {
112       return &s;
113     }
114   }
115   return nullptr;
116 }
117 
AutoGenerateSummaries()118 void CounterSummaries::AutoGenerateSummaries() {
119   for (size_t i = 0; i < summaries_.size(); ++i) {
120     const CounterSummary& s = summaries_[i];
121     if (s.modifier == "u") {
122       const CounterSummary* other = FindSummary(s.type_name, "k", s.thread, s.cpu);
123       if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
124         if (FindSummary(s.type_name, "", s.thread, s.cpu) == nullptr) {
125           summaries_.emplace_back(s.type_name, "", s.group_id, s.thread, s.cpu,
126                                   s.count + other->count, s.runtime_in_ns, s.scale, true, csv_);
127         }
128       }
129     }
130   }
131 }
132 
GenerateComments(double duration_in_sec)133 void CounterSummaries::GenerateComments(double duration_in_sec) {
134   for (auto& s : summaries_) {
135     s.comment = GetCommentForSummary(s, duration_in_sec);
136   }
137 }
138 
Show(FILE * fp)139 void CounterSummaries::Show(FILE* fp) {
140   bool show_thread = !summaries_.empty() && summaries_[0].thread != nullptr;
141   bool show_cpu = !summaries_.empty() && summaries_[0].cpu != -1;
142   if (csv_) {
143     ShowCSV(fp, show_thread, show_cpu);
144   } else {
145     ShowText(fp, show_thread, show_cpu);
146   }
147 }
148 
ShowCSV(FILE * fp,bool show_thread,bool show_cpu)149 void CounterSummaries::ShowCSV(FILE* fp, bool show_thread, bool show_cpu) {
150   for (auto& s : summaries_) {
151     if (show_thread) {
152       fprintf(fp, "%s,%d,%d,", s.thread->name.c_str(), s.thread->pid, s.thread->tid);
153     }
154     if (show_cpu) {
155       fprintf(fp, "%d,", s.cpu);
156     }
157     fprintf(fp, "%s,%s,%s,%s\n", s.readable_count.c_str(), s.Name().c_str(), s.comment.c_str(),
158             (s.auto_generated ? "(generated)," : ""));
159   }
160 }
161 
ShowText(FILE * fp,bool show_thread,bool show_cpu)162 void CounterSummaries::ShowText(FILE* fp, bool show_thread, bool show_cpu) {
163   std::vector<std::string> titles;
164 
165   if (show_thread) {
166     titles = {"thread_name", "pid", "tid"};
167   }
168   if (show_cpu) {
169     titles.emplace_back("cpu");
170   }
171   titles.emplace_back("count");
172   titles.emplace_back("event_name");
173   titles.emplace_back(" # count / runtime");
174 
175   std::vector<size_t> width(titles.size(), 0);
176 
177   auto adjust_width = [](size_t& w, size_t size) { w = std::max(w, size); };
178 
179   // The last title is too long. Don't include it for width adjustment.
180   for (size_t i = 0; i + 1 < titles.size(); i++) {
181     adjust_width(width[i], titles[i].size());
182   }
183 
184   for (auto& s : summaries_) {
185     size_t i = 0;
186     if (show_thread) {
187       adjust_width(width[i++], s.thread->name.size());
188       adjust_width(width[i++], std::to_string(s.thread->pid).size());
189       adjust_width(width[i++], std::to_string(s.thread->tid).size());
190     }
191     if (show_cpu) {
192       adjust_width(width[i++], std::to_string(s.cpu).size());
193     }
194     adjust_width(width[i++], s.readable_count.size());
195     adjust_width(width[i++], s.Name().size());
196     adjust_width(width[i++], s.comment.size());
197   }
198 
199   fprintf(fp, "# ");
200   for (size_t i = 0; i < titles.size(); i++) {
201     if (titles[i] == "count") {
202       fprintf(fp, "%*s", static_cast<int>(width[i]), titles[i].c_str());
203     } else {
204       fprintf(fp, "%-*s", static_cast<int>(width[i]), titles[i].c_str());
205     }
206     if (i + 1 < titles.size()) {
207       fprintf(fp, "  ");
208     }
209   }
210   fprintf(fp, "\n");
211 
212   for (auto& s : summaries_) {
213     size_t i = 0;
214     if (show_thread) {
215       fprintf(fp, "  %-*s", static_cast<int>(width[i++]), s.thread->name.c_str());
216       fprintf(fp, "  %-*d", static_cast<int>(width[i++]), s.thread->pid);
217       fprintf(fp, "  %-*d", static_cast<int>(width[i++]), s.thread->tid);
218     }
219     if (show_cpu) {
220       fprintf(fp, "  %-*d", static_cast<int>(width[i++]), s.cpu);
221     }
222     fprintf(fp, "  %*s  %-*s   # %-*s%s\n", static_cast<int>(width[i]), s.readable_count.c_str(),
223             static_cast<int>(width[i + 1]), s.Name().c_str(), static_cast<int>(width[i + 2]),
224             s.comment.c_str(), (s.auto_generated ? " (generated)" : ""));
225   }
226 }
227 
GetCommentForSummary(const CounterSummary & s,double duration_in_sec)228 std::string CounterSummaries::GetCommentForSummary(const CounterSummary& s,
229                                                    double duration_in_sec) {
230   char sap_mid;
231   if (csv_) {
232     sap_mid = ',';
233   } else {
234     sap_mid = ' ';
235   }
236   if (s.type_name == "task-clock") {
237     double run_sec = s.count / 1e9;
238     double used_cpus = run_sec / duration_in_sec;
239     return android::base::StringPrintf("%f%ccpus used", used_cpus, sap_mid);
240   }
241   if (s.type_name == "cpu-clock") {
242     return "";
243   }
244   if (s.type_name == "cpu-cycles") {
245     if (s.runtime_in_ns == 0) {
246       return "";
247     }
248     double ghz = static_cast<double>(s.count) / s.runtime_in_ns;
249     return android::base::StringPrintf("%f%cGHz", ghz, sap_mid);
250   }
251   if (s.type_name == "instructions" && s.count != 0) {
252     const CounterSummary* other = FindSummary("cpu-cycles", s.modifier, s.thread, s.cpu);
253     if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
254       double cpi = static_cast<double>(other->count) / s.count;
255       return android::base::StringPrintf("%f%ccycles per instruction", cpi, sap_mid);
256     }
257   }
258   std::string rate_comment = GetRateComment(s, sap_mid);
259   if (!rate_comment.empty()) {
260     return rate_comment;
261   }
262   if (s.runtime_in_ns == 0) {
263     return "";
264   }
265   double runtime_in_sec = static_cast<double>(s.runtime_in_ns) / 1e9;
266   double rate = s.count / runtime_in_sec;
267   if (rate >= 1e9 - 1e5) {
268     return android::base::StringPrintf("%.3f%cG/sec", rate / 1e9, sap_mid);
269   }
270   if (rate >= 1e6 - 1e2) {
271     return android::base::StringPrintf("%.3f%cM/sec", rate / 1e6, sap_mid);
272   }
273   if (rate >= 1e3) {
274     return android::base::StringPrintf("%.3f%cK/sec", rate / 1e3, sap_mid);
275   }
276   return android::base::StringPrintf("%.3f%c/sec", rate, sap_mid);
277 }
278 
GetRateComment(const CounterSummary & s,char sep)279 std::string CounterSummaries::GetRateComment(const CounterSummary& s, char sep) {
280   std::string_view miss_event_name = s.type_name;
281   std::string event_name;
282   std::string rate_desc;
283   if (auto it = COMMON_EVENT_RATE_MAP.find(miss_event_name); it != COMMON_EVENT_RATE_MAP.end()) {
284     event_name = it->second.first;
285     rate_desc = it->second.second;
286   }
287   if (event_name.empty() && (GetTargetArch() == ARCH_ARM || GetTargetArch() == ARCH_ARM64)) {
288     if (auto it = ARM_EVENT_RATE_MAP.find(miss_event_name); it != ARM_EVENT_RATE_MAP.end()) {
289       event_name = it->second.first;
290       rate_desc = it->second.second;
291     }
292   }
293   if (event_name.empty() && android::base::ConsumeSuffix(&miss_event_name, "-misses")) {
294     event_name = std::string(miss_event_name) + "s";
295     rate_desc = "miss rate";
296   }
297   if (!event_name.empty()) {
298     const CounterSummary* other = FindSummary(event_name, s.modifier, s.thread, s.cpu);
299     if (other != nullptr && other->IsMonitoredAtTheSameTime(s) && other->count != 0) {
300       double miss_rate = static_cast<double>(s.count) / other->count;
301       return android::base::StringPrintf("%f%%%c%s", miss_rate * 100, sep, rate_desc.c_str());
302     }
303   }
304   return "";
305 }
306 
307 namespace {
308 
309 // devfreq may use performance counters to calculate memory latency (as in
310 // drivers/devfreq/arm-memlat-mon.c). Hopefully we can get more available counters by asking devfreq
311 // to not use the memory latency governor temporarily.
312 class DevfreqCounters {
313  public:
Use()314   bool Use() {
315     if (!IsRoot()) {
316       LOG(ERROR) << "--use-devfreq-counters needs root permission to set devfreq governors";
317       return false;
318     }
319     std::string devfreq_dir = "/sys/class/devfreq/";
320     for (auto& name : GetSubDirs(devfreq_dir)) {
321       std::string governor_path = devfreq_dir + name + "/governor";
322       if (IsRegularFile(governor_path)) {
323         std::string governor;
324         if (!android::base::ReadFileToString(governor_path, &governor)) {
325           LOG(ERROR) << "failed to read " << governor_path;
326           return false;
327         }
328         governor = android::base::Trim(governor);
329         if (governor == "mem_latency") {
330           if (!android::base::WriteStringToFile("performance", governor_path)) {
331             PLOG(ERROR) << "failed to write " << governor_path;
332             return false;
333           }
334           mem_latency_governor_paths_.emplace_back(std::move(governor_path));
335         }
336       }
337     }
338     return true;
339   }
340 
~DevfreqCounters()341   ~DevfreqCounters() {
342     for (auto& path : mem_latency_governor_paths_) {
343       android::base::WriteStringToFile("mem_latency", path);
344     }
345   }
346 
347  private:
348   std::vector<std::string> mem_latency_governor_paths_;
349 };
350 
351 // Periodically scan /proc for new threads. If found, create new perf event files for the
352 // new threads.
353 class NewThreadMonitor {
354  private:
355   const int SCAN_INTERVAL_US = 1;
356 
357  public:
NewThreadMonitor(EventSelectionSet & event_selection_set,bool monitor_all_processes,const std::set<pid_t> & monitored_processes,std::unordered_map<pid_t,ThreadInfo> & threads)358   NewThreadMonitor(EventSelectionSet& event_selection_set, bool monitor_all_processes,
359                    const std::set<pid_t>& monitored_processes,
360                    std::unordered_map<pid_t, ThreadInfo>& threads)
361       : event_selection_set_(event_selection_set),
362         monitor_all_processes_(monitor_all_processes),
363         monitored_processes_(monitored_processes),
364         threads_(threads) {}
365 
Start()366   bool Start() {
367     IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
368     timeval tv;
369     tv.tv_sec = 0;
370     tv.tv_usec = SCAN_INTERVAL_US;
371     if (!loop->AddPeriodicEvent(tv, std::bind(&NewThreadMonitor::Scan, this))) {
372       return false;
373     }
374     // Ensure perf event files opened for new threads are immediately enabled.
375     event_selection_set_.SetEnableCondition(true, false);
376     return true;
377   }
378 
379  private:
Scan()380   bool Scan() {
381     std::unordered_set<pid_t> new_tids;
382     if (monitor_all_processes_) {
383       for (int pid : GetAllProcesses()) {
384         for (auto tid : GetThreadsInProcess(pid)) {
385           if (threads_.count(tid) == 0) {
386             new_tids.insert(tid);
387           }
388         }
389       }
390     } else {
391       for (auto tid : monitored_processes_) {
392         for (auto tid : GetThreadsInProcess(tid)) {
393           if (threads_.count(tid) == 0) {
394             new_tids.insert(tid);
395           }
396         }
397       }
398     }
399     std::set<pid_t> open_event_file_tids;
400     for (auto tid : new_tids) {
401       ThreadInfo info;
402       if (ReadThreadNameAndPid(tid, &info.name, &info.pid)) {
403         info.tid = tid;
404         threads_[tid] = std::move(info);
405         open_event_file_tids.insert(tid);
406       }
407     }
408     if (!open_event_file_tids.empty()) {
409       // It's okay for OpenEventFilesForThreads() to return false. It happens
410       // when the new threads exit before we can open event files for them.
411       event_selection_set_.OpenEventFilesForThreads(open_event_file_tids);
412     }
413     return true;
414   }
415 
416  private:
417   EventSelectionSet& event_selection_set_;
418   bool monitor_all_processes_ = false;
419   std::set<pid_t> monitored_processes_;
420   std::unordered_map<pid_t, ThreadInfo>& threads_;
421 };
422 
423 class StatCommand : public Command {
424  public:
StatCommand()425   StatCommand()
426       : Command(
427             "stat", "gather performance counter information",
428             // clang-format off
429 "Usage: simpleperf stat [options] [command [command-args]]\n"
430 "       Gather performance counter information of running [command].\n"
431 "       And -a/-p/-t option can be used to change target of counter information.\n"
432 "-a           Collect system-wide information.\n"
433 #if defined(__ANDROID__)
434 "--app package_name    Profile the process of an Android application.\n"
435 "                      On non-rooted devices, the app must be debuggable,\n"
436 "                      because we use run-as to switch to the app's context.\n"
437 #endif
438 "--cpu cpu_item1,cpu_item2,...  Monitor events on selected cpus. cpu_item can be a number like\n"
439 "                               1, or a range like 0-3. A --cpu option affects all event types\n"
440 "                               following it until meeting another --cpu option.\n"
441 "--csv            Write report in comma separate form.\n"
442 "--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
443 "                        [command]. Here time_in_sec may be any positive\n"
444 "                        floating point number.\n"
445 "--interval time_in_ms   Print stat for every time_in_ms milliseconds.\n"
446 "                        Here time_in_ms may be any positive floating point\n"
447 "                        number. Simpleperf prints total values from the\n"
448 "                        starting point. But this can be changed by\n"
449 "                        --interval-only-values.\n"
450 "--interval-only-values  Print numbers of events happened in each interval.\n"
451 "-e event1[:modifier1],event2[:modifier2],...\n"
452 "                 Select a list of events to count. An event can be:\n"
453 "                   1) an event name listed in `simpleperf list`;\n"
454 "                   2) a raw PMU event in rN format. N is a hex number.\n"
455 "                      For example, r1b selects event number 0x1b.\n"
456 "                 Modifiers can be added to define how the event should be\n"
457 "                 monitored. Possible modifiers are:\n"
458 "                   u - monitor user space events only\n"
459 "                   k - monitor kernel space events only\n"
460 "--group event1[:modifier],event2[:modifier2],...\n"
461 "             Similar to -e option. But events specified in the same --group\n"
462 "             option are monitored as a group, and scheduled in and out at the\n"
463 "             same time.\n"
464 "--kprobe kprobe_event1,kprobe_event2,...\n"
465 "             Add kprobe events during stating. The kprobe_event format is in\n"
466 "             Documentation/trace/kprobetrace.rst in the kernel. Examples:\n"
467 "               'p:myprobe do_sys_openat2 $arg2:string'   - add event kprobes:myprobe\n"
468 "               'r:myretprobe do_sys_openat2 $retval:s64' - add event kprobes:myretprobe\n"
469 "--uprobe uprobe_event1,uprobe_event2,...\n"
470 "             Add uprobe events during stating. The uprobe_event format is in\n"
471 "             Documentation/trace/uprobetracer.rst in the kernel. Examples:\n"
472 "               'p:myprobe /system/lib64/libc.so:0x1000'\n"
473 "                   - add event uprobes:myprobe\n"
474 "               'r:myretprobe /system/lib64/libc.so:0x1000'\n"
475 "                   - add event uprobes:myretprobe\n"
476 "--no-inherit     Don't stat created child threads/processes.\n"
477 "-o output_filename  Write report to output_filename instead of standard output.\n"
478 "--per-core       Print counters for each cpu core.\n"
479 "--per-thread     Print counters for each thread.\n"
480 "--monitor-new-thread  Print counters for new threads created after stating. It should be used\n"
481 "                      With --per-thread and --no-inherit.\n"
482 "-p pid_or_process_name_regex1,pid_or_process_name_regex2,...\n"
483 "                      Stat events on existing processes. Processes are searched either by pid\n"
484 "                      or process name regex. Mutually exclusive with -a.\n"
485 "-t tid1,tid2,...      Stat events on existing threads. Mutually exclusive with -a.\n"
486 "--tp-filter filter_string    Set filter_string for the previous tracepoint event.\n"
487 "                             Format is in Documentation/trace/events.rst in the kernel.\n"
488 "                             An example: 'prev_comm != \"simpleperf\" && (prev_pid > 1)'.\n"
489 "--print-hw-counter    Test and print CPU PMU hardware counters available on the device.\n"
490 "--sort key1,key2,...  Select keys used to sort the report, used when --per-thread\n"
491 "                      or --per-core appears. The appearance order of keys decides\n"
492 "                      the order of keys used to sort the report.\n"
493 "                      Possible keys include:\n"
494 "                        count             -- event count for each entry\n"
495 "                        count_per_thread  -- event count for a thread on all cpus\n"
496 "                        cpu               -- cpu id\n"
497 "                        pid               -- process id\n"
498 "                        tid               -- thread id\n"
499 "                        comm              -- thread name\n"
500 "                      The default sort keys are:\n"
501 "                        count_per_thread,tid,cpu,count\n"
502 #if defined(__ANDROID__)
503 "--use-devfreq-counters    On devices with Qualcomm SOCs, some hardware counters may be used\n"
504 "                          to monitor memory latency (in drivers/devfreq/arm-memlat-mon.c),\n"
505 "                          making fewer counters available to users. This option asks devfreq\n"
506 "                          to temporarily release counters by replacing memory-latency governor\n"
507 "                          with performance governor. It affects memory latency during profiling,\n"
508 "                          and may cause wedged power if simpleperf is killed in between.\n"
509 #endif
510 "--verbose        Show result in verbose mode.\n"
511 #if 0
512 // Below options are only used internally and shouldn't be visible to the public.
513 "--in-app         We are already running in the app's context.\n"
514 "--tracepoint-events file_name   Read tracepoint events from [file_name] instead of tracefs.\n"
515 "--out-fd <fd>    Write output to a file descriptor.\n"
516 "--stop-signal-fd <fd>   Stop stating when fd is readable.\n"
517 #endif
518             // clang-format on
519             ),
520         verbose_mode_(false),
521         system_wide_collection_(false),
522         child_inherit_(true),
523         duration_in_sec_(0),
524         interval_in_ms_(0),
525         interval_only_values_(false),
526         event_selection_set_(true),
527         csv_(false),
528         in_app_context_(false) {
529     // Die if parent exits.
530     prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
531     // Set default sort keys. Full key list is in BuildSummaryComparator().
532     sort_keys_ = {"count_per_thread", "tid", "cpu", "count"};
533   }
534 
535   bool Run(const std::vector<std::string>& args);
536 
537  private:
538   bool ParseOptions(const std::vector<std::string>& args, std::vector<std::string>* non_option_args,
539                     ProbeEvents& probe_events);
540   void PrintHardwareCounters();
541   bool AddDefaultMeasuredEventTypes();
542   void SetEventSelectionFlags();
543   void MonitorEachThread(std::unique_ptr<Workload>& workload);
544   void AdjustToIntervalOnlyValues(std::vector<CountersInfo>& counters);
545   bool ShowCounters(const std::vector<CountersInfo>& counters, double duration_in_sec, FILE* fp);
546   void CheckHardwareCounterMultiplexing();
547   void PrintWarningForInaccurateEvents();
548 
549   bool verbose_mode_;
550   bool system_wide_collection_;
551   bool child_inherit_;
552   double duration_in_sec_;
553   double interval_in_ms_;
554   bool interval_only_values_;
555   std::vector<std::vector<CounterSum>> last_sum_values_;
556   EventSelectionSet event_selection_set_;
557   std::string output_filename_;
558   android::base::unique_fd out_fd_;
559   bool csv_;
560   std::string app_package_name_;
561   bool in_app_context_;
562   android::base::unique_fd stop_signal_fd_;
563   bool use_devfreq_counters_ = false;
564 
565   bool report_per_core_ = false;
566   bool report_per_thread_ = false;
567   bool monitor_new_thread_ = false;
568   // used to report event count for each thread
569   std::unordered_map<pid_t, ThreadInfo> thread_info_;
570   // used to sort report
571   std::vector<std::string> sort_keys_;
572   std::optional<SummaryComparator> summary_comparator_;
573   bool print_hw_counter_ = false;
574 };
575 
Run(const std::vector<std::string> & args)576 bool StatCommand::Run(const std::vector<std::string>& args) {
577   if (!CheckPerfEventLimit()) {
578     return false;
579   }
580   AllowMoreOpenedFiles();
581 
582   // 1. Parse options, and use default measured event types if not given.
583   std::vector<std::string> workload_args;
584   ProbeEvents probe_events(event_selection_set_);
585   if (!ParseOptions(args, &workload_args, probe_events)) {
586     return false;
587   }
588   if (print_hw_counter_) {
589     PrintHardwareCounters();
590     return true;
591   }
592   if (!app_package_name_.empty() && !in_app_context_) {
593     if (!IsRoot()) {
594       return RunInAppContext(app_package_name_, "stat", args, workload_args.size(),
595                              output_filename_, !event_selection_set_.GetTracepointEvents().empty());
596     }
597   }
598   DevfreqCounters devfreq_counters;
599   if (use_devfreq_counters_) {
600     if (!devfreq_counters.Use()) {
601       return false;
602     }
603   }
604   if (event_selection_set_.empty()) {
605     if (!AddDefaultMeasuredEventTypes()) {
606       return false;
607     }
608   }
609   SetEventSelectionFlags();
610 
611   // 2. Create workload.
612   std::unique_ptr<Workload> workload;
613   if (!workload_args.empty()) {
614     workload = Workload::CreateWorkload(workload_args);
615     if (workload == nullptr) {
616       return false;
617     }
618   }
619   bool need_to_check_targets = false;
620   if (system_wide_collection_) {
621     if (report_per_thread_) {
622       event_selection_set_.AddMonitoredProcesses(GetAllProcesses());
623     } else {
624       event_selection_set_.AddMonitoredThreads({-1});
625     }
626   } else if (!event_selection_set_.HasMonitoredTarget()) {
627     if (workload != nullptr) {
628       event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
629       event_selection_set_.SetEnableCondition(false, true);
630     } else if (!app_package_name_.empty()) {
631       std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
632       event_selection_set_.AddMonitoredProcesses(pids);
633     } else {
634       LOG(ERROR) << "No threads to monitor. Try `simpleperf help stat` for help\n";
635       return false;
636     }
637   } else {
638     need_to_check_targets = true;
639   }
640   std::unique_ptr<NewThreadMonitor> new_thread_monitor;
641   if (monitor_new_thread_) {
642     new_thread_monitor.reset(new NewThreadMonitor(event_selection_set_, system_wide_collection_,
643                                                   event_selection_set_.GetMonitoredProcesses(),
644                                                   thread_info_));
645   }
646   if (report_per_thread_) {
647     MonitorEachThread(workload);
648   }
649 
650   // 3. Open perf_event_files and output file if defined.
651   if (!event_selection_set_.OpenEventFiles()) {
652     return false;
653   }
654   std::unique_ptr<FILE, decltype(&fclose)> fp_holder(nullptr, fclose);
655   if (!output_filename_.empty()) {
656     fp_holder.reset(fopen(output_filename_.c_str(), "we"));
657     if (fp_holder == nullptr) {
658       PLOG(ERROR) << "failed to open " << output_filename_;
659       return false;
660     }
661   } else if (out_fd_ != -1) {
662     fp_holder.reset(fdopen(out_fd_.release(), "we"));
663     if (fp_holder == nullptr) {
664       PLOG(ERROR) << "failed to write output.";
665       return false;
666     }
667   }
668   FILE* fp = fp_holder ? fp_holder.get() : stdout;
669 
670   // 4. Add signal/periodic Events.
671   IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
672   std::chrono::time_point<std::chrono::steady_clock> start_time;
673   std::vector<CountersInfo> counters;
674   if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
675     return false;
676   }
677   auto exit_loop_callback = [loop]() { return loop->ExitLoop(); };
678   if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP}, exit_loop_callback)) {
679     return false;
680   }
681   if (stop_signal_fd_ != -1) {
682     if (!loop->AddReadEvent(stop_signal_fd_, exit_loop_callback)) {
683       return false;
684     }
685   }
686   if (duration_in_sec_ != 0) {
687     if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_), exit_loop_callback)) {
688       return false;
689     }
690   }
691   auto print_counters = [&]() {
692     auto end_time = std::chrono::steady_clock::now();
693     if (!event_selection_set_.ReadCounters(&counters)) {
694       return false;
695     }
696     double duration_in_sec =
697         std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time).count();
698     if (interval_only_values_) {
699       AdjustToIntervalOnlyValues(counters);
700     }
701     if (!ShowCounters(counters, duration_in_sec, fp)) {
702       return false;
703     }
704     return true;
705   };
706 
707   if (interval_in_ms_ != 0) {
708     if (!loop->AddPeriodicEvent(SecondToTimeval(interval_in_ms_ / 1000.0), print_counters)) {
709       return false;
710     }
711   }
712   if (new_thread_monitor && !new_thread_monitor->Start()) {
713     return false;
714   }
715 
716   // 5. Count events while workload running.
717   start_time = std::chrono::steady_clock::now();
718   if (workload != nullptr && !workload->Start()) {
719     return false;
720   }
721   if (!loop->RunLoop()) {
722     return false;
723   }
724 
725   // 6. Read and print counters.
726   if (interval_in_ms_ == 0) {
727     if (!print_counters()) {
728       return false;
729     }
730   }
731 
732   // 7. Print warnings when needed.
733   event_selection_set_.CloseEventFiles();
734   CheckHardwareCounterMultiplexing();
735   PrintWarningForInaccurateEvents();
736 
737   return true;
738 }
739 
ParseOptions(const std::vector<std::string> & args,std::vector<std::string> * non_option_args,ProbeEvents & probe_events)740 bool StatCommand::ParseOptions(const std::vector<std::string>& args,
741                                std::vector<std::string>* non_option_args,
742                                ProbeEvents& probe_events) {
743   OptionValueMap options;
744   std::vector<std::pair<OptionName, OptionValue>> ordered_options;
745 
746   if (!PreprocessOptions(args, GetStatCmdOptionFormats(), &options, &ordered_options,
747                          non_option_args)) {
748     return false;
749   }
750 
751   // Process options.
752   system_wide_collection_ = options.PullBoolValue("-a");
753 
754   if (auto value = options.PullValue("--app"); value) {
755     app_package_name_ = value->str_value;
756   }
757   csv_ = options.PullBoolValue("--csv");
758 
759   if (!options.PullDoubleValue("--duration", &duration_in_sec_, 1e-9)) {
760     return false;
761   }
762   if (!options.PullDoubleValue("--interval", &interval_in_ms_, 1e-9)) {
763     return false;
764   }
765   interval_only_values_ = options.PullBoolValue("--interval-only-values");
766 
767   in_app_context_ = options.PullBoolValue("--in-app");
768   for (const OptionValue& value : options.PullValues("--kprobe")) {
769     for (const auto& cmd : Split(value.str_value, ",")) {
770       if (!probe_events.AddProbe(ProbeEventType::kKprobe, cmd)) {
771         return false;
772       }
773     }
774   }
775   for (const OptionValue& value : options.PullValues("--uprobe")) {
776     for (const auto& cmd : Split(value.str_value, ",")) {
777       if (!probe_events.AddProbe(ProbeEventType::kUprobe, cmd)) {
778         return false;
779       }
780     }
781   }
782   monitor_new_thread_ = options.PullBoolValue("--monitor-new-thread");
783   child_inherit_ = !options.PullBoolValue("--no-inherit");
784 
785   if (auto value = options.PullValue("-o"); value) {
786     output_filename_ = value->str_value;
787   }
788   if (auto value = options.PullValue("--out-fd"); value) {
789     out_fd_.reset(static_cast<int>(value->uint_value));
790   }
791 
792   report_per_core_ = options.PullBoolValue("--per-core");
793   report_per_thread_ = options.PullBoolValue("--per-thread");
794 
795   if (auto strs = options.PullStringValues("-p"); !strs.empty()) {
796     if (auto pids = GetPidsFromStrings(strs, true, true); pids) {
797       event_selection_set_.AddMonitoredProcesses(pids.value());
798     } else {
799       return false;
800     }
801   }
802   print_hw_counter_ = options.PullBoolValue("--print-hw-counter");
803 
804   if (auto value = options.PullValue("--sort"); value) {
805     sort_keys_ = Split(value->str_value, ",");
806   }
807 
808   if (auto value = options.PullValue("--stop-signal-fd"); value) {
809     stop_signal_fd_.reset(static_cast<int>(value->uint_value));
810   }
811 
812   for (const OptionValue& value : options.PullValues("-t")) {
813     if (auto tids = GetTidsFromString(value.str_value, true); tids) {
814       event_selection_set_.AddMonitoredThreads(tids.value());
815     } else {
816       return false;
817     }
818   }
819 
820   if (auto value = options.PullValue("--tracepoint-events"); value) {
821     if (!EventTypeManager::Instance().ReadTracepointsFromFile(value->str_value)) {
822       return false;
823     }
824   }
825 
826   use_devfreq_counters_ = options.PullBoolValue("--use-devfreq-counters");
827   verbose_mode_ = options.PullBoolValue("--verbose");
828 
829   CHECK(options.values.empty());
830 
831   bool check_event_type = true;
832   if (!app_package_name_.empty() && !in_app_context_ && !IsRoot()) {
833     // Defer event type checking when RunInAppContext() is called.
834     check_event_type = false;
835   }
836 
837   // Process ordered options.
838   for (const auto& pair : ordered_options) {
839     const OptionName& name = pair.first;
840     const OptionValue& value = pair.second;
841 
842     if (name == "--cpu") {
843       if (auto v = GetCpusFromString(value.str_value); v) {
844         std::set<int>& cpus = v.value();
845         event_selection_set_.SetCpusForNewEvents(std::vector<int>(cpus.begin(), cpus.end()));
846       } else {
847         return false;
848       }
849     } else if (name == "-e") {
850       for (const auto& event_type : Split(value.str_value, ",")) {
851         if (!probe_events.CreateProbeEventIfNotExist(event_type)) {
852           return false;
853         }
854         if (!event_selection_set_.AddEventType(event_type, check_event_type)) {
855           return false;
856         }
857       }
858     } else if (name == "--group") {
859       std::vector<std::string> event_types = Split(value.str_value, ",");
860       for (const auto& event_type : event_types) {
861         if (!probe_events.CreateProbeEventIfNotExist(event_type)) {
862           return false;
863         }
864       }
865       if (!event_selection_set_.AddEventGroup(event_types, check_event_type)) {
866         return false;
867       }
868     } else if (name == "--tp-filter") {
869       if (!event_selection_set_.SetTracepointFilter(value.str_value)) {
870         return false;
871       }
872     } else {
873       LOG(ERROR) << "unprocessed option: " << name;
874       return false;
875     }
876   }
877 
878   if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
879     LOG(ERROR) << "Stat system wide and existing processes/threads can't be "
880                   "used at the same time.";
881     return false;
882   }
883   if (system_wide_collection_ && !IsRoot()) {
884     LOG(ERROR) << "System wide profiling needs root privilege.";
885     return false;
886   }
887   if (monitor_new_thread_) {
888     if (!report_per_thread_ || child_inherit_) {
889       LOG(ERROR) << "--monitor-new-thread should be used with --per-thread and --no-inherit";
890       return false;
891     }
892   }
893 
894   if (report_per_core_ || report_per_thread_) {
895     summary_comparator_ = BuildSummaryComparator(sort_keys_, report_per_thread_, report_per_core_);
896     if (!summary_comparator_) {
897       return false;
898     }
899   }
900   return true;
901 }
902 
CheckHardwareCountersOnCpu(int cpu,size_t counters)903 std::optional<bool> CheckHardwareCountersOnCpu(int cpu, size_t counters) {
904   if (counters == 0) {
905     return true;
906   }
907   const EventType* event = FindEventTypeByName("cpu-cycles", true);
908   if (event == nullptr) {
909     return std::nullopt;
910   }
911   perf_event_attr attr = CreateDefaultPerfEventAttr(*event);
912   attr.exclude_kernel = true;
913   auto workload = Workload::CreateWorkload({"sleep", "0.1"});
914   if (!workload || !workload->SetCpuAffinity(cpu)) {
915     return std::nullopt;
916   }
917   std::vector<std::unique_ptr<EventFd>> event_fds;
918   for (size_t i = 0; i < counters; i++) {
919     EventFd* group_event_fd = event_fds.empty() ? nullptr : event_fds[0].get();
920     auto event_fd =
921         EventFd::OpenEventFile(attr, workload->GetPid(), cpu, group_event_fd, "cpu-cycles", false);
922     if (!event_fd) {
923       return false;
924     }
925     event_fds.emplace_back(std::move(event_fd));
926   }
927   if (!workload->Start() || !workload->WaitChildProcess(true, nullptr)) {
928     return std::nullopt;
929   }
930   for (auto& event_fd : event_fds) {
931     PerfCounter counter;
932     if (!event_fd->ReadCounter(&counter)) {
933       return std::nullopt;
934     }
935     if (counter.time_enabled == 0 || counter.time_enabled > counter.time_running) {
936       return false;
937     }
938   }
939   return true;
940 }
941 
GetHardwareCountersOnCpu(int cpu)942 std::optional<size_t> GetHardwareCountersOnCpu(int cpu) {
943   size_t available_counters = 0;
944   while (true) {
945     std::optional<bool> result = CheckHardwareCountersOnCpu(cpu, available_counters + 1);
946     if (!result.has_value()) {
947       return std::nullopt;
948     }
949     if (!result.value()) {
950       break;
951     }
952     available_counters++;
953   }
954   return available_counters;
955 }
956 
PrintHardwareCounters()957 void StatCommand::PrintHardwareCounters() {
958   for (int cpu : GetOnlineCpus()) {
959     std::optional<size_t> counters = GetHardwareCountersOnCpu(cpu);
960     if (!counters) {
961       // When built as a 32-bit program, we can't set sched_affinity to a 64-bit only CPU. So we
962       // may not be able to get hardware counters on that CPU.
963       LOG(WARNING) << "Failed to get CPU PMU hardware counters on cpu " << cpu;
964       continue;
965     }
966     printf("There are %zu CPU PMU hardware counters available on cpu %d.\n", counters.value(), cpu);
967   }
968 }
969 
AddDefaultMeasuredEventTypes()970 bool StatCommand::AddDefaultMeasuredEventTypes() {
971   for (std::string name : default_measured_event_types) {
972     // It is not an error when some event types in the default list are not
973     // supported by the kernel.
974     const EventType* type = FindEventTypeByName(name);
975     if (type == nullptr) {
976       continue;
977     }
978     perf_event_attr attr = CreateDefaultPerfEventAttr(*type);
979     if (!IsKernelEventSupported()) {
980       attr.exclude_kernel = true;
981       if (name == "cpu-clock" || name == "task-clock") {
982         continue;
983       }
984       name += ":u";
985     }
986     if (IsEventAttrSupported(attr, name)) {
987       if (!event_selection_set_.AddEventType(name)) {
988         return false;
989       }
990     }
991   }
992   if (event_selection_set_.empty()) {
993     LOG(ERROR) << "Failed to add any supported default measured types";
994     return false;
995   }
996   return true;
997 }
998 
SetEventSelectionFlags()999 void StatCommand::SetEventSelectionFlags() {
1000   event_selection_set_.SetInherit(child_inherit_);
1001 }
1002 
MonitorEachThread(std::unique_ptr<Workload> & workload)1003 void StatCommand::MonitorEachThread(std::unique_ptr<Workload>& workload) {
1004   std::vector<pid_t> threads;
1005   for (auto pid : event_selection_set_.GetMonitoredProcesses()) {
1006     for (auto tid : GetThreadsInProcess(pid)) {
1007       ThreadInfo info;
1008       if (GetThreadName(tid, &info.name)) {
1009         if (tid == pid && workload && workload->GetPid() == pid) {
1010           info.name = workload->GetCommandName();
1011         }
1012         info.tid = tid;
1013         info.pid = pid;
1014         thread_info_[tid] = std::move(info);
1015         threads.push_back(tid);
1016       }
1017     }
1018   }
1019   for (auto tid : event_selection_set_.GetMonitoredThreads()) {
1020     ThreadInfo info;
1021     if (ReadThreadNameAndPid(tid, &info.name, &info.pid)) {
1022       info.tid = tid;
1023       thread_info_[tid] = std::move(info);
1024       threads.push_back(tid);
1025     }
1026   }
1027   event_selection_set_.ClearMonitoredTargets();
1028   event_selection_set_.AddMonitoredThreads(threads);
1029 }
1030 
AdjustToIntervalOnlyValues(std::vector<CountersInfo> & counters)1031 void StatCommand::AdjustToIntervalOnlyValues(std::vector<CountersInfo>& counters) {
1032   if (last_sum_values_.size() < counters.size()) {
1033     last_sum_values_.resize(counters.size());
1034   }
1035   for (size_t i = 0; i < counters.size(); i++) {
1036     std::vector<CounterInfo>& counters_per_event = counters[i].counters;
1037     std::vector<CounterSum>& last_sum = last_sum_values_[i];
1038 
1039     if (last_sum.size() < counters_per_event.size()) {
1040       last_sum.resize(counters_per_event.size());
1041     }
1042     for (size_t j = 0; j < counters_per_event.size(); j++) {
1043       PerfCounter& counter = counters_per_event[j].counter;
1044       CounterSum new_sum;
1045       new_sum.FromCounter(counter);
1046       CounterSum delta = new_sum - last_sum[j];
1047       delta.ToCounter(counter);
1048       last_sum[j] = new_sum;
1049     }
1050   }
1051 }
1052 
ShowCounters(const std::vector<CountersInfo> & counters,double duration_in_sec,FILE * fp)1053 bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters, double duration_in_sec,
1054                                FILE* fp) {
1055   if (csv_) {
1056     fprintf(fp, "Performance counter statistics,\n");
1057   } else {
1058     fprintf(fp, "Performance counter statistics:\n\n");
1059   }
1060 
1061   if (verbose_mode_) {
1062     for (auto& counters_info : counters) {
1063       for (auto& counter_info : counters_info.counters) {
1064         if (csv_) {
1065           fprintf(fp,
1066                   "%s,tid,%d,cpu,%d,count,%" PRIu64 ",time_enabled,%" PRIu64
1067                   ",time running,%" PRIu64 ",id,%" PRIu64 ",\n",
1068                   counters_info.event_name.c_str(), counter_info.tid, counter_info.cpu,
1069                   counter_info.counter.value, counter_info.counter.time_enabled,
1070                   counter_info.counter.time_running, counter_info.counter.id);
1071         } else {
1072           fprintf(fp,
1073                   "%s(tid %d, cpu %d): count %" PRIu64 ", time_enabled %" PRIu64
1074                   ", time running %" PRIu64 ", id %" PRIu64 "\n",
1075                   counters_info.event_name.c_str(), counter_info.tid, counter_info.cpu,
1076                   counter_info.counter.value, counter_info.counter.time_enabled,
1077                   counter_info.counter.time_running, counter_info.counter.id);
1078         }
1079       }
1080     }
1081   }
1082 
1083   CounterSummaryBuilder builder(report_per_thread_, report_per_core_, csv_, thread_info_,
1084                                 summary_comparator_);
1085   for (const auto& info : counters) {
1086     builder.AddCountersForOneEventType(info);
1087   }
1088   CounterSummaries summaries(builder.Build(), csv_);
1089   summaries.AutoGenerateSummaries();
1090   summaries.GenerateComments(duration_in_sec);
1091   summaries.Show(fp);
1092 
1093   if (csv_) {
1094     fprintf(fp, "Total test time,%lf,seconds,\n", duration_in_sec);
1095   } else {
1096     fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
1097   }
1098   return true;
1099 }
1100 
CheckHardwareCounterMultiplexing()1101 void StatCommand::CheckHardwareCounterMultiplexing() {
1102   for (const auto& [cpu, hardware_events] : event_selection_set_.GetHardwareCountersForCpus()) {
1103     std::optional<bool> result = CheckHardwareCountersOnCpu(cpu, hardware_events);
1104     if (result.has_value() && !result.value()) {
1105       LOG(WARNING) << "It seems the number of hardware events are more than the number of\n"
1106                    << "available CPU PMU hardware counters. That will trigger hardware counter\n"
1107                    << "multiplexing. As a result, events are not counted all the time processes\n"
1108                    << "running, and event counts are smaller than what really happen.\n"
1109                    << "Use --print-hw-counter to show available hardware counters.\n"
1110 #if defined(__ANDROID__)
1111                    << "If on a rooted device, try --use-devfreq-counters to get more counters.\n"
1112 #endif
1113           ;
1114       break;
1115     }
1116   }
1117 }
1118 
PrintWarningForInaccurateEvents()1119 void StatCommand::PrintWarningForInaccurateEvents() {
1120   for (const EventType* event : event_selection_set_.GetEvents()) {
1121     if (event->name == "raw-l3d-cache-lmiss-rd") {
1122       LOG(WARNING) << "PMU event L3D_CACHE_LMISS_RD might undercount on A510. Please use "
1123                       "L3D_CACHE_REFILL_RD instead.";
1124       break;
1125     }
1126   }
1127 }
1128 
1129 }  // namespace
1130 
RegisterStatCommand()1131 void RegisterStatCommand() {
1132   RegisterCommand("stat", [] { return std::unique_ptr<Command>(new StatCommand); });
1133 }
1134 
1135 }  // namespace simpleperf
1136