• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <inttypes.h>
18 #include <signal.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <sys/prctl.h>
22 
23 #include <algorithm>
24 #include <chrono>
25 #include <optional>
26 #include <set>
27 #include <string>
28 #include <string_view>
29 #include <vector>
30 
31 #include <android-base/file.h>
32 #include <android-base/logging.h>
33 #include <android-base/strings.h>
34 #include <android-base/unique_fd.h>
35 
36 #include "IOEventLoop.h"
37 #include "cmd_stat_impl.h"
38 #include "command.h"
39 #include "environment.h"
40 #include "event_attr.h"
41 #include "event_fd.h"
42 #include "event_selection_set.h"
43 #include "event_type.h"
44 #include "utils.h"
45 #include "workload.h"
46 
47 namespace simpleperf {
48 
49 using android::base::Split;
50 
// Event types counted when no event was selected on the command line: StatCommand::Run() calls
// AddDefaultMeasuredEventTypes(), which walks this list, when the event selection set is empty.
static std::vector<std::string> default_measured_event_types{
    "cpu-cycles",   "stalled-cycles-frontend", "stalled-cycles-backend",
    "instructions", "branch-instructions",     "branch-misses",
    "task-clock",   "context-switches",        "page-faults",
};
56 
// Maps an event name to {name of the event used as denominator, description of the ratio}.
// CounterSummaries::GetRateComment() divides the count of the key event by the count of the
// denominator event and prints the quotient as a percentage followed by the description.
// This table is consulted regardless of the target architecture.
static const std::unordered_map<std::string_view, std::pair<std::string_view, std::string_view>>
    COMMON_EVENT_RATE_MAP = {
        {"cache-misses", {"cache-references", "miss rate"}},
        {"branch-misses", {"branch-instructions", "miss rate"}},
};
62 
// Same layout as COMMON_EVENT_RATE_MAP: event name -> {denominator event name, description}.
// CounterSummaries::GetRateComment() only consults this table when the target architecture is
// ARM or ARM64 and COMMON_EVENT_RATE_MAP had no entry for the event.
static const std::unordered_map<std::string_view, std::pair<std::string_view, std::string_view>>
    ARM_EVENT_RATE_MAP = {
        // Refer to "D6.10.5 Meaningful ratios between common microarchitectural events" in ARMv8
        // specification.
        {"raw-l1i-cache-refill", {"raw-l1i-cache", "level 1 instruction cache refill rate"}},
        {"raw-l1i-tlb-refill", {"raw-l1i-tlb", "level 1 instruction TLB refill rate"}},
        {"raw-l1d-cache-refill", {"raw-l1d-cache", "level 1 data or unified cache refill rate"}},
        {"raw-l1d-tlb-refill", {"raw-l1d-tlb", "level 1 data or unified TLB refill rate"}},
        {"raw-l2d-cache-refill", {"raw-l2d-cache", "level 2 data or unified cache refill rate"}},
        {"raw-l2i-cache-refill", {"raw-l2i-cache", "level 2 instruction cache refill rate"}},
        {"raw-l3d-cache-refill", {"raw-l3d-cache", "level 3 data or unified cache refill rate"}},
        {"raw-l2d-tlb-refill", {"raw-l2d-tlb", "level 2 data or unified TLB refill rate"}},
        {"raw-l2i-tlb-refill", {"raw-l2i-tlb", "level 2 instruction TLB refill rate"}},
        {"raw-bus-access", {"raw-bus-cycles", "bus accesses per cycle"}},
        {"raw-ll-cache-miss", {"raw-ll-cache", "last level data or unified cache refill rate"}},
        {"raw-dtlb-walk", {"raw-l1d-tlb", "data TLB miss rate"}},
        {"raw-itlb-walk", {"raw-l1i-tlb", "instruction TLB miss rate"}},
        {"raw-ll-cache-miss-rd", {"raw-ll-cache-rd", "memory read operation miss rate"}},
        {"raw-remote-access-rd",
         {"raw-remote-access", "read accesses to another socket in a multi-socket system"}},
        // Refer to "Table K3-2 Relationship between REFILL events and associated access events" in
        // ARMv8 specification.
        {"raw-l1d-cache-refill-rd", {"raw-l1d-cache-rd", "level 1 cache refill rate, read"}},
        {"raw-l1d-cache-refill-wr", {"raw-l1d-cache-wr", "level 1 cache refill rate, write"}},
        {"raw-l1d-tlb-refill-rd", {"raw-l1d-tlb-rd", "level 1 TLB refill rate, read"}},
        {"raw-l1d-tlb-refill-wr", {"raw-l1d-tlb-wr", "level 1 TLB refill rate, write"}},
        {"raw-l2d-cache-refill-rd", {"raw-l2d-cache-rd", "level 2 data cache refill rate, read"}},
        {"raw-l2d-cache-refill-wr", {"raw-l2d-cache-wr", "level 2 data cache refill rate, write"}},
        {"raw-l2d-tlb-refill-rd", {"raw-l2d-tlb-rd", "level 2 data TLB refill rate, read"}},
};
93 
FindSummary(const std::string & type_name,const std::string & modifier,const ThreadInfo * thread,int cpu)94 const CounterSummary* CounterSummaries::FindSummary(const std::string& type_name,
95                                                     const std::string& modifier,
96                                                     const ThreadInfo* thread, int cpu) {
97   for (const auto& s : summaries_) {
98     if (s.type_name == type_name && s.modifier == modifier && s.thread == thread && s.cpu == cpu) {
99       return &s;
100     }
101   }
102   return nullptr;
103 }
104 
AutoGenerateSummaries()105 void CounterSummaries::AutoGenerateSummaries() {
106   for (size_t i = 0; i < summaries_.size(); ++i) {
107     const CounterSummary& s = summaries_[i];
108     if (s.modifier == "u") {
109       const CounterSummary* other = FindSummary(s.type_name, "k", s.thread, s.cpu);
110       if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
111         if (FindSummary(s.type_name, "", s.thread, s.cpu) == nullptr) {
112           summaries_.emplace_back(s.type_name, "", s.group_id, s.thread, s.cpu,
113                                   s.count + other->count, s.runtime_in_ns, s.scale, true, csv_);
114         }
115       }
116     }
117   }
118 }
119 
GenerateComments(double duration_in_sec)120 void CounterSummaries::GenerateComments(double duration_in_sec) {
121   for (auto& s : summaries_) {
122     s.comment = GetCommentForSummary(s, duration_in_sec);
123   }
124 }
125 
Show(FILE * fp)126 void CounterSummaries::Show(FILE* fp) {
127   bool show_thread = !summaries_.empty() && summaries_[0].thread != nullptr;
128   bool show_cpu = !summaries_.empty() && summaries_[0].cpu != -1;
129   if (csv_) {
130     ShowCSV(fp, show_thread, show_cpu);
131   } else {
132     ShowText(fp, show_thread, show_cpu);
133   }
134 }
135 
ShowCSV(FILE * fp,bool show_thread,bool show_cpu)136 void CounterSummaries::ShowCSV(FILE* fp, bool show_thread, bool show_cpu) {
137   for (auto& s : summaries_) {
138     if (show_thread) {
139       fprintf(fp, "%s,%d,%d,", s.thread->name.c_str(), s.thread->pid, s.thread->tid);
140     }
141     if (show_cpu) {
142       fprintf(fp, "%d,", s.cpu);
143     }
144     fprintf(fp, "%s,%s,%s,%s\n", s.readable_count.c_str(), s.Name().c_str(), s.comment.c_str(),
145             (s.auto_generated ? "(generated)," : ""));
146   }
147 }
148 
ShowText(FILE * fp,bool show_thread,bool show_cpu)149 void CounterSummaries::ShowText(FILE* fp, bool show_thread, bool show_cpu) {
150   std::vector<std::string> titles;
151 
152   if (show_thread) {
153     titles = {"thread_name", "pid", "tid"};
154   }
155   if (show_cpu) {
156     titles.emplace_back("cpu");
157   }
158   titles.emplace_back("count");
159   titles.emplace_back("event_name");
160   titles.emplace_back(" # count / runtime");
161 
162   std::vector<size_t> width(titles.size(), 0);
163 
164   auto adjust_width = [](size_t& w, size_t size) { w = std::max(w, size); };
165 
166   // The last title is too long. Don't include it for width adjustment.
167   for (size_t i = 0; i + 1 < titles.size(); i++) {
168     adjust_width(width[i], titles[i].size());
169   }
170 
171   for (auto& s : summaries_) {
172     size_t i = 0;
173     if (show_thread) {
174       adjust_width(width[i++], s.thread->name.size());
175       adjust_width(width[i++], std::to_string(s.thread->pid).size());
176       adjust_width(width[i++], std::to_string(s.thread->tid).size());
177     }
178     if (show_cpu) {
179       adjust_width(width[i++], std::to_string(s.cpu).size());
180     }
181     adjust_width(width[i++], s.readable_count.size());
182     adjust_width(width[i++], s.Name().size());
183     adjust_width(width[i++], s.comment.size());
184   }
185 
186   fprintf(fp, "# ");
187   for (size_t i = 0; i < titles.size(); i++) {
188     if (titles[i] == "count") {
189       fprintf(fp, "%*s", static_cast<int>(width[i]), titles[i].c_str());
190     } else {
191       fprintf(fp, "%-*s", static_cast<int>(width[i]), titles[i].c_str());
192     }
193     if (i + 1 < titles.size()) {
194       fprintf(fp, "  ");
195     }
196   }
197   fprintf(fp, "\n");
198 
199   for (auto& s : summaries_) {
200     size_t i = 0;
201     if (show_thread) {
202       fprintf(fp, "  %-*s", static_cast<int>(width[i++]), s.thread->name.c_str());
203       fprintf(fp, "  %-*d", static_cast<int>(width[i++]), s.thread->pid);
204       fprintf(fp, "  %-*d", static_cast<int>(width[i++]), s.thread->tid);
205     }
206     if (show_cpu) {
207       fprintf(fp, "  %-*d", static_cast<int>(width[i++]), s.cpu);
208     }
209     fprintf(fp, "  %*s  %-*s   # %-*s%s\n", static_cast<int>(width[i]), s.readable_count.c_str(),
210             static_cast<int>(width[i + 1]), s.Name().c_str(), static_cast<int>(width[i + 2]),
211             s.comment.c_str(), (s.auto_generated ? " (generated)" : ""));
212   }
213 }
214 
GetCommentForSummary(const CounterSummary & s,double duration_in_sec)215 std::string CounterSummaries::GetCommentForSummary(const CounterSummary& s,
216                                                    double duration_in_sec) {
217   char sap_mid;
218   if (csv_) {
219     sap_mid = ',';
220   } else {
221     sap_mid = ' ';
222   }
223   if (s.type_name == "task-clock") {
224     double run_sec = s.count / 1e9;
225     double used_cpus = run_sec / duration_in_sec;
226     return android::base::StringPrintf("%f%ccpus used", used_cpus, sap_mid);
227   }
228   if (s.type_name == "cpu-clock") {
229     return "";
230   }
231   if (s.type_name == "cpu-cycles") {
232     if (s.runtime_in_ns == 0) {
233       return "";
234     }
235     double ghz = static_cast<double>(s.count) / s.runtime_in_ns;
236     return android::base::StringPrintf("%f%cGHz", ghz, sap_mid);
237   }
238   if (s.type_name == "instructions" && s.count != 0) {
239     const CounterSummary* other = FindSummary("cpu-cycles", s.modifier, s.thread, s.cpu);
240     if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
241       double cpi = static_cast<double>(other->count) / s.count;
242       return android::base::StringPrintf("%f%ccycles per instruction", cpi, sap_mid);
243     }
244   }
245   std::string rate_comment = GetRateComment(s, sap_mid);
246   if (!rate_comment.empty()) {
247     return rate_comment;
248   }
249   if (s.runtime_in_ns == 0) {
250     return "";
251   }
252   double runtime_in_sec = static_cast<double>(s.runtime_in_ns) / 1e9;
253   double rate = s.count / runtime_in_sec;
254   if (rate >= 1e9 - 1e5) {
255     return android::base::StringPrintf("%.3f%cG/sec", rate / 1e9, sap_mid);
256   }
257   if (rate >= 1e6 - 1e2) {
258     return android::base::StringPrintf("%.3f%cM/sec", rate / 1e6, sap_mid);
259   }
260   if (rate >= 1e3) {
261     return android::base::StringPrintf("%.3f%cK/sec", rate / 1e3, sap_mid);
262   }
263   return android::base::StringPrintf("%.3f%c/sec", rate, sap_mid);
264 }
265 
GetRateComment(const CounterSummary & s,char sep)266 std::string CounterSummaries::GetRateComment(const CounterSummary& s, char sep) {
267   std::string_view miss_event_name = s.type_name;
268   std::string event_name;
269   std::string rate_desc;
270   if (auto it = COMMON_EVENT_RATE_MAP.find(miss_event_name); it != COMMON_EVENT_RATE_MAP.end()) {
271     event_name = it->second.first;
272     rate_desc = it->second.second;
273   }
274   if (event_name.empty() && (GetTargetArch() == ARCH_ARM || GetTargetArch() == ARCH_ARM64)) {
275     if (auto it = ARM_EVENT_RATE_MAP.find(miss_event_name); it != ARM_EVENT_RATE_MAP.end()) {
276       event_name = it->second.first;
277       rate_desc = it->second.second;
278     }
279   }
280   if (event_name.empty() && android::base::ConsumeSuffix(&miss_event_name, "-misses")) {
281     event_name = std::string(miss_event_name) + "s";
282     rate_desc = "miss rate";
283   }
284   if (!event_name.empty()) {
285     const CounterSummary* other = FindSummary(event_name, s.modifier, s.thread, s.cpu);
286     if (other != nullptr && other->IsMonitoredAtTheSameTime(s) && other->count != 0) {
287       double miss_rate = static_cast<double>(s.count) / other->count;
288       return android::base::StringPrintf("%f%%%c%s", miss_rate * 100, sep, rate_desc.c_str());
289     }
290   }
291   return "";
292 }
293 
294 namespace {
295 
296 // devfreq may use performance counters to calculate memory latency (as in
297 // drivers/devfreq/arm-memlat-mon.c). Hopefully we can get more available counters by asking devfreq
298 // to not use the memory latency governor temporarily.
299 class DevfreqCounters {
300  public:
Use()301   bool Use() {
302     if (!IsRoot()) {
303       LOG(ERROR) << "--use-devfreq-counters needs root permission to set devfreq governors";
304       return false;
305     }
306     std::string devfreq_dir = "/sys/class/devfreq/";
307     for (auto& name : GetSubDirs(devfreq_dir)) {
308       std::string governor_path = devfreq_dir + name + "/governor";
309       if (IsRegularFile(governor_path)) {
310         std::string governor;
311         if (!android::base::ReadFileToString(governor_path, &governor)) {
312           LOG(ERROR) << "failed to read " << governor_path;
313           return false;
314         }
315         governor = android::base::Trim(governor);
316         if (governor == "mem_latency") {
317           if (!android::base::WriteStringToFile("performance", governor_path)) {
318             PLOG(ERROR) << "failed to write " << governor_path;
319             return false;
320           }
321           mem_latency_governor_paths_.emplace_back(std::move(governor_path));
322         }
323       }
324     }
325     return true;
326   }
327 
~DevfreqCounters()328   ~DevfreqCounters() {
329     for (auto& path : mem_latency_governor_paths_) {
330       android::base::WriteStringToFile("mem_latency", path);
331     }
332   }
333 
334  private:
335   std::vector<std::string> mem_latency_governor_paths_;
336 };
337 
// Implementation of the `simpleperf stat` command: counts the selected events on the chosen
// targets (a workload command, existing processes/threads, an app, or the whole system) and
// prints the counter values. Options are parsed by ParseOptions(); Run() drives the command.
class StatCommand : public Command {
 public:
  StatCommand()
      : Command(
            "stat", "gather performance counter information",
            // clang-format off
"Usage: simpleperf stat [options] [command [command-args]]\n"
"       Gather performance counter information of running [command].\n"
"       And -a/-p/-t option can be used to change target of counter information.\n"
"-a           Collect system-wide information.\n"
#if defined(__ANDROID__)
"--app package_name    Profile the process of an Android application.\n"
"                      On non-rooted devices, the app must be debuggable,\n"
"                      because we use run-as to switch to the app's context.\n"
#endif
"--cpu cpu_item1,cpu_item2,...\n"
"                 Collect information only on the selected cpus. cpu_item can\n"
"                 be a cpu number like 1, or a cpu range like 0-3.\n"
"--csv            Write report in comma separate form.\n"
"--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
"                        [command]. Here time_in_sec may be any positive\n"
"                        floating point number.\n"
"--interval time_in_ms   Print stat for every time_in_ms milliseconds.\n"
"                        Here time_in_ms may be any positive floating point\n"
"                        number. Simpleperf prints total values from the\n"
"                        starting point. But this can be changed by\n"
"                        --interval-only-values.\n"
"--interval-only-values  Print numbers of events happened in each interval.\n"
"-e event1[:modifier1],event2[:modifier2],...\n"
"                 Select a list of events to count. An event can be:\n"
"                   1) an event name listed in `simpleperf list`;\n"
"                   2) a raw PMU event in rN format. N is a hex number.\n"
"                      For example, r1b selects event number 0x1b.\n"
"                 Modifiers can be added to define how the event should be\n"
"                 monitored. Possible modifiers are:\n"
"                   u - monitor user space events only\n"
"                   k - monitor kernel space events only\n"
"--group event1[:modifier],event2[:modifier2],...\n"
"             Similar to -e option. But events specified in the same --group\n"
"             option are monitored as a group, and scheduled in and out at the\n"
"             same time.\n"
"--no-inherit     Don't stat created child threads/processes.\n"
"-o output_filename  Write report to output_filename instead of standard output.\n"
"--per-core       Print counters for each cpu core.\n"
"--per-thread     Print counters for each thread.\n"
"-p pid1,pid2,... Stat events on existing processes. Mutually exclusive with -a.\n"
"-t tid1,tid2,... Stat events on existing threads. Mutually exclusive with -a.\n"
"--print-hw-counter    Test and print CPU PMU hardware counters available on the device.\n"
"--sort key1,key2,...  Select keys used to sort the report, used when --per-thread\n"
"                      or --per-core appears. The appearance order of keys decides\n"
"                      the order of keys used to sort the report.\n"
"                      Possible keys include:\n"
"                        count             -- event count for each entry\n"
"                        count_per_thread  -- event count for a thread on all cpus\n"
"                        cpu               -- cpu id\n"
"                        pid               -- process id\n"
"                        tid               -- thread id\n"
"                        comm              -- thread name\n"
"                      The default sort keys are:\n"
"                        count_per_thread,tid,cpu,count\n"
#if defined(__ANDROID__)
"--use-devfreq-counters    On devices with Qualcomm SOCs, some hardware counters may be used\n"
"                          to monitor memory latency (in drivers/devfreq/arm-memlat-mon.c),\n"
"                          making fewer counters available to users. This option asks devfreq\n"
"                          to temporarily release counters by replacing memory-latency governor\n"
"                          with performance governor. It affects memory latency during profiling,\n"
"                          and may cause wedged power if simpleperf is killed in between.\n"
#endif
"--verbose        Show result in verbose mode.\n"
#if 0
// Below options are only used internally and shouldn't be visible to the public.
"--in-app         We are already running in the app's context.\n"
"--tracepoint-events file_name   Read tracepoint events from [file_name] instead of tracefs.\n"
"--out-fd <fd>    Write output to a file descriptor.\n"
"--stop-signal-fd <fd>   Stop stating when fd is readable.\n"
#endif
            // clang-format on
            ),
        verbose_mode_(false),
        system_wide_collection_(false),
        child_inherit_(true),
        duration_in_sec_(0),
        interval_in_ms_(0),
        interval_only_values_(false),
        event_selection_set_(true),
        csv_(false),
        in_app_context_(false) {
    // Die if parent exits.
    prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
    // Set default sort keys. Full key list is in BuildSummaryComparator().
    sort_keys_ = {"count_per_thread", "tid", "cpu", "count"};
  }

  // Runs the command with the given command-line arguments. Returns false on failure.
  bool Run(const std::vector<std::string>& args);

 private:
  bool ParseOptions(const std::vector<std::string>& args,
                    std::vector<std::string>* non_option_args);
  void PrintHardwareCounters();
  bool AddDefaultMeasuredEventTypes();
  void SetEventSelectionFlags();
  void MonitorEachThread();
  void AdjustToIntervalOnlyValues(std::vector<CountersInfo>& counters);
  bool ShowCounters(const std::vector<CountersInfo>& counters, double duration_in_sec, FILE* fp);
  void CheckHardwareCounterMultiplexing();

  // Set by --verbose.
  bool verbose_mode_;
  // Set by -a.
  bool system_wide_collection_;
  // Cleared by --no-inherit.
  bool child_inherit_;
  // Set by --duration; 0 means no duration limit was requested.
  double duration_in_sec_;
  // Set by --interval; 0 means counters are printed once, after the event loop exits.
  double interval_in_ms_;
  // Set by --interval-only-values.
  bool interval_only_values_;
  // NOTE(review): presumably the sums of the previous interval, used by
  // AdjustToIntervalOnlyValues() -- its definition is outside this part of the file.
  std::vector<std::vector<CounterSum>> last_sum_values_;
  // Cpus selected by --cpu; passed to EventSelectionSet::OpenEventFiles().
  std::vector<int> cpus_;
  EventSelectionSet event_selection_set_;
  // Set by -o; when empty the report goes to out_fd_ if valid, otherwise to stdout.
  std::string output_filename_;
  // Set by --out-fd.
  android::base::unique_fd out_fd_;
  // Set by --csv.
  bool csv_;
  // Set by --app.
  std::string app_package_name_;
  // Set by --in-app.
  bool in_app_context_;
  // Set by --stop-signal-fd; the event loop exits when this fd becomes readable.
  android::base::unique_fd stop_signal_fd_;
  // Set by --use-devfreq-counters.
  bool use_devfreq_counters_ = false;

  // Set by --per-core.
  bool report_per_core_ = false;
  // Set by --per-thread.
  bool report_per_thread_ = false;
  // used to report event count for each thread
  std::unordered_map<pid_t, ThreadInfo> thread_info_;
  // used to sort report
  std::vector<std::string> sort_keys_;
  // Built from sort_keys_ by ParseOptions() when --per-core or --per-thread is given.
  std::optional<SummaryComparator> summary_comparator_;
  // Set by --print-hw-counter.
  bool print_hw_counter_ = false;
};
470 
// Entry point of `simpleperf stat`. Parses the options, chooses the monitored targets, opens
// the perf event files and the output stream, registers the stop conditions on the IO event
// loop, runs the loop while the events are counted, and finally prints the counters.
// Returns false as soon as any step fails; returns true right after printing the hardware
// counters when --print-hw-counter is given.
bool StatCommand::Run(const std::vector<std::string>& args) {
  if (!CheckPerfEventLimit()) {
    return false;
  }
  AllowMoreOpenedFiles();

  // 1. Parse options, and use default measured event types if not given.
  std::vector<std::string> workload_args;
  if (!ParseOptions(args, &workload_args)) {
    return false;
  }
  if (print_hw_counter_) {
    PrintHardwareCounters();
    return true;
  }
  // With --app, a non-root caller that is not yet inside the app's context hands the whole
  // command over to RunInAppContext() and returns its result.
  if (!app_package_name_.empty() && !in_app_context_) {
    if (!IsRoot()) {
      return RunInAppContext(app_package_name_, "stat", args, workload_args.size(),
                             output_filename_, !event_selection_set_.GetTracepointEvents().empty());
    }
  }
  // Lives until Run() returns, so its destructor restores the devfreq governors changed by
  // Use() on every return path below.
  DevfreqCounters devfreq_counters;
  if (use_devfreq_counters_) {
    if (!devfreq_counters.Use()) {
      return false;
    }
  }
  if (event_selection_set_.empty()) {
    if (!AddDefaultMeasuredEventTypes()) {
      return false;
    }
  }
  SetEventSelectionFlags();

  // 2. Create workload.
  std::unique_ptr<Workload> workload;
  if (!workload_args.empty()) {
    workload = Workload::CreateWorkload(workload_args);
    if (workload == nullptr) {
      return false;
    }
  }
  // Target selection, in priority order: system wide (-a), targets already added by -p/-t,
  // the workload process, then the processes of the app given by --app.
  bool need_to_check_targets = false;
  if (system_wide_collection_) {
    if (report_per_thread_) {
      event_selection_set_.AddMonitoredProcesses(GetAllProcesses());
    } else {
      event_selection_set_.AddMonitoredThreads({-1});
    }
  } else if (!event_selection_set_.HasMonitoredTarget()) {
    if (workload != nullptr) {
      event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
      event_selection_set_.SetEnableOnExec(true);
    } else if (!app_package_name_.empty()) {
      std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
      event_selection_set_.AddMonitoredProcesses(pids);
    } else {
      LOG(ERROR) << "No threads to monitor. Try `simpleperf help stat` for help\n";
      return false;
    }
  } else {
    // Targets were given with -p/-t; StopWhenNoMoreTargets() is registered for them in step 4.
    need_to_check_targets = true;
  }

  if (report_per_thread_) {
    MonitorEachThread();
  }

  // 3. Open perf_event_files and output file if defined.
  if (!event_selection_set_.OpenEventFiles(cpus_)) {
    return false;
  }
  // Owns the output stream when it is a file (-o) or a file descriptor (--out-fd); stays empty
  // when the report goes to stdout, which must not be closed here.
  std::unique_ptr<FILE, decltype(&fclose)> fp_holder(nullptr, fclose);
  if (!output_filename_.empty()) {
    fp_holder.reset(fopen(output_filename_.c_str(), "we"));
    if (fp_holder == nullptr) {
      PLOG(ERROR) << "failed to open " << output_filename_;
      return false;
    }
  } else if (out_fd_ != -1) {
    fp_holder.reset(fdopen(out_fd_.release(), "we"));
    if (fp_holder == nullptr) {
      PLOG(ERROR) << "failed to write output.";
      return false;
    }
  }
  FILE* fp = fp_holder ? fp_holder.get() : stdout;

  // 4. Add signal/periodic Events.
  IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
  if (interval_in_ms_ != 0) {
    if (!loop->UsePreciseTimer()) {
      return false;
    }
  }
  // start_time is assigned in step 5, right before the workload starts; print_counters below
  // captures it by reference and only reads it when it is invoked.
  std::chrono::time_point<std::chrono::steady_clock> start_time;
  std::vector<CountersInfo> counters;
  if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
    return false;
  }
  // The loop exits on any of these signals, when stop_signal_fd_ becomes readable, or when the
  // --duration timer fires.
  auto exit_loop_callback = [loop]() { return loop->ExitLoop(); };
  if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP}, exit_loop_callback)) {
    return false;
  }
  if (stop_signal_fd_ != -1) {
    if (!loop->AddReadEvent(stop_signal_fd_, exit_loop_callback)) {
      return false;
    }
  }
  if (duration_in_sec_ != 0) {
    if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_), exit_loop_callback)) {
      return false;
    }
  }
  // Reads the counters and prints them together with the time elapsed since start_time.
  auto print_counters = [&]() {
    auto end_time = std::chrono::steady_clock::now();
    if (!event_selection_set_.ReadCounters(&counters)) {
      return false;
    }
    double duration_in_sec =
        std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time).count();
    if (interval_only_values_) {
      AdjustToIntervalOnlyValues(counters);
    }
    if (!ShowCounters(counters, duration_in_sec, fp)) {
      return false;
    }
    return true;
  };

  if (interval_in_ms_ != 0) {
    if (!loop->AddPeriodicEvent(SecondToTimeval(interval_in_ms_ / 1000.0), print_counters)) {
      return false;
    }
  }

  // 5. Count events while workload running.
  start_time = std::chrono::steady_clock::now();
  if (workload != nullptr && !workload->Start()) {
    return false;
  }
  if (!loop->RunLoop()) {
    return false;
  }

  // 6. Read and print counters.
  // With --interval the periodic event above does the printing, so nothing is printed here.
  if (interval_in_ms_ == 0) {
    if (!print_counters()) {
      return false;
    }
  }

  // 7. Print hardware counter multiplexing warning when needed.
  event_selection_set_.CloseEventFiles();
  CheckHardwareCounterMultiplexing();

  return true;
}
629 
// Parses the command-line arguments of `simpleperf stat` into the member fields.
// Arguments that are not options are returned through non_option_args; Run() uses them as the
// workload command line. Returns false on an invalid option value or an invalid option
// combination (-a together with -p/-t, -a without root, or unusable --sort keys).
bool StatCommand::ParseOptions(const std::vector<std::string>& args,
                               std::vector<std::string>* non_option_args) {
  OptionValueMap options;
  std::vector<std::pair<OptionName, OptionValue>> ordered_options;

  if (!PreprocessOptions(args, GetStatCmdOptionFormats(), &options, &ordered_options,
                         non_option_args)) {
    return false;
  }

  // Process options.
  // Every known option is pulled out of `options` below; the CHECKs after the last pull verify
  // that nothing is left unhandled.
  system_wide_collection_ = options.PullBoolValue("-a");

  if (auto value = options.PullValue("--app"); value) {
    app_package_name_ = *value->str_value;
  }
  if (auto value = options.PullValue("--cpu"); value) {
    if (auto cpus = GetCpusFromString(*value->str_value); cpus) {
      cpus_.assign(cpus->begin(), cpus->end());
    } else {
      return false;
    }
  }

  csv_ = options.PullBoolValue("--csv");

  // NOTE(review): 1e-9 is presumably the smallest accepted value (the help text asks for a
  // positive number) -- confirm against OptionValueMap::PullDoubleValue().
  if (!options.PullDoubleValue("--duration", &duration_in_sec_, 1e-9)) {
    return false;
  }
  if (!options.PullDoubleValue("--interval", &interval_in_ms_, 1e-9)) {
    return false;
  }
  interval_only_values_ = options.PullBoolValue("--interval-only-values");

  // Events given with -e are added one by one; events given with one --group option are added
  // together as a group.
  for (const OptionValue& value : options.PullValues("-e")) {
    for (const auto& event_type : Split(*value.str_value, ",")) {
      if (!event_selection_set_.AddEventType(event_type)) {
        return false;
      }
    }
  }

  for (const OptionValue& value : options.PullValues("--group")) {
    if (!event_selection_set_.AddEventGroup(Split(*value.str_value, ","))) {
      return false;
    }
  }

  in_app_context_ = options.PullBoolValue("--in-app");
  child_inherit_ = !options.PullBoolValue("--no-inherit");

  if (auto value = options.PullValue("-o"); value) {
    output_filename_ = *value->str_value;
  }
  if (auto value = options.PullValue("--out-fd"); value) {
    out_fd_.reset(static_cast<int>(value->uint_value));
  }

  report_per_core_ = options.PullBoolValue("--per-core");
  report_per_thread_ = options.PullBoolValue("--per-thread");

  for (const OptionValue& value : options.PullValues("-p")) {
    if (auto pids = GetTidsFromString(*value.str_value, true); pids) {
      event_selection_set_.AddMonitoredProcesses(pids.value());
    } else {
      return false;
    }
  }
  print_hw_counter_ = options.PullBoolValue("--print-hw-counter");

  // Replaces the default sort keys set in the constructor.
  if (auto value = options.PullValue("--sort"); value) {
    sort_keys_ = Split(*value->str_value, ",");
  }

  if (auto value = options.PullValue("--stop-signal-fd"); value) {
    stop_signal_fd_.reset(static_cast<int>(value->uint_value));
  }

  for (const OptionValue& value : options.PullValues("-t")) {
    if (auto tids = GetTidsFromString(*value.str_value, true); tids) {
      event_selection_set_.AddMonitoredThreads(tids.value());
    } else {
      return false;
    }
  }

  if (auto value = options.PullValue("--tracepoint-events"); value) {
    if (!EventTypeManager::Instance().ReadTracepointsFromFile(*value->str_value)) {
      return false;
    }
  }

  use_devfreq_counters_ = options.PullBoolValue("--use-devfreq-counters");
  verbose_mode_ = options.PullBoolValue("--verbose");

  CHECK(options.values.empty());
  CHECK(ordered_options.empty());

  // Cross-option validation.
  if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
    LOG(ERROR) << "Stat system wide and existing processes/threads can't be "
                  "used at the same time.";
    return false;
  }
  if (system_wide_collection_ && !IsRoot()) {
    LOG(ERROR) << "System wide profiling needs root privilege.";
    return false;
  }

  // The comparator is only built for per-core / per-thread reports.
  if (report_per_core_ || report_per_thread_) {
    summary_comparator_ = BuildSummaryComparator(sort_keys_, report_per_thread_, report_per_core_);
    if (!summary_comparator_) {
      return false;
    }
  }
  return true;
}
746 
CheckHardwareCountersOnCpu(int cpu,size_t counters)747 std::optional<bool> CheckHardwareCountersOnCpu(int cpu, size_t counters) {
748   const EventType* event = FindEventTypeByName("cpu-cycles", true);
749   if (event == nullptr) {
750     return std::nullopt;
751   }
752   perf_event_attr attr = CreateDefaultPerfEventAttr(*event);
753   auto workload = Workload::CreateWorkload({"sleep", "0.1"});
754   if (!workload || !workload->SetCpuAffinity(cpu)) {
755     return std::nullopt;
756   }
757   std::vector<std::unique_ptr<EventFd>> event_fds;
758   for (size_t i = 0; i < counters; i++) {
759     EventFd* group_event_fd = event_fds.empty() ? nullptr : event_fds[0].get();
760     auto event_fd =
761         EventFd::OpenEventFile(attr, workload->GetPid(), cpu, group_event_fd, "cpu-cycles", false);
762     if (!event_fd) {
763       return false;
764     }
765     event_fds.emplace_back(std::move(event_fd));
766   }
767   if (!workload->Start() || !workload->WaitChildProcess(true, nullptr)) {
768     return std::nullopt;
769   }
770   for (auto& event_fd : event_fds) {
771     PerfCounter counter;
772     if (!event_fd->ReadCounter(&counter)) {
773       return std::nullopt;
774     }
775     if (counter.time_enabled == 0 || counter.time_enabled > counter.time_running) {
776       return false;
777     }
778   }
779   return true;
780 }
781 
GetHardwareCountersOnCpu(int cpu)782 std::optional<size_t> GetHardwareCountersOnCpu(int cpu) {
783   size_t available_counters = 0;
784   while (true) {
785     std::optional<bool> result = CheckHardwareCountersOnCpu(cpu, available_counters + 1);
786     if (!result.has_value()) {
787       return std::nullopt;
788     }
789     if (!result.value()) {
790       break;
791     }
792     available_counters++;
793   }
794   return available_counters;
795 }
796 
PrintHardwareCounters()797 void StatCommand::PrintHardwareCounters() {
798   for (int cpu : GetOnlineCpus()) {
799     std::optional<size_t> counters = GetHardwareCountersOnCpu(cpu);
800     if (!counters) {
801       // When built as a 32-bit program, we can't set sched_affinity to a 64-bit only CPU. So we
802       // may not be able to get hardware counters on that CPU.
803       LOG(WARNING) << "Failed to get CPU PMU hardware counters on cpu " << cpu;
804       continue;
805     }
806     printf("There are %zu CPU PMU hardware counters available on cpu %d.\n", counters.value(), cpu);
807   }
808 }
809 
AddDefaultMeasuredEventTypes()810 bool StatCommand::AddDefaultMeasuredEventTypes() {
811   for (auto& name : default_measured_event_types) {
812     // It is not an error when some event types in the default list are not
813     // supported by the kernel.
814     const EventType* type = FindEventTypeByName(name);
815     if (type != nullptr && IsEventAttrSupported(CreateDefaultPerfEventAttr(*type), name)) {
816       if (!event_selection_set_.AddEventType(name)) {
817         return false;
818       }
819     }
820   }
821   if (event_selection_set_.empty()) {
822     LOG(ERROR) << "Failed to add any supported default measured types";
823     return false;
824   }
825   return true;
826 }
827 
SetEventSelectionFlags()828 void StatCommand::SetEventSelectionFlags() {
829   event_selection_set_.SetInherit(child_inherit_);
830 }
831 
MonitorEachThread()832 void StatCommand::MonitorEachThread() {
833   std::vector<pid_t> threads;
834   for (auto pid : event_selection_set_.GetMonitoredProcesses()) {
835     for (auto tid : GetThreadsInProcess(pid)) {
836       ThreadInfo info;
837       if (GetThreadName(tid, &info.name)) {
838         info.tid = tid;
839         info.pid = pid;
840         thread_info_[tid] = std::move(info);
841         threads.push_back(tid);
842       }
843     }
844   }
845   for (auto tid : event_selection_set_.GetMonitoredThreads()) {
846     ThreadInfo info;
847     if (ReadThreadNameAndPid(tid, &info.name, &info.pid)) {
848       info.tid = tid;
849       thread_info_[tid] = std::move(info);
850       threads.push_back(tid);
851     }
852   }
853   event_selection_set_.ClearMonitoredTargets();
854   event_selection_set_.AddMonitoredThreads(threads);
855 }
856 
AdjustToIntervalOnlyValues(std::vector<CountersInfo> & counters)857 void StatCommand::AdjustToIntervalOnlyValues(std::vector<CountersInfo>& counters) {
858   if (last_sum_values_.size() < counters.size()) {
859     last_sum_values_.resize(counters.size());
860   }
861   for (size_t i = 0; i < counters.size(); i++) {
862     std::vector<CounterInfo>& counters_per_event = counters[i].counters;
863     std::vector<CounterSum>& last_sum = last_sum_values_[i];
864 
865     if (last_sum.size() < counters_per_event.size()) {
866       last_sum.resize(counters_per_event.size());
867     }
868     for (size_t j = 0; j < counters_per_event.size(); j++) {
869       PerfCounter& counter = counters_per_event[j].counter;
870       CounterSum new_sum;
871       new_sum.FromCounter(counter);
872       CounterSum delta = new_sum - last_sum[j];
873       delta.ToCounter(counter);
874       last_sum[j] = new_sum;
875     }
876   }
877 }
878 
ShowCounters(const std::vector<CountersInfo> & counters,double duration_in_sec,FILE * fp)879 bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters, double duration_in_sec,
880                                FILE* fp) {
881   if (csv_) {
882     fprintf(fp, "Performance counter statistics,\n");
883   } else {
884     fprintf(fp, "Performance counter statistics:\n\n");
885   }
886 
887   if (verbose_mode_) {
888     for (auto& counters_info : counters) {
889       for (auto& counter_info : counters_info.counters) {
890         if (csv_) {
891           fprintf(fp,
892                   "%s,tid,%d,cpu,%d,count,%" PRIu64 ",time_enabled,%" PRIu64
893                   ",time running,%" PRIu64 ",id,%" PRIu64 ",\n",
894                   counters_info.event_name.c_str(), counter_info.tid, counter_info.cpu,
895                   counter_info.counter.value, counter_info.counter.time_enabled,
896                   counter_info.counter.time_running, counter_info.counter.id);
897         } else {
898           fprintf(fp,
899                   "%s(tid %d, cpu %d): count %" PRIu64 ", time_enabled %" PRIu64
900                   ", time running %" PRIu64 ", id %" PRIu64 "\n",
901                   counters_info.event_name.c_str(), counter_info.tid, counter_info.cpu,
902                   counter_info.counter.value, counter_info.counter.time_enabled,
903                   counter_info.counter.time_running, counter_info.counter.id);
904         }
905       }
906     }
907   }
908 
909   CounterSummaryBuilder builder(report_per_thread_, report_per_core_, csv_, thread_info_,
910                                 summary_comparator_);
911   for (const auto& info : counters) {
912     builder.AddCountersForOneEventType(info);
913   }
914   CounterSummaries summaries(builder.Build(), csv_);
915   summaries.AutoGenerateSummaries();
916   summaries.GenerateComments(duration_in_sec);
917   summaries.Show(fp);
918 
919   if (csv_) {
920     fprintf(fp, "Total test time,%lf,seconds,\n", duration_in_sec);
921   } else {
922     fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
923   }
924   return true;
925 }
926 
CheckHardwareCounterMultiplexing()927 void StatCommand::CheckHardwareCounterMultiplexing() {
928   size_t hardware_events = 0;
929   for (const EventType* event : event_selection_set_.GetEvents()) {
930     if (event->IsHardwareEvent()) {
931       hardware_events++;
932     }
933   }
934   if (hardware_events == 0) {
935     return;
936   }
937   std::vector<int> cpus = cpus_;
938   if (cpus.empty()) {
939     cpus = GetOnlineCpus();
940   }
941   for (int cpu : cpus) {
942     std::optional<bool> result = CheckHardwareCountersOnCpu(cpu, hardware_events);
943     if (result.has_value() && !result.value()) {
944       LOG(WARNING) << "It seems the number of hardware events are more than the number of\n"
945                    << "available CPU PMU hardware counters. That will trigger hardware counter\n"
946                    << "multiplexing. As a result, events are not counted all the time processes\n"
947                    << "running, and event counts are smaller than what really happen.\n"
948                    << "Use --print-hw-counter to show available hardware counters.\n"
949 #if defined(__ANDROID__)
950                    << "If on a rooted device, try --use-devfreq-counters to get more counters.\n"
951 #endif
952           ;
953       break;
954     }
955   }
956 }
957 
958 }  // namespace
959 
RegisterStatCommand()960 void RegisterStatCommand() {
961   RegisterCommand("stat", [] { return std::unique_ptr<Command>(new StatCommand); });
962 }
963 
964 }  // namespace simpleperf
965