1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <inttypes.h>
18 #include <signal.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <sys/prctl.h>
22
23 #include <algorithm>
24 #include <chrono>
25 #include <set>
26 #include <string>
27 #include <string_view>
28 #include <vector>
29
30 #include <android-base/file.h>
31 #include <android-base/logging.h>
32 #include <android-base/strings.h>
33 #include <android-base/unique_fd.h>
34
35 #include "cmd_stat_impl.h"
36 #include "command.h"
37 #include "environment.h"
38 #include "event_attr.h"
39 #include "event_fd.h"
40 #include "event_selection_set.h"
41 #include "event_type.h"
42 #include "IOEventLoop.h"
43 #include "utils.h"
44 #include "workload.h"
45
46 using namespace simpleperf;
47
48 namespace simpleperf {
49
50 static std::vector<std::string> default_measured_event_types{
51 "cpu-cycles", "stalled-cycles-frontend", "stalled-cycles-backend",
52 "instructions", "branch-instructions", "branch-misses",
53 "task-clock", "context-switches", "page-faults",
54 };
55
56 static const std::unordered_map<std::string_view, std::pair<std::string_view, std::string_view>>
57 COMMON_EVENT_RATE_MAP = {
58 {"cache-misses", {"cache-references", "miss rate"}},
59 {"branch-misses", {"branch-instructions", "miss rate"}},
60 };
61
62 static const std::unordered_map<std::string_view, std::pair<std::string_view, std::string_view>>
63 ARM_EVENT_RATE_MAP = {
64 // Refer to "D6.10.5 Meaningful ratios between common microarchitectural events" in ARMv8
65 // specification.
66 {"raw-l1i-cache-refill", {"raw-l1i-cache", "level 1 instruction cache refill rate"}},
67 {"raw-l1i-tlb-refill", {"raw-l1i-tlb", "level 1 instruction TLB refill rate"}},
68 {"raw-l1d-cache-refill", {"raw-l1d-cache", "level 1 data or unified cache refill rate"}},
69 {"raw-l1d-tlb-refill", {"raw-l1d-tlb", "level 1 data or unified TLB refill rate"}},
70 {"raw-l2d-cache-refill", {"raw-l2d-cache", "level 2 data or unified cache refill rate"}},
71 {"raw-l2i-cache-refill", {"raw-l2i-cache", "level 2 instruction cache refill rate"}},
72 {"raw-l3d-cache-refill", {"raw-l3d-cache", "level 3 data or unified cache refill rate"}},
73 {"raw-l2d-tlb-refill", {"raw-l2d-tlb", "level 2 data or unified TLB refill rate"}},
74 {"raw-l2i-tlb-refill", {"raw-l2i-tlb", "level 2 instruction TLB refill rate"}},
75 {"raw-bus-access", {"raw-bus-cycles", "bus accesses per cycle"}},
76 {"raw-ll-cache-miss", {"raw-ll-cache", "last level data or unified cache refill rate"}},
77 {"raw-dtlb-walk", {"raw-l1d-tlb", "data TLB miss rate"}},
78 {"raw-itlb-walk", {"raw-l1i-tlb", "instruction TLB miss rate"}},
79 {"raw-ll-cache-miss-rd", {"raw-ll-cache-rd", "memory read operation miss rate"}},
80 {"raw-remote-access-rd",
81 {"raw-remote-access", "read accesses to another socket in a multi-socket system"}},
82 // Refer to "Table K3-2 Relationship between REFILL events and associated access events" in
83 // ARMv8 specification.
84 {"raw-l1d-cache-refill-rd", {"raw-l1d-cache-rd", "level 1 cache refill rate, read"}},
85 {"raw-l1d-cache-refill-wr", {"raw-l1d-cache-wr", "level 1 cache refill rate, write"}},
86 {"raw-l1d-tlb-refill-rd", {"raw-l1d-tlb-rd", "level 1 TLB refill rate, read"}},
87 {"raw-l1d-tlb-refill-wr", {"raw-l1d-tlb-wr", "level 1 TLB refill rate, write"}},
88 {"raw-l2d-cache-refill-rd", {"raw-l2d-cache-rd", "level 2 data cache refill rate, read"}},
89 {"raw-l2d-cache-refill-wr", {"raw-l2d-cache-wr", "level 2 data cache refill rate, write"}},
90 {"raw-l2d-tlb-refill-rd", {"raw-l2d-tlb-rd", "level 2 data TLB refill rate, read"}},
91 };
92
FindSummary(const std::string & type_name,const std::string & modifier,const ThreadInfo * thread,int cpu)93 const CounterSummary* CounterSummaries::FindSummary(const std::string& type_name,
94 const std::string& modifier,
95 const ThreadInfo* thread, int cpu) {
96 for (const auto& s : summaries_) {
97 if (s.type_name == type_name && s.modifier == modifier && s.thread == thread && s.cpu == cpu) {
98 return &s;
99 }
100 }
101 return nullptr;
102 }
103
AutoGenerateSummaries()104 void CounterSummaries::AutoGenerateSummaries() {
105 for (size_t i = 0; i < summaries_.size(); ++i) {
106 const CounterSummary& s = summaries_[i];
107 if (s.modifier == "u") {
108 const CounterSummary* other = FindSummary(s.type_name, "k", s.thread, s.cpu);
109 if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
110 if (FindSummary(s.type_name, "", s.thread, s.cpu) == nullptr) {
111 summaries_.emplace_back(s.type_name, "", s.group_id, s.thread, s.cpu,
112 s.count + other->count, s.runtime_in_ns, s.scale, true, csv_);
113 }
114 }
115 }
116 }
117 }
118
GenerateComments(double duration_in_sec)119 void CounterSummaries::GenerateComments(double duration_in_sec) {
120 for (auto& s : summaries_) {
121 s.comment = GetCommentForSummary(s, duration_in_sec);
122 }
123 }
124
Show(FILE * fp)125 void CounterSummaries::Show(FILE* fp) {
126 if (csv_) {
127 ShowCSV(fp);
128 } else {
129 ShowText(fp);
130 }
131 }
132
ShowCSV(FILE * fp)133 void CounterSummaries::ShowCSV(FILE* fp) {
134 for (auto& s : summaries_) {
135 if (s.thread != nullptr) {
136 fprintf(fp, "%s,%d,%d,", s.thread->name.c_str(), s.thread->pid, s.thread->tid);
137 }
138 fprintf(fp, "%s,%s,%s,(%.0f%%)%s\n", s.readable_count.c_str(), s.Name().c_str(),
139 s.comment.c_str(), 1.0 / s.scale * 100, (s.auto_generated ? " (generated)," : ","));
140 }
141 }
142
ShowText(FILE * fp)143 void CounterSummaries::ShowText(FILE* fp) {
144 bool show_thread = !summaries_.empty() && summaries_[0].thread != nullptr;
145 bool show_cpu = !summaries_.empty() && summaries_[0].cpu != -1;
146 std::vector<std::string> titles;
147
148 if (show_thread) {
149 titles = {"thread_name", "pid", "tid"};
150 }
151 if (show_cpu) {
152 titles.emplace_back("cpu");
153 }
154 titles.emplace_back("count");
155 titles.emplace_back("event_name");
156 titles.emplace_back(" # count / runtime, runtime / enabled_time");
157
158 std::vector<size_t> width(titles.size(), 0);
159
160 auto adjust_width = [](size_t& w, size_t size) { w = std::max(w, size); };
161
162 // The last title is too long. Don't include it for width adjustment.
163 for (size_t i = 0; i + 1 < titles.size(); i++) {
164 adjust_width(width[i], titles[i].size());
165 }
166
167 for (auto& s : summaries_) {
168 size_t i = 0;
169 if (show_thread) {
170 adjust_width(width[i++], s.thread->name.size());
171 adjust_width(width[i++], std::to_string(s.thread->pid).size());
172 adjust_width(width[i++], std::to_string(s.thread->tid).size());
173 }
174 if (show_cpu) {
175 adjust_width(width[i++], std::to_string(s.cpu).size());
176 }
177 adjust_width(width[i++], s.readable_count.size());
178 adjust_width(width[i++], s.Name().size());
179 adjust_width(width[i++], s.comment.size());
180 }
181
182 fprintf(fp, "# ");
183 for (size_t i = 0; i < titles.size(); i++) {
184 if (titles[i] == "count") {
185 fprintf(fp, "%*s", static_cast<int>(width[i]), titles[i].c_str());
186 } else {
187 fprintf(fp, "%-*s", static_cast<int>(width[i]), titles[i].c_str());
188 }
189 if (i + 1 < titles.size()) {
190 fprintf(fp, " ");
191 }
192 }
193 fprintf(fp, "\n");
194
195 for (auto& s : summaries_) {
196 size_t i = 0;
197 if (show_thread) {
198 fprintf(fp, " %-*s", static_cast<int>(width[i++]), s.thread->name.c_str());
199 fprintf(fp, " %-*d", static_cast<int>(width[i++]), s.thread->pid);
200 fprintf(fp, " %-*d", static_cast<int>(width[i++]), s.thread->tid);
201 }
202 if (show_cpu) {
203 fprintf(fp, " %-*d", static_cast<int>(width[i++]), s.cpu);
204 }
205 fprintf(fp, " %*s %-*s # %-*s (%.0f%%)%s\n", static_cast<int>(width[i]),
206 s.readable_count.c_str(), static_cast<int>(width[i + 1]), s.Name().c_str(),
207 static_cast<int>(width[i + 2]), s.comment.c_str(), 1.0 / s.scale * 100,
208 (s.auto_generated ? " (generated)" : ""));
209 }
210 }
211
GetCommentForSummary(const CounterSummary & s,double duration_in_sec)212 std::string CounterSummaries::GetCommentForSummary(const CounterSummary& s,
213 double duration_in_sec) {
214 char sap_mid;
215 if (csv_) {
216 sap_mid = ',';
217 } else {
218 sap_mid = ' ';
219 }
220 if (s.type_name == "task-clock") {
221 double run_sec = s.count / 1e9;
222 double used_cpus = run_sec / duration_in_sec;
223 return android::base::StringPrintf("%f%ccpus used", used_cpus, sap_mid);
224 }
225 if (s.type_name == "cpu-clock") {
226 return "";
227 }
228 if (s.type_name == "cpu-cycles") {
229 if (s.runtime_in_ns == 0) {
230 return "";
231 }
232 double ghz = static_cast<double>(s.count) / s.runtime_in_ns;
233 return android::base::StringPrintf("%f%cGHz", ghz, sap_mid);
234 }
235 if (s.type_name == "instructions" && s.count != 0) {
236 const CounterSummary* other = FindSummary("cpu-cycles", s.modifier, s.thread, s.cpu);
237 if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
238 double cpi = static_cast<double>(other->count) / s.count;
239 return android::base::StringPrintf("%f%ccycles per instruction", cpi, sap_mid);
240 }
241 }
242 std::string rate_comment = GetRateComment(s, sap_mid);
243 if (!rate_comment.empty()) {
244 return rate_comment;
245 }
246 if (s.runtime_in_ns == 0) {
247 return "";
248 }
249 double runtime_in_sec = static_cast<double>(s.runtime_in_ns) / 1e9;
250 double rate = s.count / runtime_in_sec;
251 if (rate >= 1e9 - 1e5) {
252 return android::base::StringPrintf("%.3f%cG/sec", rate / 1e9, sap_mid);
253 }
254 if (rate >= 1e6 - 1e2) {
255 return android::base::StringPrintf("%.3f%cM/sec", rate / 1e6, sap_mid);
256 }
257 if (rate >= 1e3) {
258 return android::base::StringPrintf("%.3f%cK/sec", rate / 1e3, sap_mid);
259 }
260 return android::base::StringPrintf("%.3f%c/sec", rate, sap_mid);
261 }
262
GetRateComment(const CounterSummary & s,char sep)263 std::string CounterSummaries::GetRateComment(const CounterSummary& s, char sep) {
264 std::string_view miss_event_name = s.type_name;
265 std::string event_name;
266 std::string rate_desc;
267 if (auto it = COMMON_EVENT_RATE_MAP.find(miss_event_name); it != COMMON_EVENT_RATE_MAP.end()) {
268 event_name = it->second.first;
269 rate_desc = it->second.second;
270 }
271 if (event_name.empty() && (GetBuildArch() == ARCH_ARM || GetBuildArch() == ARCH_ARM64)) {
272 if (auto it = ARM_EVENT_RATE_MAP.find(miss_event_name); it != ARM_EVENT_RATE_MAP.end()) {
273 event_name = it->second.first;
274 rate_desc = it->second.second;
275 }
276 }
277 if (event_name.empty() && android::base::ConsumeSuffix(&miss_event_name, "-misses")) {
278 event_name = std::string(miss_event_name) + "s";
279 rate_desc = "miss rate";
280 }
281 if (!event_name.empty()) {
282 const CounterSummary* other = FindSummary(event_name, s.modifier, s.thread, s.cpu);
283 if (other != nullptr && other->IsMonitoredAtTheSameTime(s) && other->count != 0) {
284 double miss_rate = static_cast<double>(s.count) / other->count;
285 return android::base::StringPrintf("%f%%%c%s", miss_rate * 100, sep, rate_desc.c_str());
286 }
287 }
288 return "";
289 }
290
291 } // namespace simpleperf
292
293 namespace {
294
295 // devfreq may use performance counters to calculate memory latency (as in
296 // drivers/devfreq/arm-memlat-mon.c). Hopefully we can get more available counters by asking devfreq
297 // to not use the memory latency governor temporarily.
298 class DevfreqCounters {
299 public:
Use()300 bool Use() {
301 if (!IsRoot()) {
302 LOG(ERROR) << "--use-devfreq-counters needs root permission to set devfreq governors";
303 return false;
304 }
305 std::string devfreq_dir = "/sys/class/devfreq/";
306 for (auto& name : GetSubDirs(devfreq_dir)) {
307 std::string governor_path = devfreq_dir + name + "/governor";
308 if (IsRegularFile(governor_path)) {
309 std::string governor;
310 if (!android::base::ReadFileToString(governor_path, &governor)) {
311 LOG(ERROR) << "failed to read " << governor_path;
312 return false;
313 }
314 governor = android::base::Trim(governor);
315 if (governor == "mem_latency") {
316 if (!android::base::WriteStringToFile("performance", governor_path)) {
317 PLOG(ERROR) << "failed to write " << governor_path;
318 return false;
319 }
320 mem_latency_governor_paths_.emplace_back(std::move(governor_path));
321 }
322 }
323 }
324 return true;
325 }
326
~DevfreqCounters()327 ~DevfreqCounters() {
328 for (auto& path : mem_latency_governor_paths_) {
329 android::base::WriteStringToFile("mem_latency", path);
330 }
331 }
332
333 private:
334 std::vector<std::string> mem_latency_governor_paths_;
335 };
336
337 class StatCommand : public Command {
338 public:
StatCommand()339 StatCommand()
340 : Command("stat", "gather performance counter information",
341 // clang-format off
342 "Usage: simpleperf stat [options] [command [command-args]]\n"
343 " Gather performance counter information of running [command].\n"
344 " And -a/-p/-t option can be used to change target of counter information.\n"
345 "-a Collect system-wide information.\n"
346 #if defined(__ANDROID__)
347 "--app package_name Profile the process of an Android application.\n"
348 " On non-rooted devices, the app must be debuggable,\n"
349 " because we use run-as to switch to the app's context.\n"
350 #endif
351 "--cpu cpu_item1,cpu_item2,...\n"
352 " Collect information only on the selected cpus. cpu_item can\n"
353 " be a cpu number like 1, or a cpu range like 0-3.\n"
354 "--csv Write report in comma separate form.\n"
355 "--duration time_in_sec Monitor for time_in_sec seconds instead of running\n"
356 " [command]. Here time_in_sec may be any positive\n"
357 " floating point number.\n"
358 "--interval time_in_ms Print stat for every time_in_ms milliseconds.\n"
359 " Here time_in_ms may be any positive floating point\n"
360 " number. Simpleperf prints total values from the\n"
361 " starting point. But this can be changed by\n"
362 " --interval-only-values.\n"
363 "--interval-only-values Print numbers of events happened in each interval.\n"
364 "-e event1[:modifier1],event2[:modifier2],...\n"
365 " Select a list of events to count. An event can be:\n"
366 " 1) an event name listed in `simpleperf list`;\n"
367 " 2) a raw PMU event in rN format. N is a hex number.\n"
368 " For example, r1b selects event number 0x1b.\n"
369 " Modifiers can be added to define how the event should be\n"
370 " monitored. Possible modifiers are:\n"
371 " u - monitor user space events only\n"
372 " k - monitor kernel space events only\n"
373 "--group event1[:modifier],event2[:modifier2],...\n"
374 " Similar to -e option. But events specified in the same --group\n"
375 " option are monitored as a group, and scheduled in and out at the\n"
376 " same time.\n"
377 "--no-inherit Don't stat created child threads/processes.\n"
378 "-o output_filename Write report to output_filename instead of standard output.\n"
379 "--per-core Print counters for each cpu core.\n"
380 "--per-thread Print counters for each thread.\n"
381 "-p pid1,pid2,... Stat events on existing processes. Mutually exclusive with -a.\n"
382 "-t tid1,tid2,... Stat events on existing threads. Mutually exclusive with -a.\n"
383 #if defined(__ANDROID__)
384 "--use-devfreq-counters On devices with Qualcomm SOCs, some hardware counters may be used\n"
385 " to monitor memory latency (in drivers/devfreq/arm-memlat-mon.c),\n"
386 " making fewer counters available to users. This option asks devfreq\n"
387 " to temporarily release counters by replacing memory-latency governor\n"
388 " with performance governor. It affects memory latency during profiling,\n"
389 " and may cause wedged power if simpleperf is killed in between.\n"
390 #endif
391 "--verbose Show result in verbose mode.\n"
392 #if 0
393 // Below options are only used internally and shouldn't be visible to the public.
394 "--in-app We are already running in the app's context.\n"
395 "--tracepoint-events file_name Read tracepoint events from [file_name] instead of tracefs.\n"
396 "--out-fd <fd> Write output to a file descriptor.\n"
397 "--stop-signal-fd <fd> Stop stating when fd is readable.\n"
398 #endif
399 // clang-format on
400 ),
401 verbose_mode_(false),
402 system_wide_collection_(false),
403 child_inherit_(true),
404 duration_in_sec_(0),
405 interval_in_ms_(0),
406 interval_only_values_(false),
407 event_selection_set_(true),
408 csv_(false),
409 in_app_context_(false) {
410 // Die if parent exits.
411 prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
412 }
413
414 bool Run(const std::vector<std::string>& args);
415
416 private:
417 bool ParseOptions(const std::vector<std::string>& args,
418 std::vector<std::string>* non_option_args);
419 bool AddDefaultMeasuredEventTypes();
420 void SetEventSelectionFlags();
421 void MonitorEachThread();
422 void AdjustToIntervalOnlyValues(std::vector<CountersInfo>& counters);
423 bool ShowCounters(const std::vector<CountersInfo>& counters,
424 double duration_in_sec, FILE* fp);
425
426 bool verbose_mode_;
427 bool system_wide_collection_;
428 bool child_inherit_;
429 double duration_in_sec_;
430 double interval_in_ms_;
431 bool interval_only_values_;
432 std::vector<std::vector<CounterSum>> last_sum_values_;
433 std::vector<int> cpus_;
434 EventSelectionSet event_selection_set_;
435 std::string output_filename_;
436 android::base::unique_fd out_fd_;
437 bool csv_;
438 std::string app_package_name_;
439 bool in_app_context_;
440 android::base::unique_fd stop_signal_fd_;
441 bool use_devfreq_counters_ = false;
442
443 bool report_per_core_ = false;
444 bool report_per_thread_ = false;
445 // used to report event count for each thread
446 std::unordered_map<pid_t, ThreadInfo> thread_info_;
447 };
448
Run(const std::vector<std::string> & args)449 bool StatCommand::Run(const std::vector<std::string>& args) {
450 if (!CheckPerfEventLimit()) {
451 return false;
452 }
453 AllowMoreOpenedFiles();
454
455 // 1. Parse options, and use default measured event types if not given.
456 std::vector<std::string> workload_args;
457 if (!ParseOptions(args, &workload_args)) {
458 return false;
459 }
460 if (!app_package_name_.empty() && !in_app_context_) {
461 if (!IsRoot()) {
462 return RunInAppContext(app_package_name_, "stat", args, workload_args.size(),
463 output_filename_, !event_selection_set_.GetTracepointEvents().empty());
464 }
465 }
466 DevfreqCounters devfreq_counters;
467 if (use_devfreq_counters_) {
468 if (!devfreq_counters.Use()) {
469 return false;
470 }
471 }
472 if (event_selection_set_.empty()) {
473 if (!AddDefaultMeasuredEventTypes()) {
474 return false;
475 }
476 }
477 SetEventSelectionFlags();
478
479 // 2. Create workload.
480 std::unique_ptr<Workload> workload;
481 if (!workload_args.empty()) {
482 workload = Workload::CreateWorkload(workload_args);
483 if (workload == nullptr) {
484 return false;
485 }
486 }
487 bool need_to_check_targets = false;
488 if (system_wide_collection_) {
489 if (report_per_thread_) {
490 event_selection_set_.AddMonitoredProcesses(GetAllProcesses());
491 } else {
492 event_selection_set_.AddMonitoredThreads({-1});
493 }
494 } else if (!event_selection_set_.HasMonitoredTarget()) {
495 if (workload != nullptr) {
496 event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
497 event_selection_set_.SetEnableOnExec(true);
498 } else if (!app_package_name_.empty()) {
499 std::set<pid_t> pids = WaitForAppProcesses(app_package_name_);
500 event_selection_set_.AddMonitoredProcesses(pids);
501 } else {
502 LOG(ERROR)
503 << "No threads to monitor. Try `simpleperf help stat` for help\n";
504 return false;
505 }
506 } else {
507 need_to_check_targets = true;
508 }
509
510 if (report_per_thread_) {
511 MonitorEachThread();
512 }
513
514 // 3. Open perf_event_files and output file if defined.
515 if (cpus_.empty() && !report_per_core_ && (report_per_thread_ || !system_wide_collection_)) {
516 cpus_.push_back(-1); // Get event count for each thread on all cpus.
517 }
518 if (!event_selection_set_.OpenEventFiles(cpus_)) {
519 return false;
520 }
521 std::unique_ptr<FILE, decltype(&fclose)> fp_holder(nullptr, fclose);
522 if (!output_filename_.empty()) {
523 fp_holder.reset(fopen(output_filename_.c_str(), "we"));
524 if (fp_holder == nullptr) {
525 PLOG(ERROR) << "failed to open " << output_filename_;
526 return false;
527 }
528 } else if (out_fd_ != -1) {
529 fp_holder.reset(fdopen(out_fd_.release(), "we"));
530 if (fp_holder == nullptr) {
531 PLOG(ERROR) << "failed to write output.";
532 return false;
533 }
534 }
535 FILE* fp = fp_holder ? fp_holder.get() : stdout;
536
537 // 4. Add signal/periodic Events.
538 IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
539 if (interval_in_ms_ != 0) {
540 if (!loop->UsePreciseTimer()) {
541 return false;
542 }
543 }
544 std::chrono::time_point<std::chrono::steady_clock> start_time;
545 std::vector<CountersInfo> counters;
546 if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
547 return false;
548 }
549 auto exit_loop_callback = [loop]() {
550 return loop->ExitLoop();
551 };
552 if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP}, exit_loop_callback)) {
553 return false;
554 }
555 if (stop_signal_fd_ != -1) {
556 if (!loop->AddReadEvent(stop_signal_fd_, exit_loop_callback)) {
557 return false;
558 }
559 }
560 if (duration_in_sec_ != 0) {
561 if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_), exit_loop_callback)) {
562 return false;
563 }
564 }
565 auto print_counters = [&]() {
566 auto end_time = std::chrono::steady_clock::now();
567 if (!event_selection_set_.ReadCounters(&counters)) {
568 return false;
569 }
570 double duration_in_sec =
571 std::chrono::duration_cast<std::chrono::duration<double>>(end_time -
572 start_time)
573 .count();
574 if (interval_only_values_) {
575 AdjustToIntervalOnlyValues(counters);
576 }
577 if (!ShowCounters(counters, duration_in_sec, fp)) {
578 return false;
579 }
580 return true;
581 };
582
583 if (interval_in_ms_ != 0) {
584 if (!loop->AddPeriodicEvent(SecondToTimeval(interval_in_ms_ / 1000.0),
585 print_counters)) {
586 return false;
587 }
588 }
589
590 // 5. Count events while workload running.
591 start_time = std::chrono::steady_clock::now();
592 if (workload != nullptr && !workload->Start()) {
593 return false;
594 }
595 if (!loop->RunLoop()) {
596 return false;
597 }
598
599 // 6. Read and print counters.
600 if (interval_in_ms_ == 0) {
601 return print_counters();
602 }
603 return true;
604 }
605
ParseOptions(const std::vector<std::string> & args,std::vector<std::string> * non_option_args)606 bool StatCommand::ParseOptions(const std::vector<std::string>& args,
607 std::vector<std::string>* non_option_args) {
608 std::set<pid_t> tid_set;
609 size_t i;
610 for (i = 0; i < args.size() && args[i].size() > 0 && args[i][0] == '-'; ++i) {
611 if (args[i] == "-a") {
612 system_wide_collection_ = true;
613 } else if (args[i] == "--app") {
614 if (!NextArgumentOrError(args, &i)) {
615 return false;
616 }
617 app_package_name_ = args[i];
618 } else if (args[i] == "--cpu") {
619 if (!NextArgumentOrError(args, &i)) {
620 return false;
621 }
622 cpus_ = GetCpusFromString(args[i]);
623 } else if (args[i] == "--csv") {
624 csv_ = true;
625 } else if (args[i] == "--duration") {
626 if (!GetDoubleOption(args, &i, &duration_in_sec_, 1e-9)) {
627 return false;
628 }
629 } else if (args[i] == "--interval") {
630 if (!GetDoubleOption(args, &i, &interval_in_ms_, 1e-9)) {
631 return false;
632 }
633 } else if (args[i] == "--interval-only-values") {
634 interval_only_values_ = true;
635 } else if (args[i] == "-e") {
636 if (!NextArgumentOrError(args, &i)) {
637 return false;
638 }
639 std::vector<std::string> event_types = android::base::Split(args[i], ",");
640 for (auto& event_type : event_types) {
641 if (!event_selection_set_.AddEventType(event_type)) {
642 return false;
643 }
644 }
645 } else if (args[i] == "--group") {
646 if (!NextArgumentOrError(args, &i)) {
647 return false;
648 }
649 std::vector<std::string> event_types = android::base::Split(args[i], ",");
650 if (!event_selection_set_.AddEventGroup(event_types)) {
651 return false;
652 }
653 } else if (args[i] == "--in-app") {
654 in_app_context_ = true;
655 } else if (args[i] == "--no-inherit") {
656 child_inherit_ = false;
657 } else if (args[i] == "-o") {
658 if (!NextArgumentOrError(args, &i)) {
659 return false;
660 }
661 output_filename_ = args[i];
662 } else if (args[i] == "--out-fd") {
663 int fd;
664 if (!GetUintOption(args, &i, &fd)) {
665 return false;
666 }
667 out_fd_.reset(fd);
668 } else if (args[i] == "--per-core") {
669 report_per_core_ = true;
670 } else if (args[i] == "--per-thread") {
671 report_per_thread_ = true;
672 } else if (args[i] == "-p") {
673 if (!NextArgumentOrError(args, &i)) {
674 return false;
675 }
676 std::set<pid_t> pids;
677 if (!GetValidThreadsFromThreadString(args[i], &pids)) {
678 return false;
679 }
680 event_selection_set_.AddMonitoredProcesses(pids);
681 } else if (args[i] == "--stop-signal-fd") {
682 int fd;
683 if (!GetUintOption(args, &i, &fd)) {
684 return false;
685 }
686 stop_signal_fd_.reset(fd);
687 } else if (args[i] == "-t") {
688 if (!NextArgumentOrError(args, &i)) {
689 return false;
690 }
691 std::set<pid_t> tids;
692 if (!GetValidThreadsFromThreadString(args[i], &tids)) {
693 return false;
694 }
695 event_selection_set_.AddMonitoredThreads(tids);
696 } else if (args[i] == "--tracepoint-events") {
697 if (!NextArgumentOrError(args, &i)) {
698 return false;
699 }
700 if (!SetTracepointEventsFilePath(args[i])) {
701 return false;
702 }
703 #if defined(__ANDROID__)
704 } else if (args[i] == "--use-devfreq-counters") {
705 use_devfreq_counters_ = true;
706 #endif
707 } else if (args[i] == "--verbose") {
708 verbose_mode_ = true;
709 } else {
710 ReportUnknownOption(args, i);
711 return false;
712 }
713 }
714
715 if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
716 LOG(ERROR) << "Stat system wide and existing processes/threads can't be "
717 "used at the same time.";
718 return false;
719 }
720 if (system_wide_collection_ && !IsRoot()) {
721 LOG(ERROR) << "System wide profiling needs root privilege.";
722 return false;
723 }
724
725 non_option_args->clear();
726 for (; i < args.size(); ++i) {
727 non_option_args->push_back(args[i]);
728 }
729 return true;
730 }
731
AddDefaultMeasuredEventTypes()732 bool StatCommand::AddDefaultMeasuredEventTypes() {
733 for (auto& name : default_measured_event_types) {
734 // It is not an error when some event types in the default list are not
735 // supported by the kernel.
736 const EventType* type = FindEventTypeByName(name);
737 if (type != nullptr &&
738 IsEventAttrSupported(CreateDefaultPerfEventAttr(*type), name)) {
739 if (!event_selection_set_.AddEventType(name)) {
740 return false;
741 }
742 }
743 }
744 if (event_selection_set_.empty()) {
745 LOG(ERROR) << "Failed to add any supported default measured types";
746 return false;
747 }
748 return true;
749 }
750
SetEventSelectionFlags()751 void StatCommand::SetEventSelectionFlags() {
752 event_selection_set_.SetInherit(child_inherit_);
753 }
754
MonitorEachThread()755 void StatCommand::MonitorEachThread() {
756 std::vector<pid_t> threads;
757 for (auto pid : event_selection_set_.GetMonitoredProcesses()) {
758 for (auto tid : GetThreadsInProcess(pid)) {
759 ThreadInfo info;
760 if (GetThreadName(tid, &info.name)) {
761 info.tid = tid;
762 info.pid = pid;
763 thread_info_[tid] = std::move(info);
764 threads.push_back(tid);
765 }
766 }
767 }
768 for (auto tid : event_selection_set_.GetMonitoredThreads()) {
769 ThreadInfo info;
770 if (ReadThreadNameAndPid(tid, &info.name, &info.pid)) {
771 info.tid = tid;
772 thread_info_[tid] = std::move(info);
773 threads.push_back(tid);
774 }
775 }
776 event_selection_set_.ClearMonitoredTargets();
777 event_selection_set_.AddMonitoredThreads(threads);
778 }
779
AdjustToIntervalOnlyValues(std::vector<CountersInfo> & counters)780 void StatCommand::AdjustToIntervalOnlyValues(std::vector<CountersInfo>& counters) {
781 if (last_sum_values_.size() < counters.size()) {
782 last_sum_values_.resize(counters.size());
783 }
784 for (size_t i = 0; i < counters.size(); i++) {
785 std::vector<CounterInfo>& counters_per_event = counters[i].counters;
786 std::vector<CounterSum>& last_sum = last_sum_values_[i];
787
788 if (last_sum.size() < counters_per_event.size()) {
789 last_sum.resize(counters_per_event.size());
790 }
791 for (size_t j = 0; j < counters_per_event.size(); j++) {
792 PerfCounter& counter = counters_per_event[j].counter;
793 CounterSum new_sum;
794 new_sum.FromCounter(counter);
795 CounterSum delta = new_sum - last_sum[j];
796 delta.ToCounter(counter);
797 last_sum[j] = new_sum;
798 }
799 }
800 }
801
ShowCounters(const std::vector<CountersInfo> & counters,double duration_in_sec,FILE * fp)802 bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters,
803 double duration_in_sec, FILE* fp) {
804 if (csv_) {
805 fprintf(fp, "Performance counter statistics,\n");
806 } else {
807 fprintf(fp, "Performance counter statistics:\n\n");
808 }
809
810 if (verbose_mode_) {
811 for (auto& counters_info : counters) {
812 for (auto& counter_info : counters_info.counters) {
813 if (csv_) {
814 fprintf(fp, "%s,tid,%d,cpu,%d,count,%" PRIu64 ",time_enabled,%" PRIu64
815 ",time running,%" PRIu64 ",id,%" PRIu64 ",\n",
816 counters_info.event_name.c_str(), counter_info.tid,
817 counter_info.cpu, counter_info.counter.value,
818 counter_info.counter.time_enabled,
819 counter_info.counter.time_running, counter_info.counter.id);
820 } else {
821 fprintf(fp,
822 "%s(tid %d, cpu %d): count %" PRIu64 ", time_enabled %" PRIu64
823 ", time running %" PRIu64 ", id %" PRIu64 "\n",
824 counters_info.event_name.c_str(), counter_info.tid,
825 counter_info.cpu, counter_info.counter.value,
826 counter_info.counter.time_enabled,
827 counter_info.counter.time_running, counter_info.counter.id);
828 }
829 }
830 }
831 }
832
833 CounterSummaryBuilder builder(report_per_thread_, report_per_core_, csv_, thread_info_);
834 for (const auto& info : counters) {
835 builder.AddCountersForOneEventType(info);
836 }
837 CounterSummaries summaries(builder.Build(), csv_);
838 summaries.AutoGenerateSummaries();
839 summaries.GenerateComments(duration_in_sec);
840 summaries.Show(fp);
841
842 if (csv_)
843 fprintf(fp, "Total test time,%lf,seconds,\n", duration_in_sec);
844 else
845 fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
846
847 const char* COUNTER_MULTIPLEX_INFO =
848 "probably caused by hardware counter multiplexing (less counters than events).\n"
849 "Try --use-devfreq-counters if on a rooted device.";
850
851 if (cpus_ == std::vector<int>(1, -1) ||
852 event_selection_set_.GetMonitoredThreads() == std::set<pid_t>({-1})) {
853 // We either monitor a thread on all cpus, or monitor all threads on a cpu. In both cases,
854 // if percentages < 100%, probably it is caused by hardware counter multiplexing.
855 bool counters_always_available = true;
856 for (const auto& summary : summaries.Summaries()) {
857 if (!summary.IsMonitoredAllTheTime()) {
858 counters_always_available = false;
859 break;
860 }
861 }
862 if (!counters_always_available) {
863 LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time,\n"
864 << COUNTER_MULTIPLEX_INFO;
865 }
866 } else if (report_per_thread_) {
867 // We monitor each thread on each cpu.
868 LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for each thread.\n"
869 << "If percentage sum of a thread < 99%, or report for a running thread is missing,\n"
870 << COUNTER_MULTIPLEX_INFO;
871 } else {
872 // We monitor some threads on each cpu.
873 LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for monitored\n"
874 << "threads. If percentage sum < 99%, or report for an event is missing,\n"
875 << COUNTER_MULTIPLEX_INFO;
876 }
877 return true;
878 }
879
880 } // namespace
881
RegisterStatCommand()882 void RegisterStatCommand() {
883 RegisterCommand("stat",
884 [] { return std::unique_ptr<Command>(new StatCommand); });
885 }
886