/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "command.h"

#include <inttypes.h>

#include <unordered_map>

#include <android-base/logging.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>

#include "callchain.h"
#include "event_attr.h"
#include "event_type.h"
#include "record_file.h"
#include "sample_tree.h"
#include "tracing.h"
#include "utils.h"

namespace {

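// One entry in the slab report. Samples that compare equal under the sample
// comparator are merged into a single SlabSample (see MergeSample() below).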
struct SlabSample {
  const Symbol* symbol;            // the function making the allocation
  uint64_t ptr;                    // the start address of the allocated space
  uint64_t bytes_req;              // requested space size
  uint64_t bytes_alloc;            // allocated space size
  uint64_t sample_count;           // count of allocations
  uint64_t gfp_flags;              // flags used for allocation
  uint64_t cross_cpu_allocations;  // count of allocations freed not on the
                                   // cpu allocating them
  CallChainRoot<SlabSample> callchain;  // a callchain tree representing all
                                        // callchains in this sample
  SlabSample(const Symbol* symbol, uint64_t ptr, uint64_t bytes_req,
             uint64_t bytes_alloc, uint64_t sample_count, uint64_t gfp_flags,
             uint64_t cross_cpu_allocations)
      : symbol(symbol),
        ptr(ptr),
        bytes_req(bytes_req),
        bytes_alloc(bytes_alloc),
        sample_count(sample_count),
        gfp_flags(gfp_flags),
        cross_cpu_allocations(cross_cpu_allocations) {}

  uint64_t GetPeriod() const {
    return sample_count;
  }
};

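// Allocation sizes from one record, carried to callchain samples so that
// accumulated values can include the bytes of each allocation along its
// callchain.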
struct SlabAccumulateInfo {
  uint64_t bytes_req;
  uint64_t bytes_alloc;
};

BUILD_COMPARE_VALUE_FUNCTION(ComparePtr, ptr);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesReq, bytes_req);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesAlloc, bytes_alloc);
BUILD_COMPARE_VALUE_FUNCTION(CompareGfpFlags, gfp_flags);
BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareCrossCpuAllocations,
                                     cross_cpu_allocations);

BUILD_DISPLAY_HEX64_FUNCTION(DisplayPtr, ptr);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesReq, bytes_req);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesAlloc, bytes_alloc);
BUILD_DISPLAY_HEX64_FUNCTION(DisplayGfpFlags, gfp_flags);
BUILD_DISPLAY_UINT64_FUNCTION(DisplayCrossCpuAllocations,
                              cross_cpu_allocations);

static int CompareFragment(const SlabSample* sample1,
                           const SlabSample* sample2) {
  uint64_t frag1 = sample1->bytes_alloc - sample1->bytes_req;
  uint64_t frag2 = sample2->bytes_alloc - sample2->bytes_req;
  return Compare(frag2, frag1);
}

static std::string DisplayFragment(const SlabSample* sample) {
  return android::base::StringPrintf("%" PRIu64,
                                     sample->bytes_alloc - sample->bytes_req);
}

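// Aggregated result of one report pass: the merged samples plus the whole-run
// totals printed before them.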
struct SlabSampleTree {
  std::vector<SlabSample*> samples;
  uint64_t total_requested_bytes;
  uint64_t total_allocated_bytes;
  uint64_t nr_allocations;
  uint64_t nr_frees;
  uint64_t nr_cross_cpu_allocations;
};

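// Field layout of one kmem tracepoint event, used to read values out of the
// raw tracepoint data of a sample record.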
struct SlabFormat {
  enum {
    KMEM_ALLOC,
    KMEM_FREE,
  } type;
  TracingFieldPlace call_site;
  TracingFieldPlace ptr;
  TracingFieldPlace bytes_req;
  TracingFieldPlace bytes_alloc;
  TracingFieldPlace gfp_flags;
};

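// Builds a SlabSampleTree from kmem tracepoint sample records. Allocation
// records become samples; free records are matched to allocations by ptr to
// count cross-cpu (pingpong) frees.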
class SlabSampleTreeBuilder
    : public SampleTreeBuilder<SlabSample, SlabAccumulateInfo> {
 public:
  SlabSampleTreeBuilder(const SampleComparator<SlabSample>& sample_comparator,
                        ThreadTree* thread_tree)
      : SampleTreeBuilder(sample_comparator),
        thread_tree_(thread_tree),
        total_requested_bytes_(0),
        total_allocated_bytes_(0),
        nr_allocations_(0),
        nr_frees_(0),
        nr_cross_cpu_allocations_(0) {}

  SlabSampleTree GetSampleTree() const {
    SlabSampleTree sample_tree;
    sample_tree.samples = GetSamples();
    sample_tree.total_requested_bytes = total_requested_bytes_;
    sample_tree.total_allocated_bytes = total_allocated_bytes_;
    sample_tree.nr_allocations = nr_allocations_;
    sample_tree.nr_frees = nr_frees_;
    sample_tree.nr_cross_cpu_allocations = nr_cross_cpu_allocations_;
    return sample_tree;
  }

  void AddSlabFormat(const std::vector<uint64_t>& event_ids,
                     SlabFormat format) {
    std::unique_ptr<SlabFormat> p(new SlabFormat(format));
    for (auto id : event_ids) {
      event_id_to_format_map_[id] = p.get();
    }
    formats_.push_back(std::move(p));
  }

 protected:
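  // Converts one tracepoint sample record into a SlabSample. kmem alloc
  // records create (or merge into) a sample; kmem free records only update
  // free and cross-cpu counters and produce no sample of their own.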
  SlabSample* CreateSample(const SampleRecord& r, bool in_kernel,
                           SlabAccumulateInfo* acc_info) override {
    if (!in_kernel) {
      // Normally we don't parse records in user space because tracepoint
      // events all happen in the kernel. But if r.ip_data.ip == 0, it may be
      // a kernel record that failed to dump the ip register and is still
      // useful.
      if (r.ip_data.ip == 0) {
        // It seems we are on a kernel that can't dump the register set for
        // tracepoint events because it lacks perf_arch_fetch_caller_regs().
        // We can't get callchains, but we can still do a normal report.
        static bool first = true;
        if (first) {
          first = false;
          if (accumulate_callchain_) {
            // The kernel doesn't seem to support dumping registers for
            // tracepoint events because it lacks
            // perf_arch_fetch_caller_regs().
            LOG(WARNING) << "simpleperf may not get callchains for tracepoint"
                         << " events because of lacking kernel support.";
          }
        }
      } else {
        return nullptr;
      }
    }
    uint64_t id = r.id_data.id;
    auto it = event_id_to_format_map_.find(id);
    if (it == event_id_to_format_map_.end()) {
      return nullptr;
    }
    const char* raw_data = r.raw_data.data;
    SlabFormat* format = it->second;
    if (format->type == SlabFormat::KMEM_ALLOC) {
      uint64_t call_site = format->call_site.ReadFromData(raw_data);
      const Symbol* symbol = thread_tree_->FindKernelSymbol(call_site);
      uint64_t ptr = format->ptr.ReadFromData(raw_data);
      uint64_t bytes_req = format->bytes_req.ReadFromData(raw_data);
      uint64_t bytes_alloc = format->bytes_alloc.ReadFromData(raw_data);
      uint64_t gfp_flags = format->gfp_flags.ReadFromData(raw_data);
      SlabSample* sample =
          InsertSample(std::unique_ptr<SlabSample>(new SlabSample(
              symbol, ptr, bytes_req, bytes_alloc, 1, gfp_flags, 0)));
      alloc_cpu_record_map_.insert(
          std::make_pair(ptr, std::make_pair(r.cpu_data.cpu, sample)));
      acc_info->bytes_req = bytes_req;
      acc_info->bytes_alloc = bytes_alloc;
      return sample;
    } else if (format->type == SlabFormat::KMEM_FREE) {
      uint64_t ptr = format->ptr.ReadFromData(raw_data);
      auto it = alloc_cpu_record_map_.find(ptr);
      if (it != alloc_cpu_record_map_.end()) {
        SlabSample* sample = it->second.second;
        if (r.cpu_data.cpu != it->second.first) {
          sample->cross_cpu_allocations++;
          nr_cross_cpu_allocations_++;
        }
        alloc_cpu_record_map_.erase(it);
      }
      nr_frees_++;
    }
    return nullptr;
  }

  SlabSample* CreateBranchSample(const SampleRecord&,
                                 const BranchStackItemType&) override {
    return nullptr;
  }

  SlabSample* CreateCallChainSample(
      const SlabSample* sample, uint64_t ip, bool in_kernel,
      const std::vector<SlabSample*>& callchain,
      const SlabAccumulateInfo& acc_info) override {
    if (!in_kernel) {
      return nullptr;
    }
    const Symbol* symbol = thread_tree_->FindKernelSymbol(ip);
    return InsertCallChainSample(
        std::unique_ptr<SlabSample>(
            new SlabSample(symbol, sample->ptr, acc_info.bytes_req,
                           acc_info.bytes_alloc, 1, sample->gfp_flags, 0)),
        callchain);
  }

  const ThreadEntry* GetThreadOfSample(SlabSample*) override { return nullptr; }

  uint64_t GetPeriodForCallChain(const SlabAccumulateInfo&) override {
    // The percentage of a callchain is decided by its sample_count, so use 1
    // as the period when calling AddCallChain().
    return 1;
  }

  void UpdateSummary(const SlabSample* sample) override {
    total_requested_bytes_ += sample->bytes_req;
    total_allocated_bytes_ += sample->bytes_alloc;
    nr_allocations_++;
  }

  void MergeSample(SlabSample* sample1, SlabSample* sample2) override {
    sample1->bytes_req += sample2->bytes_req;
    sample1->bytes_alloc += sample2->bytes_alloc;
    sample1->sample_count += sample2->sample_count;
  }

 private:
  ThreadTree* thread_tree_;
  uint64_t total_requested_bytes_;
  uint64_t total_allocated_bytes_;
  uint64_t nr_allocations_;
  uint64_t nr_frees_;
  uint64_t nr_cross_cpu_allocations_;

  std::unordered_map<uint64_t, SlabFormat*> event_id_to_format_map_;
  std::vector<std::unique_ptr<SlabFormat>> formats_;
  std::unordered_map<uint64_t, std::pair<uint32_t, SlabSample*>>
      alloc_cpu_record_map_;
};

using SlabSampleTreeSorter = SampleTreeSorter<SlabSample>;
using SlabSampleTreeDisplayer = SampleTreeDisplayer<SlabSample, SlabSampleTree>;
using SlabSampleCallgraphDisplayer =
    CallgraphDisplayer<SlabSample, CallChainNode<SlabSample>>;

struct EventAttrWithName {
  perf_event_attr attr;
  std::string name;
  std::vector<uint64_t> event_ids;
};

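// Implements `simpleperf kmem`. `kmem record` forwards to the record command
// with kmem tracepoint events selected; `kmem report` reads perf.data and
// prints the slab allocation report.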
class KmemCommand : public Command {
 public:
  KmemCommand()
      : Command(
            "kmem", "collect kernel memory allocation information",
            // clang-format off
"Usage: kmem (record [record options] | report [report options])\n"
"kmem record\n"
"-g        Enable call graph recording. Same as '--call-graph fp'.\n"
"--slab    Collect slab allocation information. Default option.\n"
"Other record options provided by the simpleperf record command are also available.\n"
"kmem report\n"
"--children  Print the accumulated allocation info appearing in the callchain.\n"
"            Can be used on perf.data recorded with the `--call-graph fp` option.\n"
"-g [callee|caller]  Print call graph for perf.data recorded with the\n"
"                    `--call-graph fp` option. If callee mode is used, the graph\n"
"                    shows how functions are called from others. Otherwise, the\n"
"                    graph shows how functions call others. Default is callee\n"
"                    mode. The percentage shown in the graph is determined by\n"
"                    the hit count of the callchain.\n"
"-i          Specify path of record file, default is perf.data.\n"
"-o report_file_name  Set report file name, default is stdout.\n"
"--slab      Report slab allocation information. Default option.\n"
"--slab-sort key1,key2,...\n"
"            Select the keys to sort and print slab allocation information.\n"
"            Should be used with the --slab option. Possible keys include:\n"
"              hit         -- the allocation count.\n"
"              caller      -- the function calling the allocation.\n"
"              ptr         -- the address of the allocated space.\n"
"              bytes_req   -- the total requested space size.\n"
"              bytes_alloc -- the total allocated space size.\n"
"              fragment    -- the extra allocated space size\n"
"                             (bytes_alloc - bytes_req).\n"
"              gfp_flags   -- the flags used for allocation.\n"
"              pingpong    -- the count of allocations that are freed not on\n"
"                             the cpu allocating them.\n"
"            The default slab sort keys are:\n"
"              hit,caller,bytes_req,bytes_alloc,fragment,pingpong.\n"
            // clang-format on
            ),
        is_record_(false),
        use_slab_(false),
        accumulate_callchain_(false),
        print_callgraph_(false),
        callgraph_show_callee_(false),
        record_filename_("perf.data"),
        record_file_arch_(GetBuildArch()) {}

  bool Run(const std::vector<std::string>& args);

 private:
  bool ParseOptions(const std::vector<std::string>& args,
                    std::vector<std::string>* left_args);
  bool RecordKmemInfo(const std::vector<std::string>& record_args);
  bool ReportKmemInfo();
  bool PrepareToBuildSampleTree();
  void ReadEventAttrsFromRecordFile();
  bool ReadFeaturesFromRecordFile();
  bool ReadSampleTreeFromRecordFile();
  bool ProcessRecord(std::unique_ptr<Record> record);
  void ProcessTracingData(const std::vector<char>& data);
  bool PrintReport();
  void PrintReportContext(FILE* fp);
  void PrintSlabReportContext(FILE* fp);

  bool is_record_;
  bool use_slab_;
  std::vector<std::string> slab_sort_keys_;
  bool accumulate_callchain_;
  bool print_callgraph_;
  bool callgraph_show_callee_;

  std::string record_filename_;
  std::unique_ptr<RecordFileReader> record_file_reader_;
  std::vector<EventAttrWithName> event_attrs_;
  std::string record_cmdline_;
  ArchType record_file_arch_;

  ThreadTree thread_tree_;
  SlabSampleTree slab_sample_tree_;
  std::unique_ptr<SlabSampleTreeBuilder> slab_sample_tree_builder_;
  std::unique_ptr<SlabSampleTreeSorter> slab_sample_tree_sorter_;
  std::unique_ptr<SlabSampleTreeDisplayer> slab_sample_tree_displayer_;

  std::string report_filename_;
};


bool KmemCommand::Run(const std::vector<std::string>& args) {
  std::vector<std::string> left_args;
  if (!ParseOptions(args, &left_args)) {
    return false;
  }
  if (!use_slab_) {
    // --slab is the default when no allocation type is selected.
    use_slab_ = true;
  }
  if (is_record_) {
    return RecordKmemInfo(left_args);
  }
  return ReportKmemInfo();
}


bool KmemCommand::ParseOptions(const std::vector<std::string>& args,
                               std::vector<std::string>* left_args) {
  if (args.empty()) {
    LOG(ERROR) << "No subcommand specified";
    return false;
  }
  if (args[0] == "record") {
    if (!IsRoot()) {
      LOG(ERROR) << "simpleperf kmem record command needs root privilege";
      return false;
    }
    is_record_ = true;
    size_t i;
    for (i = 1; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) {
      if (args[i] == "-g") {
        left_args->push_back("--call-graph");
        left_args->push_back("fp");
      } else if (args[i] == "--slab") {
        use_slab_ = true;
      } else {
        left_args->push_back(args[i]);
      }
    }
    left_args->insert(left_args->end(), args.begin() + i, args.end());
  } else if (args[0] == "report") {
    is_record_ = false;
    for (size_t i = 1; i < args.size(); ++i) {
      if (args[i] == "--children") {
        accumulate_callchain_ = true;
      } else if (args[i] == "-g") {
        print_callgraph_ = true;
        accumulate_callchain_ = true;
        callgraph_show_callee_ = true;
        if (i + 1 < args.size() && args[i + 1][0] != '-') {
          ++i;
          if (args[i] == "callee") {
            callgraph_show_callee_ = true;
          } else if (args[i] == "caller") {
            callgraph_show_callee_ = false;
          } else {
            LOG(ERROR) << "Unknown argument with -g option: " << args[i];
            return false;
          }
        }
      } else if (args[i] == "-i") {
        if (!NextArgumentOrError(args, &i)) {
          return false;
        }
        record_filename_ = args[i];
      } else if (args[i] == "-o") {
        if (!NextArgumentOrError(args, &i)) {
          return false;
        }
        report_filename_ = args[i];
      } else if (args[i] == "--slab") {
        use_slab_ = true;
      } else if (args[i] == "--slab-sort") {
        if (!NextArgumentOrError(args, &i)) {
          return false;
        }
        slab_sort_keys_ = android::base::Split(args[i], ",");
      } else {
        ReportUnknownOption(args, i);
        return false;
      }
    }
  } else {
    LOG(ERROR) << "Unknown subcommand for " << Name() << ": " << args[0]
               << ". Try `simpleperf help " << Name() << "`";
    return false;
  }
  return true;
}

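// Builds the argument list for the record command: "-e <event>" for each
// supported kmem tracepoint plus "-a" for system-wide collection, then runs
// the record command with it.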
bool KmemCommand::RecordKmemInfo(const std::vector<std::string>& record_args) {
  std::vector<std::string> args;
  if (use_slab_) {
    std::vector<std::string> trace_events = {
        "kmem:kmalloc",      "kmem:kmem_cache_alloc",
        "kmem:kmalloc_node", "kmem:kmem_cache_alloc_node",
        "kmem:kfree",        "kmem:kmem_cache_free"};
    for (const auto& name : trace_events) {
      if (ParseEventType(name)) {
        args.insert(args.end(), {"-e", name});
      }
    }
  }
  if (args.empty()) {
    LOG(ERROR) << "Kernel allocation related trace events are not supported.";
    return false;
  }
  args.push_back("-a");
  args.insert(args.end(), record_args.begin(), record_args.end());
  std::unique_ptr<Command> record_cmd = CreateCommandInstance("record");
  if (record_cmd == nullptr) {
    LOG(ERROR) << "record command isn't available";
    return false;
  }
  return record_cmd->Run(args);
}

bool KmemCommand::ReportKmemInfo() {
  if (!PrepareToBuildSampleTree()) {
    return false;
  }
  record_file_reader_ = RecordFileReader::CreateInstance(record_filename_);
  if (record_file_reader_ == nullptr) {
    return false;
  }
  ReadEventAttrsFromRecordFile();
  if (!ReadFeaturesFromRecordFile()) {
    return false;
  }
  if (!ReadSampleTreeFromRecordFile()) {
    return false;
  }
  if (!PrintReport()) {
    return false;
  }
  return true;
}

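// Maps each slab sort key to a compare function and a display column. Keys
// that identify a sample (caller, ptr, gfp_flags) go into the sample
// comparator used for merging; the other keys only affect sort order.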
bool KmemCommand::PrepareToBuildSampleTree() {
  if (use_slab_) {
    if (slab_sort_keys_.empty()) {
      slab_sort_keys_ = {"hit",         "caller",   "bytes_req",
                         "bytes_alloc", "fragment", "pingpong"};
    }
    SampleComparator<SlabSample> comparator;
    SampleComparator<SlabSample> sort_comparator;
    SampleDisplayer<SlabSample, SlabSampleTree> displayer;
    std::string accumulated_name = accumulate_callchain_ ? "Accumulated_" : "";

    if (print_callgraph_) {
      displayer.AddExclusiveDisplayFunction(SlabSampleCallgraphDisplayer());
    }

    for (const auto& key : slab_sort_keys_) {
      if (key == "hit") {
        sort_comparator.AddCompareFunction(CompareSampleCount);
        displayer.AddDisplayFunction(accumulated_name + "Hit",
                                     DisplaySampleCount);
      } else if (key == "caller") {
        comparator.AddCompareFunction(CompareSymbol);
        displayer.AddDisplayFunction("Caller", DisplaySymbol);
      } else if (key == "ptr") {
        comparator.AddCompareFunction(ComparePtr);
        displayer.AddDisplayFunction("Ptr", DisplayPtr);
      } else if (key == "bytes_req") {
        sort_comparator.AddCompareFunction(CompareBytesReq);
        displayer.AddDisplayFunction(accumulated_name + "BytesReq",
                                     DisplayBytesReq);
      } else if (key == "bytes_alloc") {
        sort_comparator.AddCompareFunction(CompareBytesAlloc);
        displayer.AddDisplayFunction(accumulated_name + "BytesAlloc",
                                     DisplayBytesAlloc);
      } else if (key == "fragment") {
        sort_comparator.AddCompareFunction(CompareFragment);
        displayer.AddDisplayFunction(accumulated_name + "Fragment",
                                     DisplayFragment);
      } else if (key == "gfp_flags") {
        comparator.AddCompareFunction(CompareGfpFlags);
        displayer.AddDisplayFunction("GfpFlags", DisplayGfpFlags);
      } else if (key == "pingpong") {
        sort_comparator.AddCompareFunction(CompareCrossCpuAllocations);
        displayer.AddDisplayFunction("Pingpong", DisplayCrossCpuAllocations);
      } else {
        LOG(ERROR) << "Unknown sort key for slab allocation: " << key;
        return false;
      }
    }
    // Build the sample tree builder, sorter and displayer once all sort keys
    // have been processed.
    slab_sample_tree_builder_.reset(
        new SlabSampleTreeBuilder(comparator, &thread_tree_));
    slab_sample_tree_builder_->SetCallChainSampleOptions(
        accumulate_callchain_, print_callgraph_, !callgraph_show_callee_);
    sort_comparator.AddComparator(comparator);
    slab_sample_tree_sorter_.reset(new SlabSampleTreeSorter(sort_comparator));
    slab_sample_tree_displayer_.reset(new SlabSampleTreeDisplayer(displayer));
  }
  return true;
}

void KmemCommand::ReadEventAttrsFromRecordFile() {
  std::vector<EventAttrWithId> attrs = record_file_reader_->AttrSection();
  for (const auto& attr_with_id : attrs) {
    EventAttrWithName attr;
    attr.attr = *attr_with_id.attr;
    attr.event_ids = attr_with_id.ids;
    attr.name = GetEventNameByAttr(attr.attr);
    event_attrs_.push_back(attr);
  }
}


bool KmemCommand::ReadFeaturesFromRecordFile() {
  record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_);
  std::string arch =
      record_file_reader_->ReadFeatureString(PerfFileFormat::FEAT_ARCH);
  if (!arch.empty()) {
    record_file_arch_ = GetArchType(arch);
    if (record_file_arch_ == ARCH_UNSUPPORTED) {
      return false;
    }
  }
  std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature();
  if (!cmdline.empty()) {
    record_cmdline_ = android::base::Join(cmdline, ' ');
  }
  if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_TRACING_DATA)) {
    std::vector<char> tracing_data;
    if (!record_file_reader_->ReadFeatureSection(
            PerfFileFormat::FEAT_TRACING_DATA, &tracing_data)) {
      return false;
    }
    ProcessTracingData(tracing_data);
  }
  return true;
}


bool KmemCommand::ReadSampleTreeFromRecordFile() {
  if (!record_file_reader_->ReadDataSection(
          [this](std::unique_ptr<Record> record) {
            return ProcessRecord(std::move(record));
          })) {
    return false;
  }
  if (use_slab_) {
    slab_sample_tree_ = slab_sample_tree_builder_->GetSampleTree();
    slab_sample_tree_sorter_->Sort(slab_sample_tree_.samples, print_callgraph_);
  }
  return true;
}


bool KmemCommand::ProcessRecord(std::unique_ptr<Record> record) {
  thread_tree_.Update(*record);
  if (record->type() == PERF_RECORD_SAMPLE) {
    if (use_slab_) {
      slab_sample_tree_builder_->ProcessSampleRecord(
          *static_cast<const SampleRecord*>(record.get()));
    }
  } else if (record->type() == PERF_RECORD_TRACING_DATA) {
    const auto& r = *static_cast<TracingDataRecord*>(record.get());
    ProcessTracingData(std::vector<char>(r.data, r.data + r.data_size));
  }
  return true;
}

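// Parses the tracing data, then registers a SlabFormat (field layout plus
// alloc/free type) for the event ids of every kmem tracepoint found, so that
// CreateSample() can decode the raw data of each sample record.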
void KmemCommand::ProcessTracingData(const std::vector<char>& data) {
  Tracing tracing(data);
  for (auto& attr : event_attrs_) {
    if (attr.attr.type == PERF_TYPE_TRACEPOINT) {
      uint64_t trace_event_id = attr.attr.config;
      attr.name = tracing.GetTracingEventNameHavingId(trace_event_id);
      TracingFormat format = tracing.GetTracingFormatHavingId(trace_event_id);
      if (use_slab_) {
        if (format.name == "kmalloc" || format.name == "kmem_cache_alloc" ||
            format.name == "kmalloc_node" ||
            format.name == "kmem_cache_alloc_node") {
          SlabFormat f;
          f.type = SlabFormat::KMEM_ALLOC;
          format.GetField("call_site", f.call_site);
          format.GetField("ptr", f.ptr);
          format.GetField("bytes_req", f.bytes_req);
          format.GetField("bytes_alloc", f.bytes_alloc);
          format.GetField("gfp_flags", f.gfp_flags);
          slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f);
        } else if (format.name == "kfree" || format.name == "kmem_cache_free") {
          SlabFormat f;
          f.type = SlabFormat::KMEM_FREE;
          format.GetField("call_site", f.call_site);
          format.GetField("ptr", f.ptr);
          slab_sample_tree_builder_->AddSlabFormat(attr.event_ids, f);
        }
      }
    }
  }
}

bool KmemCommand::PrintReport() {
  std::unique_ptr<FILE, decltype(&fclose)> file_handler(nullptr, fclose);
  FILE* report_fp = stdout;
  if (!report_filename_.empty()) {
    file_handler.reset(fopen(report_filename_.c_str(), "w"));
    if (file_handler == nullptr) {
      PLOG(ERROR) << "failed to open " << report_filename_;
      return false;
    }
    report_fp = file_handler.get();
  }
  PrintReportContext(report_fp);
  if (use_slab_) {
    fprintf(report_fp, "\n\n");
    PrintSlabReportContext(report_fp);
    slab_sample_tree_displayer_->DisplaySamples(
        report_fp, slab_sample_tree_.samples, &slab_sample_tree_);
  }
  return true;
}


void KmemCommand::PrintReportContext(FILE* fp) {
  if (!record_cmdline_.empty()) {
    fprintf(fp, "Cmdline: %s\n", record_cmdline_.c_str());
  }
  fprintf(fp, "Arch: %s\n", GetArchString(record_file_arch_).c_str());
  for (const auto& attr : event_attrs_) {
    fprintf(fp, "Event: %s (type %u, config %llu)\n", attr.name.c_str(),
            attr.attr.type, attr.attr.config);
  }
}


void KmemCommand::PrintSlabReportContext(FILE* fp) {
  fprintf(fp, "Slab allocation information:\n");
  fprintf(fp, "Total requested bytes: %" PRIu64 "\n",
          slab_sample_tree_.total_requested_bytes);
  fprintf(fp, "Total allocated bytes: %" PRIu64 "\n",
          slab_sample_tree_.total_allocated_bytes);
  uint64_t fragment = slab_sample_tree_.total_allocated_bytes -
                      slab_sample_tree_.total_requested_bytes;
  double percentage = 0.0;
  if (slab_sample_tree_.total_allocated_bytes != 0) {
    percentage = 100.0 * fragment / slab_sample_tree_.total_allocated_bytes;
  }
  fprintf(fp, "Total fragment: %" PRIu64 ", %f%%\n", fragment, percentage);
  fprintf(fp, "Total allocations: %" PRIu64 "\n",
          slab_sample_tree_.nr_allocations);
  fprintf(fp, "Total frees: %" PRIu64 "\n", slab_sample_tree_.nr_frees);
  percentage = 0.0;
  if (slab_sample_tree_.nr_allocations != 0) {
    percentage = 100.0 * slab_sample_tree_.nr_cross_cpu_allocations /
                 slab_sample_tree_.nr_allocations;
  }
  fprintf(fp, "Total cross cpu allocation/free: %" PRIu64 ", %f%%\n",
          slab_sample_tree_.nr_cross_cpu_allocations, percentage);
  fprintf(fp, "\n");
}


}  // namespace

void RegisterKmemCommand() {
  RegisterCommand("kmem",
                  [] { return std::unique_ptr<Command>(new KmemCommand()); });
}