• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <stdint.h>
18 #include <stdio.h>
19 #include <unistd.h>
20 
21 #include <memory>
22 #include <optional>
23 #include <string>
24 
25 #include <android-base/parseint.h>
26 #include <android-base/strings.h>
27 
28 #include "ETMBranchListFile.h"
29 #include "ETMDecoder.h"
30 #include "RegEx.h"
31 #include "command.h"
32 #include "record_file.h"
33 #include "system/extras/simpleperf/etm_branch_list.pb.h"
34 #include "thread_tree.h"
35 #include "utils.h"
36 
37 namespace simpleperf {
38 
39 namespace {
40 
41 using AddrPair = std::pair<uint64_t, uint64_t>;
42 
43 struct AddrPairHash {
operator ()simpleperf::__anon8f351e250111::AddrPairHash44   size_t operator()(const AddrPair& ap) const noexcept {
45     size_t seed = 0;
46     HashCombine(seed, ap.first);
47     HashCombine(seed, ap.second);
48     return seed;
49   }
50 };
51 
// Output formats selectable by the --output option.
enum class OutputFormat {
  // Text format accepted by AutoFDO (--output autofdo).
  AutoFDO,
  // Protobuf file specified by etm_branch_list.proto (--output branch-list).
  BranchList,
};
56 
57 struct AutoFDOBinaryInfo {
58   uint64_t first_load_segment_addr = 0;
59   std::unordered_map<AddrPair, uint64_t, AddrPairHash> range_count_map;
60   std::unordered_map<AddrPair, uint64_t, AddrPairHash> branch_count_map;
61 
AddInstrRangesimpleperf::__anon8f351e250111::AutoFDOBinaryInfo62   void AddInstrRange(const ETMInstrRange& instr_range) {
63     uint64_t total_count = instr_range.branch_taken_count;
64     OverflowSafeAdd(total_count, instr_range.branch_not_taken_count);
65     OverflowSafeAdd(range_count_map[AddrPair(instr_range.start_addr, instr_range.end_addr)],
66                     total_count);
67     if (instr_range.branch_taken_count > 0) {
68       OverflowSafeAdd(branch_count_map[AddrPair(instr_range.end_addr, instr_range.branch_to_addr)],
69                       instr_range.branch_taken_count);
70     }
71   }
72 
Mergesimpleperf::__anon8f351e250111::AutoFDOBinaryInfo73   void Merge(const AutoFDOBinaryInfo& other) {
74     for (const auto& p : other.range_count_map) {
75       auto res = range_count_map.emplace(p.first, p.second);
76       if (!res.second) {
77         OverflowSafeAdd(res.first->second, p.second);
78       }
79     }
80     for (const auto& p : other.branch_count_map) {
81       auto res = branch_count_map.emplace(p.first, p.second);
82       if (!res.second) {
83         OverflowSafeAdd(res.first->second, p.second);
84       }
85     }
86   }
87 };
88 
89 using AutoFDOBinaryCallback = std::function<void(const BinaryKey&, AutoFDOBinaryInfo&)>;
90 using BranchListBinaryCallback = std::function<void(const BinaryKey&, BranchListBinaryInfo&)>;
91 
92 class ETMThreadTreeWithFilter : public ETMThreadTree {
93  public:
ExcludePid(pid_t pid)94   void ExcludePid(pid_t pid) { exclude_pid_ = pid; }
GetThreadTree()95   ThreadTree& GetThreadTree() { return thread_tree_; }
DisableThreadExitRecords()96   void DisableThreadExitRecords() override { thread_tree_.DisableThreadExitRecords(); }
97 
FindThread(int tid)98   const ThreadEntry* FindThread(int tid) override {
99     const ThreadEntry* thread = thread_tree_.FindThread(tid);
100     if (thread != nullptr && exclude_pid_ && thread->pid == exclude_pid_) {
101       return nullptr;
102     }
103     return thread;
104   }
105 
GetKernelMaps()106   const MapSet& GetKernelMaps() override { return thread_tree_.GetKernelMaps(); }
107 
108  private:
109   ThreadTree thread_tree_;
110   std::optional<pid_t> exclude_pid_;
111 };
112 
113 class BinaryFilter {
114  public:
BinaryFilter(const RegEx * binary_name_regex)115   BinaryFilter(const RegEx* binary_name_regex) : binary_name_regex_(binary_name_regex) {}
116 
Filter(Dso * dso)117   bool Filter(Dso* dso) {
118     auto lookup = dso_filter_cache_.find(dso);
119     if (lookup != dso_filter_cache_.end()) {
120       return lookup->second;
121     }
122     bool match = Filter(dso->Path());
123     dso_filter_cache_.insert({dso, match});
124     return match;
125   }
126 
Filter(const std::string & path)127   bool Filter(const std::string& path) {
128     return binary_name_regex_ == nullptr || binary_name_regex_->Search(path);
129   }
130 
131  private:
132   const RegEx* binary_name_regex_;
133   std::unordered_map<Dso*, bool> dso_filter_cache_;
134 };
135 
GetFirstLoadSegmentVaddr(Dso * dso)136 static uint64_t GetFirstLoadSegmentVaddr(Dso* dso) {
137   ElfStatus status;
138   if (auto elf = ElfFile::Open(dso->GetDebugFilePath(), &status); elf) {
139     for (const auto& segment : elf->GetProgramHeader()) {
140       if (segment.is_load) {
141         return segment.vaddr;
142       }
143     }
144   }
145   return 0;
146 }
147 
148 // Read perf.data, and generate AutoFDOBinaryInfo or BranchListBinaryInfo.
149 // To avoid resetting data, it only processes one input file per instance.
150 class PerfDataReader {
151  public:
PerfDataReader(const std::string & filename,bool exclude_perf,ETMDumpOption etm_dump_option,const RegEx * binary_name_regex)152   PerfDataReader(const std::string& filename, bool exclude_perf, ETMDumpOption etm_dump_option,
153                  const RegEx* binary_name_regex)
154       : filename_(filename),
155         exclude_perf_(exclude_perf),
156         etm_dump_option_(etm_dump_option),
157         binary_filter_(binary_name_regex) {}
158 
SetCallback(const AutoFDOBinaryCallback & callback)159   void SetCallback(const AutoFDOBinaryCallback& callback) { autofdo_callback_ = callback; }
SetCallback(const BranchListBinaryCallback & callback)160   void SetCallback(const BranchListBinaryCallback& callback) { branch_list_callback_ = callback; }
161 
Read()162   bool Read() {
163     record_file_reader_ = RecordFileReader::CreateInstance(filename_);
164     if (!record_file_reader_) {
165       return false;
166     }
167     if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_ETM_BRANCH_LIST)) {
168       return ProcessETMBranchListFeature();
169     }
170     if (exclude_perf_) {
171       const auto& info_map = record_file_reader_->GetMetaInfoFeature();
172       if (auto it = info_map.find("recording_process"); it == info_map.end()) {
173         LOG(ERROR) << filename_ << " doesn't support --exclude-perf";
174         return false;
175       } else {
176         int pid;
177         if (!android::base::ParseInt(it->second, &pid, 0)) {
178           LOG(ERROR) << "invalid recording_process " << it->second << " in " << filename_;
179           return false;
180         }
181         thread_tree_.ExcludePid(pid);
182       }
183     }
184     if (!record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_.GetThreadTree())) {
185       return false;
186     }
187     if (!record_file_reader_->ReadDataSection([this](auto r) { return ProcessRecord(r.get()); })) {
188       return false;
189     }
190     if (etm_decoder_ && !etm_decoder_->FinishData()) {
191       return false;
192     }
193     if (autofdo_callback_) {
194       ProcessAutoFDOBinaryInfo();
195     } else if (branch_list_callback_) {
196       ProcessBranchListBinaryInfo();
197     }
198     return true;
199   }
200 
201  private:
ProcessETMBranchListFeature()202   bool ProcessETMBranchListFeature() {
203     if (exclude_perf_) {
204       LOG(WARNING) << "--exclude-perf has no effect on perf.data with etm branch list";
205     }
206     if (autofdo_callback_) {
207       LOG(ERROR) << "convert to autofdo format isn't support on perf.data with etm branch list";
208       return false;
209     }
210     CHECK(branch_list_callback_);
211     std::string s;
212     if (!record_file_reader_->ReadFeatureSection(PerfFileFormat::FEAT_ETM_BRANCH_LIST, &s)) {
213       return false;
214     }
215     BranchListBinaryMap binary_map;
216     if (!StringToBranchListBinaryMap(s, binary_map)) {
217       return false;
218     }
219     for (auto& [key, binary] : binary_map) {
220       if (!binary_filter_.Filter(key.path)) {
221         continue;
222       }
223       branch_list_callback_(key, binary);
224     }
225     return true;
226   }
227 
ProcessRecord(Record * r)228   bool ProcessRecord(Record* r) {
229     thread_tree_.GetThreadTree().Update(*r);
230     if (r->type() == PERF_RECORD_AUXTRACE_INFO) {
231       etm_decoder_ = ETMDecoder::Create(*static_cast<AuxTraceInfoRecord*>(r), thread_tree_);
232       if (!etm_decoder_) {
233         return false;
234       }
235       etm_decoder_->EnableDump(etm_dump_option_);
236       if (autofdo_callback_) {
237         etm_decoder_->RegisterCallback(
238             [this](const ETMInstrRange& range) { ProcessInstrRange(range); });
239       } else if (branch_list_callback_) {
240         etm_decoder_->RegisterCallback(
241             [this](const ETMBranchList& branch) { ProcessBranchList(branch); });
242       }
243     } else if (r->type() == PERF_RECORD_AUX) {
244       AuxRecord* aux = static_cast<AuxRecord*>(r);
245       if (aux->data->aux_size > SIZE_MAX) {
246         LOG(ERROR) << "invalid aux size";
247         return false;
248       }
249       size_t aux_size = aux->data->aux_size;
250       if (aux_size > 0) {
251         bool error = false;
252         if (!record_file_reader_->ReadAuxData(aux->Cpu(), aux->data->aux_offset, aux_size,
253                                               aux_data_buffer_, error)) {
254           return !error;
255         }
256         if (!etm_decoder_) {
257           LOG(ERROR) << "ETMDecoder isn't created";
258           return false;
259         }
260         return etm_decoder_->ProcessData(aux_data_buffer_.data(), aux_size, !aux->Unformatted(),
261                                          aux->Cpu());
262       }
263     } else if (r->type() == PERF_RECORD_MMAP && r->InKernel()) {
264       auto& mmap_r = *static_cast<MmapRecord*>(r);
265       if (android::base::StartsWith(mmap_r.filename, DEFAULT_KERNEL_MMAP_NAME)) {
266         kernel_map_start_addr_ = mmap_r.data->addr;
267       }
268     }
269     return true;
270   }
271 
ProcessInstrRange(const ETMInstrRange & instr_range)272   void ProcessInstrRange(const ETMInstrRange& instr_range) {
273     if (!binary_filter_.Filter(instr_range.dso)) {
274       return;
275     }
276 
277     autofdo_binary_map_[instr_range.dso].AddInstrRange(instr_range);
278   }
279 
ProcessBranchList(const ETMBranchList & branch_list)280   void ProcessBranchList(const ETMBranchList& branch_list) {
281     if (!binary_filter_.Filter(branch_list.dso)) {
282       return;
283     }
284 
285     auto& branch_map = branch_list_binary_map_[branch_list.dso].branch_map;
286     ++branch_map[branch_list.addr][branch_list.branch];
287   }
288 
ProcessAutoFDOBinaryInfo()289   void ProcessAutoFDOBinaryInfo() {
290     for (auto& p : autofdo_binary_map_) {
291       Dso* dso = p.first;
292       AutoFDOBinaryInfo& binary = p.second;
293       binary.first_load_segment_addr = GetFirstLoadSegmentVaddr(dso);
294       autofdo_callback_(BinaryKey(dso, 0), binary);
295     }
296   }
297 
ProcessBranchListBinaryInfo()298   void ProcessBranchListBinaryInfo() {
299     for (auto& p : branch_list_binary_map_) {
300       Dso* dso = p.first;
301       BranchListBinaryInfo& binary = p.second;
302       binary.dso_type = dso->type();
303       BinaryKey key(dso, 0);
304       if (binary.dso_type == DSO_KERNEL) {
305         if (kernel_map_start_addr_ == 0) {
306           LOG(WARNING) << "Can't convert kernel ip addresses without kernel start addr. So remove "
307                           "branches for the kernel.";
308           continue;
309         }
310         if (dso->GetDebugFilePath() == dso->Path()) {
311           // vmlinux isn't available. We still use kernel ip addr. Put kernel start addr in proto
312           // for address conversion later.
313           key.kernel_start_addr = kernel_map_start_addr_;
314         }
315       }
316       branch_list_callback_(key, binary);
317     }
318   }
319 
320   const std::string filename_;
321   bool exclude_perf_;
322   ETMDumpOption etm_dump_option_;
323   BinaryFilter binary_filter_;
324   AutoFDOBinaryCallback autofdo_callback_;
325   BranchListBinaryCallback branch_list_callback_;
326 
327   std::vector<uint8_t> aux_data_buffer_;
328   std::unique_ptr<ETMDecoder> etm_decoder_;
329   std::unique_ptr<RecordFileReader> record_file_reader_;
330   ETMThreadTreeWithFilter thread_tree_;
331   uint64_t kernel_map_start_addr_ = 0;
332   // Store results for AutoFDO.
333   std::unordered_map<Dso*, AutoFDOBinaryInfo> autofdo_binary_map_;
334   // Store results for BranchList.
335   std::unordered_map<Dso*, BranchListBinaryInfo> branch_list_binary_map_;
336 };
337 
338 // Read a protobuf file specified by etm_branch_list.proto, and generate BranchListBinaryInfo.
339 class BranchListReader {
340  public:
BranchListReader(const std::string & filename,const RegEx * binary_name_regex)341   BranchListReader(const std::string& filename, const RegEx* binary_name_regex)
342       : filename_(filename), binary_filter_(binary_name_regex) {}
343 
SetCallback(const BranchListBinaryCallback & callback)344   void SetCallback(const BranchListBinaryCallback& callback) { callback_ = callback; }
345 
Read()346   bool Read() {
347     std::string s;
348     if (!android::base::ReadFileToString(filename_, &s)) {
349       PLOG(ERROR) << "failed to read " << filename_;
350       return false;
351     }
352     BranchListBinaryMap binary_map;
353     if (!StringToBranchListBinaryMap(s, binary_map)) {
354       PLOG(ERROR) << "file is in wrong format: " << filename_;
355       return false;
356     }
357     for (auto& [key, binary] : binary_map) {
358       if (!binary_filter_.Filter(key.path)) {
359         continue;
360       }
361       callback_(key, binary);
362     }
363     return true;
364   }
365 
366  private:
367   const std::string filename_;
368   BinaryFilter binary_filter_;
369   BranchListBinaryCallback callback_;
370 };
371 
372 // Convert BranchListBinaryInfo into AutoFDOBinaryInfo.
373 class BranchListToAutoFDOConverter {
374  public:
Convert(const BinaryKey & key,BranchListBinaryInfo & binary)375   std::unique_ptr<AutoFDOBinaryInfo> Convert(const BinaryKey& key, BranchListBinaryInfo& binary) {
376     BuildId build_id = key.build_id;
377     std::unique_ptr<Dso> dso = Dso::CreateDsoWithBuildId(binary.dso_type, key.path, build_id);
378     if (!dso || !CheckBuildId(dso.get(), key.build_id)) {
379       return nullptr;
380     }
381     std::unique_ptr<AutoFDOBinaryInfo> autofdo_binary(new AutoFDOBinaryInfo);
382     autofdo_binary->first_load_segment_addr = GetFirstLoadSegmentVaddr(dso.get());
383 
384     if (dso->type() == DSO_KERNEL) {
385       ModifyBranchMapForKernel(dso.get(), key.kernel_start_addr, binary);
386     }
387 
388     auto process_instr_range = [&](const ETMInstrRange& range) {
389       CHECK_EQ(range.dso, dso.get());
390       autofdo_binary->AddInstrRange(range);
391     };
392 
393     auto result =
394         ConvertBranchMapToInstrRanges(dso.get(), binary.GetOrderedBranchMap(), process_instr_range);
395     if (!result.ok()) {
396       LOG(WARNING) << "failed to build instr ranges for binary " << dso->Path() << ": "
397                    << result.error();
398       return nullptr;
399     }
400     return autofdo_binary;
401   }
402 
403  private:
CheckBuildId(Dso * dso,const BuildId & expected_build_id)404   bool CheckBuildId(Dso* dso, const BuildId& expected_build_id) {
405     if (expected_build_id.IsEmpty()) {
406       return true;
407     }
408     BuildId build_id;
409     return GetBuildIdFromDsoPath(dso->GetDebugFilePath(), &build_id) &&
410            build_id == expected_build_id;
411   }
412 
ModifyBranchMapForKernel(Dso * dso,uint64_t kernel_start_addr,BranchListBinaryInfo & binary)413   void ModifyBranchMapForKernel(Dso* dso, uint64_t kernel_start_addr,
414                                 BranchListBinaryInfo& binary) {
415     if (kernel_start_addr == 0) {
416       // vmlinux has been provided when generating branch lists. Addresses in branch lists are
417       // already vaddrs in vmlinux.
418       return;
419     }
420     // Addresses are still kernel ip addrs in memory. Need to convert them to vaddrs in vmlinux.
421     UnorderedBranchMap new_branch_map;
422     for (auto& p : binary.branch_map) {
423       uint64_t vaddr_in_file = dso->IpToVaddrInFile(p.first, kernel_start_addr, 0);
424       new_branch_map[vaddr_in_file] = std::move(p.second);
425     }
426     binary.branch_map = std::move(new_branch_map);
427   }
428 };
429 
430 // Write instruction ranges to a file in AutoFDO text format.
431 class AutoFDOWriter {
432  public:
AddAutoFDOBinary(const BinaryKey & key,AutoFDOBinaryInfo & binary)433   void AddAutoFDOBinary(const BinaryKey& key, AutoFDOBinaryInfo& binary) {
434     auto it = binary_map_.find(key);
435     if (it == binary_map_.end()) {
436       binary_map_[key] = std::move(binary);
437     } else {
438       it->second.Merge(binary);
439     }
440   }
441 
Write(const std::string & output_filename)442   bool Write(const std::string& output_filename) {
443     std::unique_ptr<FILE, decltype(&fclose)> output_fp(fopen(output_filename.c_str(), "w"), fclose);
444     if (!output_fp) {
445       PLOG(ERROR) << "failed to write to " << output_filename;
446       return false;
447     }
448     // autofdo_binary_map is used to store instruction ranges, which can have a large amount. And
449     // it has a larger access time (instruction ranges * executed time). So it's better to use
450     // unorder_maps to speed up access time. But we also want a stable output here, to compare
451     // output changes result from code changes. So generate a sorted output here.
452     std::vector<BinaryKey> keys;
453     for (auto& p : binary_map_) {
454       keys.emplace_back(p.first);
455     }
456     std::sort(keys.begin(), keys.end(),
457               [](const BinaryKey& key1, const BinaryKey& key2) { return key1.path < key2.path; });
458     if (keys.size() > 1) {
459       fprintf(output_fp.get(),
460               "// Please split this file. AutoFDO only accepts profile for one binary.\n");
461     }
462     for (const auto& key : keys) {
463       const AutoFDOBinaryInfo& binary = binary_map_[key];
464       // AutoFDO text format needs file_offsets instead of virtual addrs in a binary. And it uses
465       // below formula: vaddr = file_offset + GetFirstLoadSegmentVaddr().
466       uint64_t first_load_segment_addr = binary.first_load_segment_addr;
467 
468       auto to_offset = [&](uint64_t vaddr) -> uint64_t {
469         if (vaddr == 0) {
470           return 0;
471         }
472         CHECK_GE(vaddr, first_load_segment_addr);
473         return vaddr - first_load_segment_addr;
474       };
475 
476       // Write range_count_map.
477       std::map<AddrPair, uint64_t> range_count_map(binary.range_count_map.begin(),
478                                                    binary.range_count_map.end());
479       fprintf(output_fp.get(), "%zu\n", range_count_map.size());
480       for (const auto& pair2 : range_count_map) {
481         const AddrPair& addr_range = pair2.first;
482         uint64_t count = pair2.second;
483 
484         fprintf(output_fp.get(), "%" PRIx64 "-%" PRIx64 ":%" PRIu64 "\n",
485                 to_offset(addr_range.first), to_offset(addr_range.second), count);
486       }
487 
488       // Write addr_count_map.
489       fprintf(output_fp.get(), "0\n");
490 
491       // Write branch_count_map.
492       std::map<AddrPair, uint64_t> branch_count_map(binary.branch_count_map.begin(),
493                                                     binary.branch_count_map.end());
494       fprintf(output_fp.get(), "%zu\n", branch_count_map.size());
495       for (const auto& pair2 : branch_count_map) {
496         const AddrPair& branch = pair2.first;
497         uint64_t count = pair2.second;
498 
499         fprintf(output_fp.get(), "%" PRIx64 "->%" PRIx64 ":%" PRIu64 "\n", to_offset(branch.first),
500                 to_offset(branch.second), count);
501       }
502 
503       // Write the binary path in comment.
504       fprintf(output_fp.get(), "// build_id: %s\n", key.build_id.ToString().c_str());
505       fprintf(output_fp.get(), "// %s\n\n", key.path.c_str());
506     }
507     return true;
508   }
509 
510  private:
511   std::unordered_map<BinaryKey, AutoFDOBinaryInfo, BinaryKeyHash> binary_map_;
512 };
513 
514 // Merge BranchListBinaryInfo.
515 struct BranchListMerger {
AddBranchListBinarysimpleperf::__anon8f351e250111::BranchListMerger516   void AddBranchListBinary(const BinaryKey& key, BranchListBinaryInfo& binary) {
517     auto it = binary_map.find(key);
518     if (it == binary_map.end()) {
519       binary_map[key] = std::move(binary);
520     } else {
521       it->second.Merge(binary);
522     }
523   }
524 
525   BranchListBinaryMap binary_map;
526 };
527 
528 // Write branch lists to a protobuf file specified by etm_branch_list.proto.
529 class BranchListWriter {
530  public:
Write(const std::string & output_filename,const BranchListBinaryMap & binary_map)531   bool Write(const std::string& output_filename, const BranchListBinaryMap& binary_map) {
532     // Don't produce empty output file.
533     if (binary_map.empty()) {
534       LOG(INFO) << "Skip empty output file.";
535       unlink(output_filename.c_str());
536       return true;
537     }
538     std::string s;
539     if (!BranchListBinaryMapToString(binary_map, s)) {
540       LOG(ERROR) << "invalid BranchListBinaryMap";
541       return false;
542     }
543     if (!android::base::WriteStringToFile(s, output_filename)) {
544       PLOG(ERROR) << "failed to write to " << output_filename;
545       return false;
546     }
547     return true;
548   }
549 };
550 
551 class InjectCommand : public Command {
552  public:
InjectCommand()553   InjectCommand()
554       : Command("inject", "parse etm instruction tracing data",
555                 // clang-format off
556 "Usage: simpleperf inject [options]\n"
557 "--binary binary_name         Generate data only for binaries matching binary_name regex.\n"
558 "-i file1,file2,...           Input files. Default is perf.data. Support below formats:\n"
559 "                               1. perf.data generated by recording cs-etm event type.\n"
560 "                               2. branch_list file generated by `inject --output branch-list`.\n"
561 "                             If a file name starts with @, it contains a list of input files.\n"
562 "-o <file>                    output file. Default is perf_inject.data.\n"
563 "--output <format>            Select output file format:\n"
564 "                               autofdo      -- text format accepted by TextSampleReader\n"
565 "                                               of AutoFDO\n"
566 "                               branch-list  -- protobuf file in etm_branch_list.proto\n"
567 "                             Default is autofdo.\n"
568 "--dump-etm type1,type2,...   Dump etm data. A type is one of raw, packet and element.\n"
569 "--exclude-perf               Exclude trace data for the recording process.\n"
570 "--symdir <dir>               Look for binaries in a directory recursively.\n"
571 "\n"
572 "Examples:\n"
573 "1. Generate autofdo text output.\n"
574 "$ simpleperf inject -i perf.data -o autofdo.txt --output autofdo\n"
575 "\n"
576 "2. Generate branch list proto, then convert to autofdo text.\n"
577 "$ simpleperf inject -i perf.data -o branch_list.data --output branch-list\n"
578 "$ simpleperf inject -i branch_list.data -o autofdo.txt --output autofdo\n"
579                 // clang-format on
580         ) {}
581 
Run(const std::vector<std::string> & args)582   bool Run(const std::vector<std::string>& args) override {
583     GOOGLE_PROTOBUF_VERIFY_VERSION;
584     if (!ParseOptions(args)) {
585       return false;
586     }
587 
588     CHECK(!input_filenames_.empty());
589     if (IsPerfDataFile(input_filenames_[0])) {
590       switch (output_format_) {
591         case OutputFormat::AutoFDO:
592           return ConvertPerfDataToAutoFDO();
593         case OutputFormat::BranchList:
594           return ConvertPerfDataToBranchList();
595       }
596     } else {
597       switch (output_format_) {
598         case OutputFormat::AutoFDO:
599           return ConvertBranchListToAutoFDO();
600         case OutputFormat::BranchList:
601           return ConvertBranchListToBranchList();
602       }
603     }
604   }
605 
606  private:
ParseOptions(const std::vector<std::string> & args)607   bool ParseOptions(const std::vector<std::string>& args) {
608     const OptionFormatMap option_formats = {
609         {"--binary", {OptionValueType::STRING, OptionType::SINGLE}},
610         {"--dump-etm", {OptionValueType::STRING, OptionType::SINGLE}},
611         {"--exclude-perf", {OptionValueType::NONE, OptionType::SINGLE}},
612         {"-i", {OptionValueType::STRING, OptionType::MULTIPLE}},
613         {"-o", {OptionValueType::STRING, OptionType::SINGLE}},
614         {"--output", {OptionValueType::STRING, OptionType::SINGLE}},
615         {"--symdir", {OptionValueType::STRING, OptionType::MULTIPLE}},
616     };
617     OptionValueMap options;
618     std::vector<std::pair<OptionName, OptionValue>> ordered_options;
619     if (!PreprocessOptions(args, option_formats, &options, &ordered_options, nullptr)) {
620       return false;
621     }
622 
623     if (auto value = options.PullValue("--binary"); value) {
624       binary_name_regex_ = RegEx::Create(*value->str_value);
625       if (binary_name_regex_ == nullptr) {
626         return false;
627       }
628     }
629     if (auto value = options.PullValue("--dump-etm"); value) {
630       if (!ParseEtmDumpOption(*value->str_value, &etm_dump_option_)) {
631         return false;
632       }
633     }
634     exclude_perf_ = options.PullBoolValue("--exclude-perf");
635 
636     for (const OptionValue& value : options.PullValues("-i")) {
637       std::vector<std::string> files = android::base::Split(*value.str_value, ",");
638       for (std::string& file : files) {
639         if (android::base::StartsWith(file, "@")) {
640           if (!ReadFileList(file.substr(1), &input_filenames_)) {
641             return false;
642           }
643         } else {
644           input_filenames_.emplace_back(file);
645         }
646       }
647     }
648     if (input_filenames_.empty()) {
649       input_filenames_.emplace_back("perf.data");
650     }
651     options.PullStringValue("-o", &output_filename_);
652     if (auto value = options.PullValue("--output"); value) {
653       const std::string& output = *value->str_value;
654       if (output == "autofdo") {
655         output_format_ = OutputFormat::AutoFDO;
656       } else if (output == "branch-list") {
657         output_format_ = OutputFormat::BranchList;
658       } else {
659         LOG(ERROR) << "unknown format in --output option: " << output;
660         return false;
661       }
662     }
663     if (auto value = options.PullValue("--symdir"); value) {
664       if (!Dso::AddSymbolDir(*value->str_value)) {
665         return false;
666       }
667       // Symbol dirs are cleaned when Dso count is decreased to zero, which can happen between
668       // processing input files. To make symbol dirs always available, create a placeholder dso to
669       // prevent cleaning from happening.
670       placeholder_dso_ = Dso::CreateDso(DSO_UNKNOWN_FILE, "unknown");
671     }
672     CHECK(options.values.empty());
673     return true;
674   }
675 
ReadFileList(const std::string & path,std::vector<std::string> * file_list)676   bool ReadFileList(const std::string& path, std::vector<std::string>* file_list) {
677     std::string data;
678     if (!android::base::ReadFileToString(path, &data)) {
679       PLOG(ERROR) << "failed to read " << path;
680       return false;
681     }
682     std::vector<std::string> tokens = android::base::Tokenize(data, " \t\n\r");
683     file_list->insert(file_list->end(), tokens.begin(), tokens.end());
684     return true;
685   }
686 
ConvertPerfDataToAutoFDO()687   bool ConvertPerfDataToAutoFDO() {
688     AutoFDOWriter autofdo_writer;
689     auto callback = [&](const BinaryKey& key, AutoFDOBinaryInfo& binary) {
690       autofdo_writer.AddAutoFDOBinary(key, binary);
691     };
692     for (const auto& input_filename : input_filenames_) {
693       PerfDataReader reader(input_filename, exclude_perf_, etm_dump_option_,
694                             binary_name_regex_.get());
695       reader.SetCallback(callback);
696       if (!reader.Read()) {
697         return false;
698       }
699     }
700     return autofdo_writer.Write(output_filename_);
701   }
702 
ConvertPerfDataToBranchList()703   bool ConvertPerfDataToBranchList() {
704     BranchListMerger branch_list_merger;
705     auto callback = [&](const BinaryKey& key, BranchListBinaryInfo& binary) {
706       branch_list_merger.AddBranchListBinary(key, binary);
707     };
708     for (const auto& input_filename : input_filenames_) {
709       PerfDataReader reader(input_filename, exclude_perf_, etm_dump_option_,
710                             binary_name_regex_.get());
711       reader.SetCallback(callback);
712       if (!reader.Read()) {
713         return false;
714       }
715     }
716     BranchListWriter branch_list_writer;
717     return branch_list_writer.Write(output_filename_, branch_list_merger.binary_map);
718   }
719 
ConvertBranchListToAutoFDO()720   bool ConvertBranchListToAutoFDO() {
721     // Step1 : Merge branch lists from all input files.
722     BranchListMerger branch_list_merger;
723     auto callback = [&](const BinaryKey& key, BranchListBinaryInfo& binary) {
724       branch_list_merger.AddBranchListBinary(key, binary);
725     };
726     for (const auto& input_filename : input_filenames_) {
727       BranchListReader reader(input_filename, binary_name_regex_.get());
728       reader.SetCallback(callback);
729       if (!reader.Read()) {
730         return false;
731       }
732     }
733 
734     // Step2: Convert BranchListBinaryInfo to AutoFDOBinaryInfo.
735     AutoFDOWriter autofdo_writer;
736     BranchListToAutoFDOConverter converter;
737     for (auto& p : branch_list_merger.binary_map) {
738       const BinaryKey& key = p.first;
739       BranchListBinaryInfo& binary = p.second;
740       std::unique_ptr<AutoFDOBinaryInfo> autofdo_binary = converter.Convert(key, binary);
741       if (autofdo_binary) {
742         // Create new BinaryKey with kernel_start_addr = 0. Because AutoFDO output doesn't care
743         // kernel_start_addr.
744         autofdo_writer.AddAutoFDOBinary(BinaryKey(key.path, key.build_id), *autofdo_binary);
745       }
746     }
747 
748     // Step3: Write AutoFDOBinaryInfo.
749     return autofdo_writer.Write(output_filename_);
750   }
751 
ConvertBranchListToBranchList()752   bool ConvertBranchListToBranchList() {
753     // Step1 : Merge branch lists from all input files.
754     BranchListMerger branch_list_merger;
755     auto callback = [&](const BinaryKey& key, BranchListBinaryInfo& binary) {
756       branch_list_merger.AddBranchListBinary(key, binary);
757     };
758     for (const auto& input_filename : input_filenames_) {
759       BranchListReader reader(input_filename, binary_name_regex_.get());
760       reader.SetCallback(callback);
761       if (!reader.Read()) {
762         return false;
763       }
764     }
765     // Step2: Write BranchListBinaryInfo.
766     BranchListWriter branch_list_writer;
767     return branch_list_writer.Write(output_filename_, branch_list_merger.binary_map);
768   }
769 
770   std::unique_ptr<RegEx> binary_name_regex_;
771   bool exclude_perf_ = false;
772   std::vector<std::string> input_filenames_;
773   std::string output_filename_ = "perf_inject.data";
774   OutputFormat output_format_ = OutputFormat::AutoFDO;
775   ETMDumpOption etm_dump_option_;
776 
777   std::unique_ptr<Dso> placeholder_dso_;
778 };
779 
780 }  // namespace
781 
RegisterInjectCommand()782 void RegisterInjectCommand() {
783   return RegisterCommand("inject", [] { return std::unique_ptr<Command>(new InjectCommand); });
784 }
785 
786 }  // namespace simpleperf
787