• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <stdio.h>
18 #include <unistd.h>
19 
20 #include <memory>
21 #include <optional>
22 #include <regex>
23 #include <string>
24 
25 #include <android-base/parseint.h>
26 #include <android-base/strings.h>
27 
28 #include "ETMDecoder.h"
29 #include "cmd_inject_impl.h"
30 #include "command.h"
31 #include "record_file.h"
32 #include "system/extras/simpleperf/etm_branch_list.pb.h"
33 #include "thread_tree.h"
34 #include "utils.h"
35 
36 namespace simpleperf {
37 
// Packs a bit vector into a byte string, eight bits per byte. Bit i of |branch| is stored in
// byte i / 8 at bit position i % 8 (least significant bit first). The result has
// (branch.size() + 7) / 8 bytes, and unused bits in the last byte are zero.
std::string BranchToProtoString(const std::vector<bool>& branch) {
  const size_t bit_count = branch.size();
  std::string packed((bit_count + 7) / 8, '\0');
  size_t bit_index = 0;
  for (bool bit : branch) {
    if (bit) {
      packed[bit_index / 8] |= 1 << (bit_index % 8);
    }
    ++bit_index;
  }
  return packed;
}
48 
// Unpacks |bit_size| bits from a byte string in the layout produced by BranchToProtoString():
// bit i is read from byte i / 8 at bit position i % 8.
//
// The returned vector always has |bit_size| elements. |s| and |bit_size| may come from an input
// file, so |s| can be shorter than |bit_size| requires. Bits not backed by a byte in |s| are
// left unset instead of being read past the end of the string.
std::vector<bool> ProtoStringToBranch(const std::string& s, size_t bit_size) {
  std::vector<bool> branch(bit_size, false);
  const size_t available_bytes = s.size();
  for (size_t i = 0; i < bit_size; i++) {
    const size_t byte_index = i >> 3;
    if (byte_index >= available_bytes) {
      // Remaining bits have no backing data. They stay false.
      break;
    }
    if (s[byte_index] & (1 << (i & 7))) {
      branch[i] = true;
    }
  }
  return branch;
}
58 
59 namespace {
60 
// Value of the magic field of a proto::ETMBranchList message. BranchListWriter stores it in
// every file it writes, and BranchListReader rejects files whose magic differs.
constexpr const char* ETM_BRANCH_LIST_PROTO_MAGIC = "simpleperf:EtmBranchList";

// A pair of addresses. In AutoFDOBinaryInfo it is used as (start_addr, end_addr) of an
// instruction range, and as (end_addr, branch_to_addr) of a taken branch.
using AddrPair = std::pair<uint64_t, uint64_t>;
64 
65 struct AddrPairHash {
operator ()simpleperf::__anon9004e0ec0111::AddrPairHash66   size_t operator()(const AddrPair& ap) const noexcept {
67     size_t seed = 0;
68     HashCombine(seed, ap.first);
69     HashCombine(seed, ap.second);
70     return seed;
71   }
72 };
73 
// Format of the file produced by the inject command.
enum class OutputFormat {
  AutoFDO,     // Text format accepted by TextSampleReader of AutoFDO.
  BranchList,  // Protobuf file in etm_branch_list.proto.
};
78 
79 // When processing binary info in an input file, the binaries are identified by their path.
80 // But this isn't sufficient when merging binary info from multiple input files. Because
81 // binaries for the same path may be changed between generating input files. So after processing
82 // each input file, we create BinaryKeys to identify binaries, which consider path, build_id and
83 // kernel_start_addr (for vmlinux). kernel_start_addr affects how addresses in BranchListBinaryInfo
84 // are interpreted for vmlinux.
85 struct BinaryKey {
86   std::string path;
87   BuildId build_id;
88   uint64_t kernel_start_addr = 0;
89 
BinaryKeysimpleperf::__anon9004e0ec0111::BinaryKey90   BinaryKey() {}
91 
BinaryKeysimpleperf::__anon9004e0ec0111::BinaryKey92   BinaryKey(const std::string& path, BuildId build_id) : path(path), build_id(build_id) {}
93 
BinaryKeysimpleperf::__anon9004e0ec0111::BinaryKey94   BinaryKey(Dso* dso, uint64_t kernel_start_addr) : path(dso->Path()) {
95     build_id = Dso::FindExpectedBuildIdForPath(dso->Path());
96     if (dso->type() == DSO_KERNEL) {
97       this->kernel_start_addr = kernel_start_addr;
98     }
99   }
100 
operator ==simpleperf::__anon9004e0ec0111::BinaryKey101   bool operator==(const BinaryKey& other) const {
102     return path == other.path && build_id == other.build_id &&
103            kernel_start_addr == other.kernel_start_addr;
104   }
105 };
106 
107 struct BinaryKeyHash {
operator ()simpleperf::__anon9004e0ec0111::BinaryKeyHash108   size_t operator()(const BinaryKey& key) const noexcept {
109     size_t seed = 0;
110     HashCombine(seed, key.path);
111     HashCombine(seed, key.build_id);
112     if (key.kernel_start_addr != 0) {
113       HashCombine(seed, key.kernel_start_addr);
114     }
115     return seed;
116   }
117 };
118 
OverflowSafeAdd(uint64_t & dest,uint64_t add)119 static void OverflowSafeAdd(uint64_t& dest, uint64_t add) {
120   if (__builtin_add_overflow(dest, add, &dest)) {
121     LOG(WARNING) << "Branch count overflow happened.";
122     dest = UINT64_MAX;
123   }
124 }
125 
126 struct AutoFDOBinaryInfo {
127   uint64_t first_load_segment_addr = 0;
128   std::unordered_map<AddrPair, uint64_t, AddrPairHash> range_count_map;
129   std::unordered_map<AddrPair, uint64_t, AddrPairHash> branch_count_map;
130 
AddInstrRangesimpleperf::__anon9004e0ec0111::AutoFDOBinaryInfo131   void AddInstrRange(const ETMInstrRange& instr_range) {
132     uint64_t total_count = instr_range.branch_taken_count;
133     OverflowSafeAdd(total_count, instr_range.branch_not_taken_count);
134     OverflowSafeAdd(range_count_map[AddrPair(instr_range.start_addr, instr_range.end_addr)],
135                     total_count);
136     if (instr_range.branch_taken_count > 0) {
137       OverflowSafeAdd(branch_count_map[AddrPair(instr_range.end_addr, instr_range.branch_to_addr)],
138                       instr_range.branch_taken_count);
139     }
140   }
141 
Mergesimpleperf::__anon9004e0ec0111::AutoFDOBinaryInfo142   void Merge(const AutoFDOBinaryInfo& other) {
143     for (const auto& p : other.range_count_map) {
144       auto res = range_count_map.emplace(p.first, p.second);
145       if (!res.second) {
146         OverflowSafeAdd(res.first->second, p.second);
147       }
148     }
149     for (const auto& p : other.branch_count_map) {
150       auto res = branch_count_map.emplace(p.first, p.second);
151       if (!res.second) {
152         OverflowSafeAdd(res.first->second, p.second);
153       }
154     }
155   }
156 };
157 
// Maps an address to the branch sequences recorded at that address. Each sequence is a bit
// vector, mapped to the number of times that sequence was seen.
using UnorderedBranchMap =
    std::unordered_map<uint64_t, std::unordered_map<std::vector<bool>, uint64_t>>;
160 
161 struct BranchListBinaryInfo {
162   DsoType dso_type;
163   UnorderedBranchMap branch_map;
164 
Mergesimpleperf::__anon9004e0ec0111::BranchListBinaryInfo165   void Merge(const BranchListBinaryInfo& other) {
166     for (auto& other_p : other.branch_map) {
167       auto it = branch_map.find(other_p.first);
168       if (it == branch_map.end()) {
169         branch_map[other_p.first] = std::move(other_p.second);
170       } else {
171         auto& map2 = it->second;
172         for (auto& other_p2 : other_p.second) {
173           auto it2 = map2.find(other_p2.first);
174           if (it2 == map2.end()) {
175             map2[other_p2.first] = other_p2.second;
176           } else {
177             OverflowSafeAdd(it2->second, other_p2.second);
178           }
179         }
180       }
181     }
182   }
183 
GetOrderedBranchMapsimpleperf::__anon9004e0ec0111::BranchListBinaryInfo184   BranchMap GetOrderedBranchMap() const {
185     BranchMap result;
186     for (const auto& p : branch_map) {
187       uint64_t addr = p.first;
188       const auto& b_map = p.second;
189       result[addr] = std::map<std::vector<bool>, uint64_t>(b_map.begin(), b_map.end());
190     }
191     return result;
192   }
193 };
194 
// Callbacks that receive the result for one binary: its key and the collected info. The info is
// passed by non-const reference, so a receiver may modify it or take over its contents.
using AutoFDOBinaryCallback = std::function<void(const BinaryKey&, AutoFDOBinaryInfo&)>;
using BranchListBinaryCallback = std::function<void(const BinaryKey&, BranchListBinaryInfo&)>;
197 
198 class ThreadTreeWithFilter : public ThreadTree {
199  public:
ExcludePid(pid_t pid)200   void ExcludePid(pid_t pid) { exclude_pid_ = pid; }
201 
FindThread(int tid) const202   ThreadEntry* FindThread(int tid) const override {
203     ThreadEntry* thread = ThreadTree::FindThread(tid);
204     if (thread != nullptr && exclude_pid_ && thread->pid == exclude_pid_) {
205       return nullptr;
206     }
207     return thread;
208   }
209 
210  private:
211   std::optional<pid_t> exclude_pid_;
212 };
213 
214 class DsoFilter {
215  public:
DsoFilter(const std::regex & binary_name_regex)216   DsoFilter(const std::regex& binary_name_regex) : binary_name_regex_(binary_name_regex) {}
217 
FilterDso(Dso * dso)218   bool FilterDso(Dso* dso) {
219     auto lookup = dso_filter_cache_.find(dso);
220     if (lookup != dso_filter_cache_.end()) {
221       return lookup->second;
222     }
223     bool match = std::regex_search(dso->Path(), binary_name_regex_);
224     dso_filter_cache_.insert({dso, match});
225     return match;
226   }
227 
228  private:
229   std::regex binary_name_regex_;
230   std::unordered_map<Dso*, bool> dso_filter_cache_;
231 };
232 
GetFirstLoadSegmentVaddr(Dso * dso)233 static uint64_t GetFirstLoadSegmentVaddr(Dso* dso) {
234   ElfStatus status;
235   if (auto elf = ElfFile::Open(dso->GetDebugFilePath(), &status); elf) {
236     for (const auto& segment : elf->GetProgramHeader()) {
237       if (segment.is_load) {
238         return segment.vaddr;
239       }
240     }
241   }
242   return 0;
243 }
244 
245 // Read perf.data, and generate AutoFDOBinaryInfo or BranchListBinaryInfo.
246 // To avoid resetting data, it only processes one input file per instance.
247 class PerfDataReader {
248  public:
PerfDataReader(const std::string & filename,bool exclude_perf,ETMDumpOption etm_dump_option,const std::regex & binary_name_regex)249   PerfDataReader(const std::string& filename, bool exclude_perf, ETMDumpOption etm_dump_option,
250                  const std::regex& binary_name_regex)
251       : filename_(filename),
252         exclude_perf_(exclude_perf),
253         etm_dump_option_(etm_dump_option),
254         dso_filter_(binary_name_regex) {}
255 
SetCallback(const AutoFDOBinaryCallback & callback)256   void SetCallback(const AutoFDOBinaryCallback& callback) { autofdo_callback_ = callback; }
SetCallback(const BranchListBinaryCallback & callback)257   void SetCallback(const BranchListBinaryCallback& callback) { branch_list_callback_ = callback; }
258 
Read()259   bool Read() {
260     record_file_reader_ = RecordFileReader::CreateInstance(filename_);
261     if (!record_file_reader_) {
262       return false;
263     }
264     if (exclude_perf_) {
265       const auto& info_map = record_file_reader_->GetMetaInfoFeature();
266       if (auto it = info_map.find("recording_process"); it == info_map.end()) {
267         LOG(ERROR) << filename_ << " doesn't support --exclude-perf";
268         return false;
269       } else {
270         int pid;
271         if (!android::base::ParseInt(it->second, &pid, 0)) {
272           LOG(ERROR) << "invalid recording_process " << it->second << " in " << filename_;
273           return false;
274         }
275         thread_tree_.ExcludePid(pid);
276       }
277     }
278     record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree_);
279     if (!record_file_reader_->ReadDataSection([this](auto r) { return ProcessRecord(r.get()); })) {
280       return false;
281     }
282     if (etm_decoder_ && !etm_decoder_->FinishData()) {
283       return false;
284     }
285     if (autofdo_callback_) {
286       ProcessAutoFDOBinaryInfo();
287     } else if (branch_list_callback_) {
288       ProcessBranchListBinaryInfo();
289     }
290     return true;
291   }
292 
293  private:
ProcessRecord(Record * r)294   bool ProcessRecord(Record* r) {
295     thread_tree_.Update(*r);
296     if (r->type() == PERF_RECORD_AUXTRACE_INFO) {
297       etm_decoder_ = ETMDecoder::Create(*static_cast<AuxTraceInfoRecord*>(r), thread_tree_);
298       if (!etm_decoder_) {
299         return false;
300       }
301       etm_decoder_->EnableDump(etm_dump_option_);
302       if (autofdo_callback_) {
303         etm_decoder_->RegisterCallback(
304             [this](const ETMInstrRange& range) { ProcessInstrRange(range); });
305       } else if (branch_list_callback_) {
306         etm_decoder_->RegisterCallback(
307             [this](const ETMBranchList& branch) { ProcessBranchList(branch); });
308       }
309     } else if (r->type() == PERF_RECORD_AUX) {
310       AuxRecord* aux = static_cast<AuxRecord*>(r);
311       uint64_t aux_size = aux->data->aux_size;
312       if (aux_size > 0) {
313         if (aux_data_buffer_.size() < aux_size) {
314           aux_data_buffer_.resize(aux_size);
315         }
316         if (!record_file_reader_->ReadAuxData(aux->Cpu(), aux->data->aux_offset,
317                                               aux_data_buffer_.data(), aux_size)) {
318           LOG(ERROR) << "failed to read aux data in " << filename_;
319           return false;
320         }
321         return etm_decoder_->ProcessData(aux_data_buffer_.data(), aux_size, !aux->Unformatted(),
322                                          aux->Cpu());
323       }
324     } else if (r->type() == PERF_RECORD_MMAP && r->InKernel()) {
325       auto& mmap_r = *static_cast<MmapRecord*>(r);
326       if (android::base::StartsWith(mmap_r.filename, DEFAULT_KERNEL_MMAP_NAME)) {
327         kernel_map_start_addr_ = mmap_r.data->addr;
328       }
329     }
330     return true;
331   }
332 
ProcessInstrRange(const ETMInstrRange & instr_range)333   void ProcessInstrRange(const ETMInstrRange& instr_range) {
334     if (!dso_filter_.FilterDso(instr_range.dso)) {
335       return;
336     }
337 
338     autofdo_binary_map_[instr_range.dso].AddInstrRange(instr_range);
339   }
340 
ProcessBranchList(const ETMBranchList & branch_list)341   void ProcessBranchList(const ETMBranchList& branch_list) {
342     if (!dso_filter_.FilterDso(branch_list.dso)) {
343       return;
344     }
345 
346     auto& branch_map = branch_list_binary_map_[branch_list.dso].branch_map;
347     ++branch_map[branch_list.addr][branch_list.branch];
348   }
349 
ProcessAutoFDOBinaryInfo()350   void ProcessAutoFDOBinaryInfo() {
351     for (auto& p : autofdo_binary_map_) {
352       Dso* dso = p.first;
353       AutoFDOBinaryInfo& binary = p.second;
354       binary.first_load_segment_addr = GetFirstLoadSegmentVaddr(dso);
355       autofdo_callback_(BinaryKey(dso, 0), binary);
356     }
357   }
358 
ProcessBranchListBinaryInfo()359   void ProcessBranchListBinaryInfo() {
360     for (auto& p : branch_list_binary_map_) {
361       Dso* dso = p.first;
362       BranchListBinaryInfo& binary = p.second;
363       binary.dso_type = dso->type();
364       BinaryKey key(dso, 0);
365       if (binary.dso_type == DSO_KERNEL) {
366         if (kernel_map_start_addr_ == 0) {
367           LOG(WARNING) << "Can't convert kernel ip addresses without kernel start addr. So remove "
368                           "branches for the kernel.";
369           continue;
370         }
371         if (dso->GetDebugFilePath() == dso->Path()) {
372           // vmlinux isn't available. We still use kernel ip addr. Put kernel start addr in proto
373           // for address conversion later.
374           key.kernel_start_addr = kernel_map_start_addr_;
375         }
376       }
377       branch_list_callback_(key, binary);
378     }
379   }
380 
381   const std::string filename_;
382   bool exclude_perf_;
383   ETMDumpOption etm_dump_option_;
384   DsoFilter dso_filter_;
385   AutoFDOBinaryCallback autofdo_callback_;
386   BranchListBinaryCallback branch_list_callback_;
387 
388   std::vector<uint8_t> aux_data_buffer_;
389   std::unique_ptr<ETMDecoder> etm_decoder_;
390   std::unique_ptr<RecordFileReader> record_file_reader_;
391   ThreadTreeWithFilter thread_tree_;
392   uint64_t kernel_map_start_addr_ = 0;
393   // Store results for AutoFDO.
394   std::unordered_map<Dso*, AutoFDOBinaryInfo> autofdo_binary_map_;
395   // Store results for BranchList.
396   std::unordered_map<Dso*, BranchListBinaryInfo> branch_list_binary_map_;
397 };
398 
399 // Read a protobuf file specified by etm_branch_list.proto, and generate BranchListBinaryInfo.
400 class BranchListReader {
401  public:
BranchListReader(const std::string & filename,const std::regex binary_name_regex)402   BranchListReader(const std::string& filename, const std::regex binary_name_regex)
403       : filename_(filename), binary_name_regex_(binary_name_regex) {}
404 
SetCallback(const BranchListBinaryCallback & callback)405   void SetCallback(const BranchListBinaryCallback& callback) { callback_ = callback; }
406 
Read()407   bool Read() {
408     auto fd = FileHelper::OpenReadOnly(filename_);
409     if (!fd.ok()) {
410       PLOG(ERROR) << "failed to open " << filename_;
411       return false;
412     }
413 
414     proto::ETMBranchList branch_list_proto;
415     if (!branch_list_proto.ParseFromFileDescriptor(fd)) {
416       PLOG(ERROR) << "failed to read msg from " << filename_;
417       return false;
418     }
419     if (branch_list_proto.magic() != ETM_BRANCH_LIST_PROTO_MAGIC) {
420       PLOG(ERROR) << "file not in format etm_branch_list.proto: " << filename_;
421       return false;
422     }
423 
424     for (size_t i = 0; i < branch_list_proto.binaries_size(); i++) {
425       const auto& binary_proto = branch_list_proto.binaries(i);
426       if (!std::regex_search(binary_proto.path(), binary_name_regex_)) {
427         continue;
428       }
429       BinaryKey key(binary_proto.path(), BuildId(binary_proto.build_id()));
430       if (binary_proto.has_kernel_info()) {
431         key.kernel_start_addr = binary_proto.kernel_info().kernel_start_addr();
432       }
433       BranchListBinaryInfo binary;
434       auto dso_type = ToDsoType(binary_proto.type());
435       if (!dso_type) {
436         LOG(ERROR) << "invalid binary type in " << filename_;
437         return false;
438       }
439       binary.dso_type = dso_type.value();
440       binary.branch_map = BuildUnorderedBranchMap(binary_proto);
441       callback_(key, binary);
442     }
443     return true;
444   }
445 
446  private:
ToDsoType(proto::ETMBranchList_Binary::BinaryType binary_type)447   std::optional<DsoType> ToDsoType(proto::ETMBranchList_Binary::BinaryType binary_type) {
448     switch (binary_type) {
449       case proto::ETMBranchList_Binary::ELF_FILE:
450         return DSO_ELF_FILE;
451       case proto::ETMBranchList_Binary::KERNEL:
452         return DSO_KERNEL;
453       case proto::ETMBranchList_Binary::KERNEL_MODULE:
454         return DSO_KERNEL_MODULE;
455       default:
456         LOG(ERROR) << "unexpected binary type " << binary_type;
457         return std::nullopt;
458     }
459   }
460 
BuildUnorderedBranchMap(const proto::ETMBranchList_Binary & binary_proto)461   UnorderedBranchMap BuildUnorderedBranchMap(const proto::ETMBranchList_Binary& binary_proto) {
462     UnorderedBranchMap branch_map;
463     for (size_t i = 0; i < binary_proto.addrs_size(); i++) {
464       const auto& addr_proto = binary_proto.addrs(i);
465       auto& b_map = branch_map[addr_proto.addr()];
466       for (size_t j = 0; j < addr_proto.branches_size(); j++) {
467         const auto& branch_proto = addr_proto.branches(j);
468         std::vector<bool> branch =
469             ProtoStringToBranch(branch_proto.branch(), branch_proto.branch_size());
470         b_map[branch] = branch_proto.count();
471       }
472     }
473     return branch_map;
474   }
475 
476   const std::string filename_;
477   const std::regex binary_name_regex_;
478   BranchListBinaryCallback callback_;
479 };
480 
481 // Convert BranchListBinaryInfo into AutoFDOBinaryInfo.
482 class BranchListToAutoFDOConverter {
483  public:
Convert(const BinaryKey & key,BranchListBinaryInfo & binary)484   std::unique_ptr<AutoFDOBinaryInfo> Convert(const BinaryKey& key, BranchListBinaryInfo& binary) {
485     BuildId build_id = key.build_id;
486     std::unique_ptr<Dso> dso = Dso::CreateDsoWithBuildId(binary.dso_type, key.path, build_id);
487     if (!dso || !CheckBuildId(dso.get(), key.build_id)) {
488       return nullptr;
489     }
490     std::unique_ptr<AutoFDOBinaryInfo> autofdo_binary(new AutoFDOBinaryInfo);
491     autofdo_binary->first_load_segment_addr = GetFirstLoadSegmentVaddr(dso.get());
492 
493     if (dso->type() == DSO_KERNEL) {
494       ModifyBranchMapForKernel(dso.get(), key.kernel_start_addr, binary);
495     }
496 
497     auto process_instr_range = [&](const ETMInstrRange& range) {
498       CHECK_EQ(range.dso, dso.get());
499       autofdo_binary->AddInstrRange(range);
500     };
501 
502     auto result =
503         ConvertBranchMapToInstrRanges(dso.get(), binary.GetOrderedBranchMap(), process_instr_range);
504     if (!result.ok()) {
505       LOG(WARNING) << "failed to build instr ranges for binary " << dso->Path() << ": "
506                    << result.error();
507       return nullptr;
508     }
509     return autofdo_binary;
510   }
511 
512  private:
CheckBuildId(Dso * dso,const BuildId & expected_build_id)513   bool CheckBuildId(Dso* dso, const BuildId& expected_build_id) {
514     if (expected_build_id.IsEmpty()) {
515       return true;
516     }
517     BuildId build_id;
518     return GetBuildIdFromDsoPath(dso->GetDebugFilePath(), &build_id) &&
519            build_id == expected_build_id;
520   }
521 
ModifyBranchMapForKernel(Dso * dso,uint64_t kernel_start_addr,BranchListBinaryInfo & binary)522   void ModifyBranchMapForKernel(Dso* dso, uint64_t kernel_start_addr,
523                                 BranchListBinaryInfo& binary) {
524     if (kernel_start_addr == 0) {
525       // vmlinux has been provided when generating branch lists. Addresses in branch lists are
526       // already vaddrs in vmlinux.
527       return;
528     }
529     // Addresses are still kernel ip addrs in memory. Need to convert them to vaddrs in vmlinux.
530     UnorderedBranchMap new_branch_map;
531     for (auto& p : binary.branch_map) {
532       uint64_t vaddr_in_file = dso->IpToVaddrInFile(p.first, kernel_start_addr, 0);
533       new_branch_map[vaddr_in_file] = std::move(p.second);
534     }
535     binary.branch_map = std::move(new_branch_map);
536   }
537 };
538 
539 // Write instruction ranges to a file in AutoFDO text format.
540 class AutoFDOWriter {
541  public:
AddAutoFDOBinary(const BinaryKey & key,AutoFDOBinaryInfo & binary)542   void AddAutoFDOBinary(const BinaryKey& key, AutoFDOBinaryInfo& binary) {
543     auto it = binary_map_.find(key);
544     if (it == binary_map_.end()) {
545       binary_map_[key] = std::move(binary);
546     } else {
547       it->second.Merge(binary);
548     }
549   }
550 
Write(const std::string & output_filename)551   bool Write(const std::string& output_filename) {
552     std::unique_ptr<FILE, decltype(&fclose)> output_fp(fopen(output_filename.c_str(), "w"), fclose);
553     if (!output_fp) {
554       PLOG(ERROR) << "failed to write to " << output_filename;
555       return false;
556     }
557     // autofdo_binary_map is used to store instruction ranges, which can have a large amount. And
558     // it has a larger access time (instruction ranges * executed time). So it's better to use
559     // unorder_maps to speed up access time. But we also want a stable output here, to compare
560     // output changes result from code changes. So generate a sorted output here.
561     std::vector<BinaryKey> keys;
562     for (auto& p : binary_map_) {
563       keys.emplace_back(p.first);
564     }
565     std::sort(keys.begin(), keys.end(),
566               [](const BinaryKey& key1, const BinaryKey& key2) { return key1.path < key2.path; });
567     if (keys.size() > 1) {
568       fprintf(output_fp.get(),
569               "// Please split this file. AutoFDO only accepts profile for one binary.\n");
570     }
571     for (const auto& key : keys) {
572       const AutoFDOBinaryInfo& binary = binary_map_[key];
573       // AutoFDO text format needs file_offsets instead of virtual addrs in a binary. And it uses
574       // below formula: vaddr = file_offset + GetFirstLoadSegmentVaddr().
575       uint64_t first_load_segment_addr = binary.first_load_segment_addr;
576 
577       auto to_offset = [&](uint64_t vaddr) -> uint64_t {
578         if (vaddr == 0) {
579           return 0;
580         }
581         CHECK_GE(vaddr, first_load_segment_addr);
582         return vaddr - first_load_segment_addr;
583       };
584 
585       // Write range_count_map.
586       std::map<AddrPair, uint64_t> range_count_map(binary.range_count_map.begin(),
587                                                    binary.range_count_map.end());
588       fprintf(output_fp.get(), "%zu\n", range_count_map.size());
589       for (const auto& pair2 : range_count_map) {
590         const AddrPair& addr_range = pair2.first;
591         uint64_t count = pair2.second;
592 
593         fprintf(output_fp.get(), "%" PRIx64 "-%" PRIx64 ":%" PRIu64 "\n",
594                 to_offset(addr_range.first), to_offset(addr_range.second), count);
595       }
596 
597       // Write addr_count_map.
598       fprintf(output_fp.get(), "0\n");
599 
600       // Write branch_count_map.
601       std::map<AddrPair, uint64_t> branch_count_map(binary.branch_count_map.begin(),
602                                                     binary.branch_count_map.end());
603       fprintf(output_fp.get(), "%zu\n", branch_count_map.size());
604       for (const auto& pair2 : branch_count_map) {
605         const AddrPair& branch = pair2.first;
606         uint64_t count = pair2.second;
607 
608         fprintf(output_fp.get(), "%" PRIx64 "->%" PRIx64 ":%" PRIu64 "\n", to_offset(branch.first),
609                 to_offset(branch.second), count);
610       }
611 
612       // Write the binary path in comment.
613       fprintf(output_fp.get(), "// %s\n\n", key.path.c_str());
614     }
615     return true;
616   }
617 
618  private:
619   std::unordered_map<BinaryKey, AutoFDOBinaryInfo, BinaryKeyHash> binary_map_;
620 };
621 
622 // Merge BranchListBinaryInfo.
623 struct BranchListMerger {
AddBranchListBinarysimpleperf::__anon9004e0ec0111::BranchListMerger624   void AddBranchListBinary(const BinaryKey& key, BranchListBinaryInfo& binary) {
625     auto it = binary_map.find(key);
626     if (it == binary_map.end()) {
627       binary_map[key] = std::move(binary);
628     } else {
629       it->second.Merge(binary);
630     }
631   }
632 
633   std::unordered_map<BinaryKey, BranchListBinaryInfo, BinaryKeyHash> binary_map;
634 };
635 
636 // Write branch lists to a protobuf file specified by etm_branch_list.proto.
637 class BranchListWriter {
638  public:
Write(const std::string & output_filename,const std::unordered_map<BinaryKey,BranchListBinaryInfo,BinaryKeyHash> & binary_map)639   bool Write(const std::string& output_filename,
640              const std::unordered_map<BinaryKey, BranchListBinaryInfo, BinaryKeyHash>& binary_map) {
641     // Don't produce empty output file.
642     if (binary_map.empty()) {
643       LOG(INFO) << "Skip empty output file.";
644       unlink(output_filename.c_str());
645       return true;
646     }
647     std::unique_ptr<FILE, decltype(&fclose)> output_fp(fopen(output_filename.c_str(), "wb"),
648                                                        fclose);
649     if (!output_fp) {
650       PLOG(ERROR) << "failed to write to " << output_filename;
651       return false;
652     }
653 
654     proto::ETMBranchList branch_list_proto;
655     branch_list_proto.set_magic(ETM_BRANCH_LIST_PROTO_MAGIC);
656     std::vector<char> branch_buf;
657     for (const auto& p : binary_map) {
658       const BinaryKey& key = p.first;
659       const BranchListBinaryInfo& binary = p.second;
660       auto binary_proto = branch_list_proto.add_binaries();
661 
662       binary_proto->set_path(key.path);
663       if (!key.build_id.IsEmpty()) {
664         binary_proto->set_build_id(key.build_id.ToString().substr(2));
665       }
666       auto opt_binary_type = ToProtoBinaryType(binary.dso_type);
667       if (!opt_binary_type.has_value()) {
668         return false;
669       }
670       binary_proto->set_type(opt_binary_type.value());
671 
672       for (const auto& addr_p : binary.branch_map) {
673         auto addr_proto = binary_proto->add_addrs();
674         addr_proto->set_addr(addr_p.first);
675 
676         for (const auto& branch_p : addr_p.second) {
677           const std::vector<bool>& branch = branch_p.first;
678           auto branch_proto = addr_proto->add_branches();
679 
680           branch_proto->set_branch(BranchToProtoString(branch));
681           branch_proto->set_branch_size(branch.size());
682           branch_proto->set_count(branch_p.second);
683         }
684       }
685 
686       if (binary.dso_type == DSO_KERNEL) {
687         binary_proto->mutable_kernel_info()->set_kernel_start_addr(key.kernel_start_addr);
688       }
689     }
690     if (!branch_list_proto.SerializeToFileDescriptor(fileno(output_fp.get()))) {
691       PLOG(ERROR) << "failed to write to " << output_filename;
692       return false;
693     }
694     return true;
695   }
696 
697  private:
ToProtoBinaryType(DsoType dso_type)698   std::optional<proto::ETMBranchList_Binary::BinaryType> ToProtoBinaryType(DsoType dso_type) {
699     switch (dso_type) {
700       case DSO_ELF_FILE:
701         return proto::ETMBranchList_Binary::ELF_FILE;
702       case DSO_KERNEL:
703         return proto::ETMBranchList_Binary::KERNEL;
704       case DSO_KERNEL_MODULE:
705         return proto::ETMBranchList_Binary::KERNEL_MODULE;
706       default:
707         LOG(ERROR) << "unexpected dso type " << dso_type;
708         return std::nullopt;
709     }
710   }
711 };
712 
713 class InjectCommand : public Command {
714  public:
InjectCommand()715   InjectCommand()
716       : Command("inject", "parse etm instruction tracing data",
717                 // clang-format off
718 "Usage: simpleperf inject [options]\n"
719 "--binary binary_name         Generate data only for binaries matching binary_name regex.\n"
720 "-i file1,file2,...           Input files. Default is perf.data. Support below formats:\n"
721 "                               1. perf.data generated by recording cs-etm event type.\n"
722 "                               2. branch_list file generated by `inject --output branch-list`.\n"
723 "                             If a file name starts with @, it contains a list of input files.\n"
724 "-o <file>                    output file. Default is perf_inject.data.\n"
725 "--output <format>            Select output file format:\n"
726 "                               autofdo      -- text format accepted by TextSampleReader\n"
727 "                                               of AutoFDO\n"
728 "                               branch-list  -- protobuf file in etm_branch_list.proto\n"
729 "                             Default is autofdo.\n"
730 "--dump-etm type1,type2,...   Dump etm data. A type is one of raw, packet and element.\n"
731 "--exclude-perf               Exclude trace data for the recording process.\n"
732 "--symdir <dir>               Look for binaries in a directory recursively.\n"
733 "\n"
734 "Examples:\n"
735 "1. Generate autofdo text output.\n"
736 "$ simpleperf inject -i perf.data -o autofdo.txt --output autofdo\n"
737 "\n"
738 "2. Generate branch list proto, then convert to autofdo text.\n"
739 "$ simpleperf inject -i perf.data -o branch_list.data --output branch-list\n"
740 "$ simpleperf inject -i branch_list.data -o autofdo.txt --output autofdo\n"
741                 // clang-format on
742         ) {}
743 
Run(const std::vector<std::string> & args)744   bool Run(const std::vector<std::string>& args) override {
745     GOOGLE_PROTOBUF_VERIFY_VERSION;
746     if (!ParseOptions(args)) {
747       return false;
748     }
749 
750     CHECK(!input_filenames_.empty());
751     if (IsPerfDataFile(input_filenames_[0])) {
752       switch (output_format_) {
753         case OutputFormat::AutoFDO:
754           return ConvertPerfDataToAutoFDO();
755         case OutputFormat::BranchList:
756           return ConvertPerfDataToBranchList();
757       }
758     } else {
759       switch (output_format_) {
760         case OutputFormat::AutoFDO:
761           return ConvertBranchListToAutoFDO();
762         case OutputFormat::BranchList:
763           return ConvertBranchListToBranchList();
764       }
765     }
766   }
767 
768  private:
ParseOptions(const std::vector<std::string> & args)769   bool ParseOptions(const std::vector<std::string>& args) {
770     const OptionFormatMap option_formats = {
771         {"--binary", {OptionValueType::STRING, OptionType::SINGLE}},
772         {"--dump-etm", {OptionValueType::STRING, OptionType::SINGLE}},
773         {"--exclude-perf", {OptionValueType::NONE, OptionType::SINGLE}},
774         {"-i", {OptionValueType::STRING, OptionType::MULTIPLE}},
775         {"-o", {OptionValueType::STRING, OptionType::SINGLE}},
776         {"--output", {OptionValueType::STRING, OptionType::SINGLE}},
777         {"--symdir", {OptionValueType::STRING, OptionType::MULTIPLE}},
778     };
779     OptionValueMap options;
780     std::vector<std::pair<OptionName, OptionValue>> ordered_options;
781     if (!PreprocessOptions(args, option_formats, &options, &ordered_options, nullptr)) {
782       return false;
783     }
784 
785     if (auto value = options.PullValue("--binary"); value) {
786       binary_name_regex_ = *value->str_value;
787     }
788     if (auto value = options.PullValue("--dump-etm"); value) {
789       if (!ParseEtmDumpOption(*value->str_value, &etm_dump_option_)) {
790         return false;
791       }
792     }
793     exclude_perf_ = options.PullBoolValue("--exclude-perf");
794 
795     for (const OptionValue& value : options.PullValues("-i")) {
796       std::vector<std::string> files = android::base::Split(*value.str_value, ",");
797       for (std::string& file : files) {
798         if (android::base::StartsWith(file, "@")) {
799           if (!ReadFileList(file.substr(1), &input_filenames_)) {
800             return false;
801           }
802         } else {
803           input_filenames_.emplace_back(file);
804         }
805       }
806     }
807     if (input_filenames_.empty()) {
808       input_filenames_.emplace_back("perf.data");
809     }
810     options.PullStringValue("-o", &output_filename_);
811     if (auto value = options.PullValue("--output"); value) {
812       const std::string& output = *value->str_value;
813       if (output == "autofdo") {
814         output_format_ = OutputFormat::AutoFDO;
815       } else if (output == "branch-list") {
816         output_format_ = OutputFormat::BranchList;
817       } else {
818         LOG(ERROR) << "unknown format in --output option: " << output;
819         return false;
820       }
821     }
822     if (auto value = options.PullValue("--symdir"); value) {
823       if (!Dso::AddSymbolDir(*value->str_value)) {
824         return false;
825       }
826       // Symbol dirs are cleaned when Dso count is decreased to zero, which can happen between
827       // processing input files. To make symbol dirs always available, create a placeholder dso to
828       // prevent cleaning from happening.
829       placeholder_dso_ = Dso::CreateDso(DSO_UNKNOWN_FILE, "unknown");
830     }
831     CHECK(options.values.empty());
832     return true;
833   }
834 
ReadFileList(const std::string & path,std::vector<std::string> * file_list)835   bool ReadFileList(const std::string& path, std::vector<std::string>* file_list) {
836     std::string data;
837     if (!android::base::ReadFileToString(path, &data)) {
838       PLOG(ERROR) << "failed to read " << path;
839       return false;
840     }
841     std::vector<std::string> tokens = android::base::Tokenize(data, " \t\n\r");
842     file_list->insert(file_list->end(), tokens.begin(), tokens.end());
843     return true;
844   }
845 
ConvertPerfDataToAutoFDO()846   bool ConvertPerfDataToAutoFDO() {
847     AutoFDOWriter autofdo_writer;
848     auto callback = [&](const BinaryKey& key, AutoFDOBinaryInfo& binary) {
849       autofdo_writer.AddAutoFDOBinary(key, binary);
850     };
851     for (const auto& input_filename : input_filenames_) {
852       PerfDataReader reader(input_filename, exclude_perf_, etm_dump_option_, binary_name_regex_);
853       reader.SetCallback(callback);
854       if (!reader.Read()) {
855         return false;
856       }
857     }
858     return autofdo_writer.Write(output_filename_);
859   }
860 
ConvertPerfDataToBranchList()861   bool ConvertPerfDataToBranchList() {
862     BranchListMerger branch_list_merger;
863     auto callback = [&](const BinaryKey& key, BranchListBinaryInfo& binary) {
864       branch_list_merger.AddBranchListBinary(key, binary);
865     };
866     for (const auto& input_filename : input_filenames_) {
867       PerfDataReader reader(input_filename, exclude_perf_, etm_dump_option_, binary_name_regex_);
868       reader.SetCallback(callback);
869       if (!reader.Read()) {
870         return false;
871       }
872     }
873     BranchListWriter branch_list_writer;
874     return branch_list_writer.Write(output_filename_, branch_list_merger.binary_map);
875   }
876 
ConvertBranchListToAutoFDO()877   bool ConvertBranchListToAutoFDO() {
878     // Step1 : Merge branch lists from all input files.
879     BranchListMerger branch_list_merger;
880     auto callback = [&](const BinaryKey& key, BranchListBinaryInfo& binary) {
881       branch_list_merger.AddBranchListBinary(key, binary);
882     };
883     for (const auto& input_filename : input_filenames_) {
884       BranchListReader reader(input_filename, binary_name_regex_);
885       reader.SetCallback(callback);
886       if (!reader.Read()) {
887         return false;
888       }
889     }
890 
891     // Step2: Convert BranchListBinaryInfo to AutoFDOBinaryInfo.
892     AutoFDOWriter autofdo_writer;
893     BranchListToAutoFDOConverter converter;
894     for (auto& p : branch_list_merger.binary_map) {
895       const BinaryKey& key = p.first;
896       BranchListBinaryInfo& binary = p.second;
897       std::unique_ptr<AutoFDOBinaryInfo> autofdo_binary = converter.Convert(key, binary);
898       if (autofdo_binary) {
899         // Create new BinaryKey with kernel_start_addr = 0. Because AutoFDO output doesn't care
900         // kernel_start_addr.
901         autofdo_writer.AddAutoFDOBinary(BinaryKey(key.path, key.build_id), *autofdo_binary);
902       }
903     }
904 
905     // Step3: Write AutoFDOBinaryInfo.
906     return autofdo_writer.Write(output_filename_);
907   }
908 
ConvertBranchListToBranchList()909   bool ConvertBranchListToBranchList() {
910     // Step1 : Merge branch lists from all input files.
911     BranchListMerger branch_list_merger;
912     auto callback = [&](const BinaryKey& key, BranchListBinaryInfo& binary) {
913       branch_list_merger.AddBranchListBinary(key, binary);
914     };
915     for (const auto& input_filename : input_filenames_) {
916       BranchListReader reader(input_filename, binary_name_regex_);
917       reader.SetCallback(callback);
918       if (!reader.Read()) {
919         return false;
920       }
921     }
922     // Step2: Write BranchListBinaryInfo.
923     BranchListWriter branch_list_writer;
924     return branch_list_writer.Write(output_filename_, branch_list_merger.binary_map);
925   }
926 
927   std::regex binary_name_regex_{""};  // Default to match everything.
928   bool exclude_perf_ = false;
929   std::vector<std::string> input_filenames_;
930   std::string output_filename_ = "perf_inject.data";
931   OutputFormat output_format_ = OutputFormat::AutoFDO;
932   ETMDumpOption etm_dump_option_;
933 
934   std::unique_ptr<Dso> placeholder_dso_;
935 };
936 
937 }  // namespace
938 
RegisterInjectCommand()939 void RegisterInjectCommand() {
940   return RegisterCommand("inject", [] { return std::unique_ptr<Command>(new InjectCommand); });
941 }
942 
943 }  // namespace simpleperf
944