1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SIMPLE_PERF_SAMPLE_TREE_H_ 18 #define SIMPLE_PERF_SAMPLE_TREE_H_ 19 20 #include <unordered_map> 21 22 #include "OfflineUnwinder.h" 23 #include "SampleComparator.h" 24 #include "SampleDisplayer.h" 25 #include "callchain.h" 26 #include "perf_regs.h" 27 #include "record.h" 28 #include "thread_tree.h" 29 30 namespace simpleperf { 31 32 // A SampleTree is a collection of samples. A profiling report is mainly about 33 // constructing a SampleTree and display it. There are three steps involved: 34 // build the tree, sort the tree, and display it. For example, if we want to 35 // show how many cpu-cycles are spent in different functions, we should do as 36 // follows: 37 // 1. Build a SampleTree from SampleRecords with each sample containing 38 // (cpu-cycles, function name). When building the tree, we should merge 39 // samples containing the same function name. 40 // 2. Sort the SampleTree by cpu-cycles in the sample. As we want to display the 41 // samples in a decreasing order of cpu-cycles, we should sort it like this. 42 // 3. Display the SampleTree, each sample prints its (cpu-cycles, function name) 43 // pair. 44 // 45 // We represent the three steps with three template classes. 46 // 1. A SampleTree is built by SampleTreeBuilder. The comparator passed in 47 // SampleTreeBuilder's constructor decides the property of samples should be 48 // merged together. 49 // 2. After a SampleTree is built and got from SampleTreeBuilder, it should be 50 // sorted by SampleTreeSorter. The sort result decides the order to show 51 // samples. 52 // 3. At last, the sorted SampleTree is passed to SampleTreeDisplayer, which 53 // displays each sample in the SampleTree. 54 55 template <typename EntryT, typename AccumulateInfoT> 56 class SampleTreeBuilder { 57 public: SampleTreeBuilder(const SampleComparator<EntryT> & comparator)58 explicit SampleTreeBuilder(const SampleComparator<EntryT>& comparator) 59 : sample_set_(comparator), 60 accumulate_callchain_(false), 61 sample_comparator_(comparator), 62 filtered_sample_set_(comparator), 63 use_branch_address_(false), 64 build_callchain_(false), 65 use_caller_as_callchain_root_(false) {} 66 ~SampleTreeBuilder()67 virtual ~SampleTreeBuilder() {} 68 SetBranchSampleOption(bool use_branch_address)69 void SetBranchSampleOption(bool use_branch_address) { use_branch_address_ = use_branch_address; } 70 SetCallChainSampleOptions(bool accumulate_callchain,bool build_callchain,bool use_caller_as_callchain_root)71 void SetCallChainSampleOptions(bool accumulate_callchain, bool build_callchain, 72 bool use_caller_as_callchain_root) { 73 accumulate_callchain_ = accumulate_callchain; 74 build_callchain_ = build_callchain; 75 use_caller_as_callchain_root_ = use_caller_as_callchain_root; 76 if (accumulate_callchain_) { 77 offline_unwinder_ = OfflineUnwinder::Create(false); 78 } 79 } 80 GetUnwinder()81 OfflineUnwinder* GetUnwinder() { return offline_unwinder_.get(); } 82 ProcessSampleRecord(const SampleRecord & r)83 void ProcessSampleRecord(const SampleRecord& r) { 84 if (use_branch_address_ && (r.sample_type & PERF_SAMPLE_BRANCH_STACK)) { 85 for (uint64_t i = 0; i < r.branch_stack_data.stack_nr; ++i) { 86 auto& item = r.branch_stack_data.stack[i]; 87 if (item.from != 0 && item.to != 0) { 88 CreateBranchSample(r, item); 89 } 90 } 91 return; 92 } 93 bool in_kernel = r.InKernel(); 94 AccumulateInfoT acc_info; 95 EntryT* sample = CreateSample(r, in_kernel, &acc_info); 96 if (sample == nullptr) { 97 return; 98 } 99 if (accumulate_callchain_) { 100 std::vector<uint64_t> ips; 101 if (r.sample_type & PERF_SAMPLE_CALLCHAIN) { 102 ips.insert(ips.end(), r.callchain_data.ips, r.callchain_data.ips + r.callchain_data.ip_nr); 103 } 104 const ThreadEntry* thread = GetThreadOfSample(sample); 105 // Use stack_user_data.data.size() instead of stack_user_data.dyn_size, to 106 // make up for the missing kernel patch in N9. See b/22612370. 107 if (thread != nullptr && (r.sample_type & PERF_SAMPLE_REGS_USER) && 108 (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER) && 109 (r.GetValidStackSize() > 0)) { 110 RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs); 111 std::vector<uint64_t> user_ips; 112 std::vector<uint64_t> sps; 113 if (offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data, 114 r.GetValidStackSize(), &user_ips, &sps)) { 115 ips.push_back(PERF_CONTEXT_USER); 116 ips.insert(ips.end(), user_ips.begin(), user_ips.end()); 117 } 118 } 119 120 std::vector<EntryT*> callchain; 121 callchain.push_back(sample); 122 123 bool first_ip = true; 124 for (auto& ip : ips) { 125 if (ip >= PERF_CONTEXT_MAX) { 126 switch (ip) { 127 case PERF_CONTEXT_KERNEL: 128 in_kernel = true; 129 break; 130 case PERF_CONTEXT_USER: 131 in_kernel = false; 132 break; 133 default: 134 LOG(DEBUG) << "Unexpected perf_context in callchain: " << ip; 135 } 136 } else { 137 if (first_ip) { 138 first_ip = false; 139 // Remove duplication with sampled ip. 140 if (ip == r.ip_data.ip) { 141 continue; 142 } 143 } 144 EntryT* callchain_sample = 145 CreateCallChainSample(thread, sample, ip, in_kernel, callchain, acc_info); 146 if (callchain_sample == nullptr) { 147 break; 148 } 149 callchain.push_back(callchain_sample); 150 } 151 } 152 153 if (build_callchain_) { 154 std::set<EntryT*> added_set; 155 if (use_caller_as_callchain_root_) { 156 std::reverse(callchain.begin(), callchain.end()); 157 } 158 EntryT* parent = nullptr; 159 while (callchain.size() >= 2) { 160 EntryT* sample = callchain[0]; 161 callchain.erase(callchain.begin()); 162 // Add only once for recursive calls on callchain. 163 if (added_set.find(sample) != added_set.end()) { 164 continue; 165 } 166 added_set.insert(sample); 167 InsertCallChainForSample(sample, callchain, acc_info); 168 UpdateCallChainParentInfo(sample, parent); 169 parent = sample; 170 } 171 } 172 } 173 } 174 GetSamples()175 std::vector<EntryT*> GetSamples() const { 176 std::vector<EntryT*> result; 177 for (auto& entry : sample_set_) { 178 result.push_back(entry); 179 } 180 return result; 181 } 182 183 protected: 184 virtual EntryT* CreateSample(const SampleRecord& r, bool in_kernel, 185 AccumulateInfoT* acc_info) = 0; 186 virtual EntryT* CreateBranchSample(const SampleRecord& r, const BranchStackItemType& item) = 0; 187 virtual EntryT* CreateCallChainSample(const ThreadEntry* thread, const EntryT* sample, 188 uint64_t ip, bool in_kernel, 189 const std::vector<EntryT*>& callchain, 190 const AccumulateInfoT& acc_info) = 0; 191 virtual const ThreadEntry* GetThreadOfSample(EntryT*) = 0; 192 virtual uint64_t GetPeriodForCallChain(const AccumulateInfoT& acc_info) = 0; FilterSample(const EntryT *)193 virtual bool FilterSample(const EntryT*) { return true; } 194 UpdateSummary(const EntryT *)195 virtual void UpdateSummary(const EntryT*) {} 196 197 virtual void MergeSample(EntryT* sample1, EntryT* sample2) = 0; 198 InsertSample(std::unique_ptr<EntryT> sample)199 EntryT* InsertSample(std::unique_ptr<EntryT> sample) { 200 if (sample == nullptr) { 201 return nullptr; 202 } 203 if (!FilterSample(sample.get())) { 204 // Store in filtered_sample_set_ for use in other EntryT's callchain. 205 auto it = filtered_sample_set_.find(sample.get()); 206 if (it != filtered_sample_set_.end()) { 207 return *it; 208 } 209 EntryT* result = sample.get(); 210 filtered_sample_set_.insert(sample.get()); 211 sample_storage_.push_back(std::move(sample)); 212 return result; 213 } 214 UpdateSummary(sample.get()); 215 EntryT* result; 216 auto it = sample_set_.find(sample.get()); 217 if (it == sample_set_.end()) { 218 result = sample.get(); 219 sample_set_.insert(sample.get()); 220 sample_storage_.push_back(std::move(sample)); 221 } else { 222 result = *it; 223 MergeSample(*it, sample.get()); 224 } 225 return result; 226 } 227 InsertCallChainSample(std::unique_ptr<EntryT> sample,const std::vector<EntryT * > & callchain)228 EntryT* InsertCallChainSample(std::unique_ptr<EntryT> sample, 229 const std::vector<EntryT*>& callchain) { 230 if (sample == nullptr) { 231 return nullptr; 232 } 233 auto it = sample_set_.find(sample.get()); 234 if (it != sample_set_.end()) { 235 EntryT* sample = *it; 236 // Process only once for recursive function call. 237 if (std::find(callchain.begin(), callchain.end(), sample) != callchain.end()) { 238 return sample; 239 } 240 } 241 return InsertSample(std::move(sample)); 242 } 243 InsertCallChainForSample(EntryT * sample,const std::vector<EntryT * > & callchain,const AccumulateInfoT & acc_info)244 void InsertCallChainForSample(EntryT* sample, const std::vector<EntryT*>& callchain, 245 const AccumulateInfoT& acc_info) { 246 uint64_t period = GetPeriodForCallChain(acc_info); 247 sample->callchain.AddCallChain(callchain, period, [&](const EntryT* s1, const EntryT* s2) { 248 return sample_comparator_.IsSameSample(s1, s2); 249 }); 250 } 251 AddCallChainDuplicateInfo()252 void AddCallChainDuplicateInfo() { 253 if (build_callchain_) { 254 for (EntryT* sample : sample_set_) { 255 auto it = callchain_parent_map_.find(sample); 256 if (it != callchain_parent_map_.end() && !it->second.has_multiple_parents) { 257 sample->callchain.duplicated = true; 258 } 259 } 260 } 261 } 262 263 std::set<EntryT*, SampleComparator<EntryT>> sample_set_; 264 bool accumulate_callchain_; 265 266 private: UpdateCallChainParentInfo(EntryT * sample,EntryT * parent)267 void UpdateCallChainParentInfo(EntryT* sample, EntryT* parent) { 268 if (parent == nullptr) { 269 return; 270 } 271 auto it = callchain_parent_map_.find(sample); 272 if (it == callchain_parent_map_.end()) { 273 CallChainParentInfo info; 274 info.parent = parent; 275 info.has_multiple_parents = false; 276 callchain_parent_map_[sample] = info; 277 } else if (it->second.parent != parent) { 278 it->second.has_multiple_parents = true; 279 } 280 } 281 282 const SampleComparator<EntryT> sample_comparator_; 283 // If a Sample/CallChainSample is filtered out, it is stored in filtered_sample_set_, 284 // and only used in other EntryT's callchain. 285 std::set<EntryT*, SampleComparator<EntryT>> filtered_sample_set_; 286 std::vector<std::unique_ptr<EntryT>> sample_storage_; 287 288 struct CallChainParentInfo { 289 EntryT* parent; 290 bool has_multiple_parents; 291 }; 292 std::unordered_map<EntryT*, CallChainParentInfo> callchain_parent_map_; 293 294 bool use_branch_address_; 295 bool build_callchain_; 296 bool use_caller_as_callchain_root_; 297 std::unique_ptr<OfflineUnwinder> offline_unwinder_; 298 }; 299 300 template <typename EntryT> 301 class SampleTreeSorter { 302 public: SampleTreeSorter(SampleComparator<EntryT> comparator)303 explicit SampleTreeSorter(SampleComparator<EntryT> comparator) : comparator_(comparator) {} 304 ~SampleTreeSorter()305 virtual ~SampleTreeSorter() {} 306 Sort(std::vector<EntryT * > & v,bool sort_callchain)307 void Sort(std::vector<EntryT*>& v, bool sort_callchain) { 308 if (sort_callchain) { 309 for (auto& sample : v) { 310 SortCallChain(sample); 311 } 312 } 313 if (!comparator_.empty()) { 314 std::sort(v.begin(), v.end(), 315 [this](const EntryT* s1, const EntryT* s2) { return comparator_(s1, s2); }); 316 } 317 } 318 319 protected: SortCallChain(EntryT * sample)320 void SortCallChain(EntryT* sample) { sample->callchain.SortByPeriod(); } 321 322 private: 323 SampleComparator<EntryT> comparator_; 324 }; 325 326 template <typename EntryT, typename InfoT> 327 class SampleTreeDisplayer { 328 public: SampleTreeDisplayer(SampleDisplayer<EntryT,InfoT> displayer)329 explicit SampleTreeDisplayer(SampleDisplayer<EntryT, InfoT> displayer) : displayer_(displayer) {} 330 ~SampleTreeDisplayer()331 virtual ~SampleTreeDisplayer() {} 332 DisplaySamples(FILE * fp,const std::vector<EntryT * > & samples,const InfoT * info)333 void DisplaySamples(FILE* fp, const std::vector<EntryT*>& samples, const InfoT* info) { 334 displayer_.SetInfo(info); 335 for (const auto& sample : samples) { 336 displayer_.AdjustWidth(sample); 337 } 338 displayer_.PrintNames(fp); 339 for (const auto& sample : samples) { 340 displayer_.PrintSample(fp, sample); 341 } 342 } 343 344 private: 345 SampleDisplayer<EntryT, InfoT> displayer_; 346 }; 347 348 } // namespace simpleperf 349 350 #endif // SIMPLE_PERF_SAMPLE_TREE_H_ 351