• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SIMPLE_PERF_SAMPLE_TREE_H_
18 #define SIMPLE_PERF_SAMPLE_TREE_H_
19 
20 #include <unordered_map>
21 
22 #include "OfflineUnwinder.h"
23 #include "SampleComparator.h"
24 #include "SampleDisplayer.h"
25 #include "callchain.h"
26 #include "perf_regs.h"
27 #include "record.h"
28 #include "thread_tree.h"
29 
30 namespace simpleperf {
31 
32 // A SampleTree is a collection of samples. A profiling report is mainly about
33 // constructing a SampleTree and display it. There are three steps involved:
34 // build the tree, sort the tree, and display it. For example, if we want to
35 // show how many cpu-cycles are spent in different functions, we should do as
36 // follows:
37 // 1. Build a SampleTree from SampleRecords with each sample containing
38 //    (cpu-cycles, function name). When building the tree, we should merge
39 //    samples containing the same function name.
40 // 2. Sort the SampleTree by cpu-cycles in the sample. As we want to display the
41 //    samples in a decreasing order of cpu-cycles, we should sort it like this.
42 // 3. Display the SampleTree, each sample prints its (cpu-cycles, function name)
43 //    pair.
44 //
45 // We represent the three steps with three template classes.
46 // 1. A SampleTree is built by SampleTreeBuilder. The comparator passed in
47 //    SampleTreeBuilder's constructor decides the property of samples should be
48 //    merged together.
49 // 2. After a SampleTree is built and got from SampleTreeBuilder, it should be
50 //    sorted by SampleTreeSorter. The sort result decides the order to show
51 //    samples.
52 // 3. At last, the sorted SampleTree is passed to SampleTreeDisplayer, which
53 //    displays each sample in the SampleTree.
54 
55 template <typename EntryT, typename AccumulateInfoT>
56 class SampleTreeBuilder {
57  public:
SampleTreeBuilder(const SampleComparator<EntryT> & comparator)58   explicit SampleTreeBuilder(const SampleComparator<EntryT>& comparator)
59       : sample_set_(comparator),
60         accumulate_callchain_(false),
61         sample_comparator_(comparator),
62         filtered_sample_set_(comparator),
63         use_branch_address_(false),
64         build_callchain_(false),
65         use_caller_as_callchain_root_(false) {}
66 
~SampleTreeBuilder()67   virtual ~SampleTreeBuilder() {}
68 
SetBranchSampleOption(bool use_branch_address)69   void SetBranchSampleOption(bool use_branch_address) { use_branch_address_ = use_branch_address; }
70 
SetCallChainSampleOptions(bool accumulate_callchain,bool build_callchain,bool use_caller_as_callchain_root)71   void SetCallChainSampleOptions(bool accumulate_callchain, bool build_callchain,
72                                  bool use_caller_as_callchain_root) {
73     accumulate_callchain_ = accumulate_callchain;
74     build_callchain_ = build_callchain;
75     use_caller_as_callchain_root_ = use_caller_as_callchain_root;
76     if (accumulate_callchain_) {
77       offline_unwinder_ = OfflineUnwinder::Create(false);
78     }
79   }
80 
GetUnwinder()81   OfflineUnwinder* GetUnwinder() { return offline_unwinder_.get(); }
82 
ProcessSampleRecord(const SampleRecord & r)83   void ProcessSampleRecord(const SampleRecord& r) {
84     if (use_branch_address_ && (r.sample_type & PERF_SAMPLE_BRANCH_STACK)) {
85       for (uint64_t i = 0; i < r.branch_stack_data.stack_nr; ++i) {
86         auto& item = r.branch_stack_data.stack[i];
87         if (item.from != 0 && item.to != 0) {
88           CreateBranchSample(r, item);
89         }
90       }
91       return;
92     }
93     bool in_kernel = r.InKernel();
94     AccumulateInfoT acc_info;
95     EntryT* sample = CreateSample(r, in_kernel, &acc_info);
96     if (sample == nullptr) {
97       return;
98     }
99     if (accumulate_callchain_) {
100       std::vector<uint64_t> ips;
101       if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
102         ips.insert(ips.end(), r.callchain_data.ips, r.callchain_data.ips + r.callchain_data.ip_nr);
103       }
104       const ThreadEntry* thread = GetThreadOfSample(sample);
105       // Use stack_user_data.data.size() instead of stack_user_data.dyn_size, to
106       // make up for the missing kernel patch in N9. See b/22612370.
107       if (thread != nullptr && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
108           (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER) &&
109           (r.GetValidStackSize() > 0)) {
110         RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
111         std::vector<uint64_t> user_ips;
112         std::vector<uint64_t> sps;
113         if (offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
114                                                r.GetValidStackSize(), &user_ips, &sps)) {
115           ips.push_back(PERF_CONTEXT_USER);
116           ips.insert(ips.end(), user_ips.begin(), user_ips.end());
117         }
118       }
119 
120       std::vector<EntryT*> callchain;
121       callchain.push_back(sample);
122 
123       bool first_ip = true;
124       for (auto& ip : ips) {
125         if (ip >= PERF_CONTEXT_MAX) {
126           switch (ip) {
127             case PERF_CONTEXT_KERNEL:
128               in_kernel = true;
129               break;
130             case PERF_CONTEXT_USER:
131               in_kernel = false;
132               break;
133             default:
134               LOG(DEBUG) << "Unexpected perf_context in callchain: " << ip;
135           }
136         } else {
137           if (first_ip) {
138             first_ip = false;
139             // Remove duplication with sampled ip.
140             if (ip == r.ip_data.ip) {
141               continue;
142             }
143           }
144           EntryT* callchain_sample =
145               CreateCallChainSample(thread, sample, ip, in_kernel, callchain, acc_info);
146           if (callchain_sample == nullptr) {
147             break;
148           }
149           callchain.push_back(callchain_sample);
150         }
151       }
152 
153       if (build_callchain_) {
154         std::set<EntryT*> added_set;
155         if (use_caller_as_callchain_root_) {
156           std::reverse(callchain.begin(), callchain.end());
157         }
158         EntryT* parent = nullptr;
159         while (callchain.size() >= 2) {
160           EntryT* sample = callchain[0];
161           callchain.erase(callchain.begin());
162           // Add only once for recursive calls on callchain.
163           if (added_set.find(sample) != added_set.end()) {
164             continue;
165           }
166           added_set.insert(sample);
167           InsertCallChainForSample(sample, callchain, acc_info);
168           UpdateCallChainParentInfo(sample, parent);
169           parent = sample;
170         }
171       }
172     }
173   }
174 
GetSamples()175   std::vector<EntryT*> GetSamples() const {
176     std::vector<EntryT*> result;
177     for (auto& entry : sample_set_) {
178       result.push_back(entry);
179     }
180     return result;
181   }
182 
183  protected:
184   virtual EntryT* CreateSample(const SampleRecord& r, bool in_kernel,
185                                AccumulateInfoT* acc_info) = 0;
186   virtual EntryT* CreateBranchSample(const SampleRecord& r, const BranchStackItemType& item) = 0;
187   virtual EntryT* CreateCallChainSample(const ThreadEntry* thread, const EntryT* sample,
188                                         uint64_t ip, bool in_kernel,
189                                         const std::vector<EntryT*>& callchain,
190                                         const AccumulateInfoT& acc_info) = 0;
191   virtual const ThreadEntry* GetThreadOfSample(EntryT*) = 0;
192   virtual uint64_t GetPeriodForCallChain(const AccumulateInfoT& acc_info) = 0;
FilterSample(const EntryT *)193   virtual bool FilterSample(const EntryT*) { return true; }
194 
UpdateSummary(const EntryT *)195   virtual void UpdateSummary(const EntryT*) {}
196 
197   virtual void MergeSample(EntryT* sample1, EntryT* sample2) = 0;
198 
InsertSample(std::unique_ptr<EntryT> sample)199   EntryT* InsertSample(std::unique_ptr<EntryT> sample) {
200     if (sample == nullptr) {
201       return nullptr;
202     }
203     if (!FilterSample(sample.get())) {
204       // Store in filtered_sample_set_ for use in other EntryT's callchain.
205       auto it = filtered_sample_set_.find(sample.get());
206       if (it != filtered_sample_set_.end()) {
207         return *it;
208       }
209       EntryT* result = sample.get();
210       filtered_sample_set_.insert(sample.get());
211       sample_storage_.push_back(std::move(sample));
212       return result;
213     }
214     UpdateSummary(sample.get());
215     EntryT* result;
216     auto it = sample_set_.find(sample.get());
217     if (it == sample_set_.end()) {
218       result = sample.get();
219       sample_set_.insert(sample.get());
220       sample_storage_.push_back(std::move(sample));
221     } else {
222       result = *it;
223       MergeSample(*it, sample.get());
224     }
225     return result;
226   }
227 
InsertCallChainSample(std::unique_ptr<EntryT> sample,const std::vector<EntryT * > & callchain)228   EntryT* InsertCallChainSample(std::unique_ptr<EntryT> sample,
229                                 const std::vector<EntryT*>& callchain) {
230     if (sample == nullptr) {
231       return nullptr;
232     }
233     auto it = sample_set_.find(sample.get());
234     if (it != sample_set_.end()) {
235       EntryT* sample = *it;
236       // Process only once for recursive function call.
237       if (std::find(callchain.begin(), callchain.end(), sample) != callchain.end()) {
238         return sample;
239       }
240     }
241     return InsertSample(std::move(sample));
242   }
243 
InsertCallChainForSample(EntryT * sample,const std::vector<EntryT * > & callchain,const AccumulateInfoT & acc_info)244   void InsertCallChainForSample(EntryT* sample, const std::vector<EntryT*>& callchain,
245                                 const AccumulateInfoT& acc_info) {
246     uint64_t period = GetPeriodForCallChain(acc_info);
247     sample->callchain.AddCallChain(callchain, period, [&](const EntryT* s1, const EntryT* s2) {
248       return sample_comparator_.IsSameSample(s1, s2);
249     });
250   }
251 
AddCallChainDuplicateInfo()252   void AddCallChainDuplicateInfo() {
253     if (build_callchain_) {
254       for (EntryT* sample : sample_set_) {
255         auto it = callchain_parent_map_.find(sample);
256         if (it != callchain_parent_map_.end() && !it->second.has_multiple_parents) {
257           sample->callchain.duplicated = true;
258         }
259       }
260     }
261   }
262 
263   std::set<EntryT*, SampleComparator<EntryT>> sample_set_;
264   bool accumulate_callchain_;
265 
266  private:
UpdateCallChainParentInfo(EntryT * sample,EntryT * parent)267   void UpdateCallChainParentInfo(EntryT* sample, EntryT* parent) {
268     if (parent == nullptr) {
269       return;
270     }
271     auto it = callchain_parent_map_.find(sample);
272     if (it == callchain_parent_map_.end()) {
273       CallChainParentInfo info;
274       info.parent = parent;
275       info.has_multiple_parents = false;
276       callchain_parent_map_[sample] = info;
277     } else if (it->second.parent != parent) {
278       it->second.has_multiple_parents = true;
279     }
280   }
281 
282   const SampleComparator<EntryT> sample_comparator_;
283   // If a Sample/CallChainSample is filtered out, it is stored in filtered_sample_set_,
284   // and only used in other EntryT's callchain.
285   std::set<EntryT*, SampleComparator<EntryT>> filtered_sample_set_;
286   std::vector<std::unique_ptr<EntryT>> sample_storage_;
287 
288   struct CallChainParentInfo {
289     EntryT* parent;
290     bool has_multiple_parents;
291   };
292   std::unordered_map<EntryT*, CallChainParentInfo> callchain_parent_map_;
293 
294   bool use_branch_address_;
295   bool build_callchain_;
296   bool use_caller_as_callchain_root_;
297   std::unique_ptr<OfflineUnwinder> offline_unwinder_;
298 };
299 
300 template <typename EntryT>
301 class SampleTreeSorter {
302  public:
SampleTreeSorter(SampleComparator<EntryT> comparator)303   explicit SampleTreeSorter(SampleComparator<EntryT> comparator) : comparator_(comparator) {}
304 
~SampleTreeSorter()305   virtual ~SampleTreeSorter() {}
306 
Sort(std::vector<EntryT * > & v,bool sort_callchain)307   void Sort(std::vector<EntryT*>& v, bool sort_callchain) {
308     if (sort_callchain) {
309       for (auto& sample : v) {
310         SortCallChain(sample);
311       }
312     }
313     if (!comparator_.empty()) {
314       std::sort(v.begin(), v.end(),
315                 [this](const EntryT* s1, const EntryT* s2) { return comparator_(s1, s2); });
316     }
317   }
318 
319  protected:
SortCallChain(EntryT * sample)320   void SortCallChain(EntryT* sample) { sample->callchain.SortByPeriod(); }
321 
322  private:
323   SampleComparator<EntryT> comparator_;
324 };
325 
326 template <typename EntryT, typename InfoT>
327 class SampleTreeDisplayer {
328  public:
SampleTreeDisplayer(SampleDisplayer<EntryT,InfoT> displayer)329   explicit SampleTreeDisplayer(SampleDisplayer<EntryT, InfoT> displayer) : displayer_(displayer) {}
330 
~SampleTreeDisplayer()331   virtual ~SampleTreeDisplayer() {}
332 
DisplaySamples(FILE * fp,const std::vector<EntryT * > & samples,const InfoT * info)333   void DisplaySamples(FILE* fp, const std::vector<EntryT*>& samples, const InfoT* info) {
334     displayer_.SetInfo(info);
335     for (const auto& sample : samples) {
336       displayer_.AdjustWidth(sample);
337     }
338     displayer_.PrintNames(fp);
339     for (const auto& sample : samples) {
340       displayer_.PrintSample(fp, sample);
341     }
342   }
343 
344  private:
345   SampleDisplayer<EntryT, InfoT> displayer_;
346 };
347 
348 }  // namespace simpleperf
349 
350 #endif  // SIMPLE_PERF_SAMPLE_TREE_H_
351