• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_UTIL_PROFILE_BUILDER_H_
18 #define SRC_TRACE_PROCESSOR_UTIL_PROFILE_BUILDER_H_
19 
20 #include <optional>
21 
22 #include "perfetto/ext/base/string_view.h"
23 #include "perfetto/protozero/packed_repeated_fields.h"
24 #include "perfetto/protozero/scattered_heap_buffer.h"
25 #include "protos/perfetto/trace_processor/stack.pbzero.h"
26 #include "protos/third_party/pprof/profile.pbzero.h"
27 #include "src/trace_processor/containers/string_pool.h"
28 #include "src/trace_processor/storage/trace_storage.h"
29 #include "src/trace_processor/tables/profiler_tables_py.h"
30 #include "src/trace_processor/util/annotated_callsites.h"
31 
32 #include <algorithm>
33 #include <cstdint>
34 #include <functional>
35 #include <unordered_map>
36 #include <vector>
37 
38 namespace perfetto {
39 namespace trace_processor {
40 
41 class TraceProcessorContext;
42 
43 // Builds a |perftools.profiles.Profile| proto.
44 class GProfileBuilder {
45  public:
46   struct ValueType {
47     std::string type;
48     std::string unit;
49   };
50 
51   // |sample_types| A description of the values stored with each sample.
52   // |annotated| Whether to annotate callstack frames.
53   //
54   // Important: Annotations might interfere with certain aggregations, as we
55   // will could have a frame that is annotated with different annotations. That
56   // will lead to multiple functions being generated (sane name, line etc, but
57   // different annotation). Since there is no field in a Profile proto to track
58   // these annotations we extend the function name (my_func [annotation]), so
59   // from pprof perspective we now have different functions. So in flame graphs
60   // for example you will have one separate slice for each of these same
61   // functions with different annotations.
62   GProfileBuilder(const TraceProcessorContext* context,
63                   const std::vector<ValueType>& sample_types);
64   ~GProfileBuilder();
65 
66   // Returns false if the operation fails (e.g callsite_id was not found)
67   bool AddSample(const protos::pbzero::Stack_Decoder& stack,
68                  const protozero::PackedVarInt& values);
69 
70   // Finalizes the profile and returns the serialized proto. Can be called
71   // multiple times but after the first invocation `AddSample` calls will have
72   // no effect.
73   std::string Build();
74 
75  private:
76   static constexpr int64_t kEmptyStringIndex = 0;
77   static constexpr uint64_t kNullFunctionId = 0;
78 
79   // Strings are stored in the `Profile` in a table and referenced by their
80   // index. This helper class takes care of all the book keeping.
81   // `TraceProcessor` uses its own `StringPool` for strings. This helper
82   // provides convenient ways of dealing with `StringPool::Id` values instead of
83   // actual string. This class ensures that two equal strings will have the same
84   // index, so you can compare them instead of the actual strings.
85   class StringTable {
86    public:
87     // |result| This is the `Profile` proto we are building. Strings will be
88     // added to it as necessary. |string_pool| `StringPool` to quey for strings
89     // passed as `StringPool:Id`
90     StringTable(protozero::HeapBuffered<
91                     third_party::perftools::profiles::pbzero::Profile>* result,
92                 const StringPool* string_pool);
93 
94     // Adds the given string to the table, if not currently present, and returns
95     // the index to it. Might write data to the infligt `Profile` so it should
96     // not be called while in the middle of writing a message to the proto.
97     int64_t InternString(base::StringView str);
98     // Adds a string stored in the `TraceProcessor` `StringPool` to the table,
99     // if not currently present, and returns the index to it. Might write data
100     // to the inflight `Profile` so it should not be called while in the middle
101     // of writing a message to the proto.
102     int64_t InternString(StringPool::Id id);
103 
104     int64_t GetAnnotatedString(StringPool::Id str,
105                                CallsiteAnnotation annotation);
106     int64_t GetAnnotatedString(base::StringView str,
107                                CallsiteAnnotation annotation);
108 
109    private:
110     // Unconditionally writes the given string to the table and returns its
111     // index.
112     int64_t WriteString(base::StringView str);
113 
114     const StringPool& string_pool_;
115     protozero::HeapBuffered<third_party::perftools::profiles::pbzero::Profile>&
116         result_;
117 
118     std::unordered_map<StringPool::Id, int64_t> seen_string_pool_ids_;
119     // Maps strings (hashes thereof) to indexes in the table.
120     std::unordered_map<uint64_t, int64_t> seen_strings_;
121     // Index where the next string will be written to
122     int64_t next_index_{0};
123   };
124 
125   struct AnnotatedFrameId {
126     struct Hash {
operatorAnnotatedFrameId::Hash127       size_t operator()(const AnnotatedFrameId& id) const {
128         return static_cast<size_t>(perfetto::base::Hasher::Combine(
129             id.frame_id.value, static_cast<int>(id.annotation)));
130       }
131     };
132 
133     FrameId frame_id;
134     CallsiteAnnotation annotation;
135 
136     bool operator==(const AnnotatedFrameId& other) const {
137       return frame_id == other.frame_id && annotation == other.annotation;
138     }
139   };
140 
141   struct Line {
142     uint64_t function_id;
143     int64_t line;
144     bool operator==(const Line& other) const {
145       return function_id == other.function_id && line == other.line;
146     }
147   };
148 
149   // Location, MappingKey, Mapping, Function, and Line are helper structs to
150   // deduplicate entities. We do not write these directly to the proto Profile
151   // but instead stage them and write them out during `Finalize`. Samples on the
152   // other hand are directly written to the proto.
153 
154   struct Location {
155     struct Hash {
operatorLocation::Hash156       size_t operator()(const Location& loc) const {
157         perfetto::base::Hasher hasher;
158         hasher.UpdateAll(loc.mapping_id, loc.rel_pc, loc.lines.size());
159         for (const auto& line : loc.lines) {
160           hasher.UpdateAll(line.function_id, line.line);
161         }
162         return static_cast<size_t>(hasher.digest());
163       }
164     };
165 
166     uint64_t mapping_id;
167     uint64_t rel_pc;
168     std::vector<Line> lines;
169 
170     bool operator==(const Location& other) const {
171       return mapping_id == other.mapping_id && rel_pc == other.rel_pc &&
172              lines == other.lines;
173     }
174   };
175 
176   // Mappings are tricky. We could have samples for different processes and
177   // given address space layout randomization the same mapping could be located
178   // at different addresses. MappingKey has the set of properties that uniquely
179   // identify mapping in order to deduplicate rows in the stack_profile_mapping
180   // table.
181   struct MappingKey {
182     struct Hash {
operatorMappingKey::Hash183       size_t operator()(const MappingKey& mapping) const {
184         perfetto::base::Hasher hasher;
185         hasher.UpdateAll(mapping.size, mapping.file_offset,
186                          mapping.build_id_or_filename);
187         return static_cast<size_t>(hasher.digest());
188       }
189     };
190 
191     explicit MappingKey(
192         const tables::StackProfileMappingTable::ConstRowReference& mapping,
193         StringTable& string_table);
194 
195     bool operator==(const MappingKey& other) const {
196       return size == other.size && file_offset == other.file_offset &&
197              build_id_or_filename == other.build_id_or_filename;
198     }
199 
200     uint64_t size;
201     uint64_t file_offset;
202     int64_t build_id_or_filename;
203   };
204 
205   // Keeps track of what debug information is available for a mapping.
206   // TODO(carlscab): We could be a bit more "clever" here. Currently if there is
207   // debug info for at least one frame we flag the mapping as having debug info.
208   // We could use some heuristic instead, e.g. if x% for frames have the info
209   // etc.
210   struct DebugInfo {
211     bool has_functions{false};
212     bool has_filenames{false};
213     bool has_line_numbers{false};
214     bool has_inline_frames{false};
215   };
216 
217   struct Mapping {
218     explicit Mapping(
219         const tables::StackProfileMappingTable::ConstRowReference& mapping,
220         const StringPool& string_pool,
221         StringTable& string_table);
222 
223     // Heuristic to determine if this maps to the main binary. Bigger scores
224     // mean higher likelihood.
225     int64_t ComputeMainBinaryScore() const;
226 
227     const uint64_t memory_start;
228     const uint64_t memory_limit;
229     const uint64_t file_offset;
230     const int64_t filename;
231     const int64_t build_id;
232 
233     const std::string filename_str;
234 
235     DebugInfo debug_info;
236   };
237 
238   struct Function {
239     struct Hash {
operatorFunction::Hash240       size_t operator()(const Function& func) const {
241         return static_cast<size_t>(perfetto::base::Hasher::Combine(
242             func.name, func.system_name, func.filename));
243       }
244     };
245 
246     int64_t name;
247     int64_t system_name;
248     int64_t filename;
249 
250     bool operator==(const Function& other) const {
251       return name == other.name && system_name == other.system_name &&
252              filename == other.filename;
253     }
254   };
255 
256   const protozero::PackedVarInt& GetLocationIdsForCallsite(
257       const CallsiteId& callsite_id,
258       bool annotated);
259 
260   std::vector<Line> GetLinesForSymbolSetId(
261       std::optional<uint32_t> symbol_set_id,
262       CallsiteAnnotation annotation,
263       uint64_t mapping_id);
264 
265   std::vector<Line> GetLines(
266       const tables::StackProfileFrameTable::ConstRowReference& frame,
267       CallsiteAnnotation annotation,
268       uint64_t mapping_id);
269 
270   int64_t GetNameForFrame(
271       const tables::StackProfileFrameTable::ConstRowReference& frame,
272       CallsiteAnnotation annotation);
273 
274   int64_t GetSystemNameForFrame(
275       const tables::StackProfileFrameTable::ConstRowReference& frame);
276 
277   uint64_t WriteLocationIfNeeded(FrameId frame_id,
278                                  CallsiteAnnotation annotation);
279   uint64_t WriteFakeLocationIfNeeded(const std::string& name);
280 
281   uint64_t WriteFunctionIfNeeded(
282       const tables::SymbolTable::ConstRowReference& symbol,
283       CallsiteAnnotation annotation,
284       uint64_t mapping_id);
285 
286   uint64_t WriteFunctionIfNeeded(
287       const tables::StackProfileFrameTable::ConstRowReference& frame,
288       CallsiteAnnotation annotation,
289       uint64_t mapping_id);
290 
291   uint64_t WriteFakeFunctionIfNeeded(int64_t name_id);
292 
293   uint64_t WriteMappingIfNeeded(
294       const tables::StackProfileMappingTable::ConstRowReference& mapping);
295   void WriteMappings();
296   void WriteMapping(uint64_t mapping_id);
297   void WriteFunctions();
298   void WriteLocations();
299 
300   void WriteSampleTypes(const std::vector<ValueType>& sample_types);
301 
302   void Finalize();
303 
GetMapping(uint64_t mapping_id)304   Mapping& GetMapping(uint64_t mapping_id) {
305     return mappings_[static_cast<size_t>(mapping_id - 1)];
306   }
307 
308   // Goes over the list of staged mappings and tries to determine which is the
309   // most likely main binary.
310   std::optional<uint64_t> GuessMainBinary() const;
311 
312   bool AddSample(const protozero::PackedVarInt& location_ids,
313                  const protozero::PackedVarInt& values);
314 
315   // Profile proto being serialized.
316   protozero::HeapBuffered<third_party::perftools::profiles::pbzero::Profile>
317       result_;
318 
319   const TraceProcessorContext& context_;
320   StringTable string_table_;
321 
322   bool finalized_{false};
323   AnnotatedCallsites annotations_;
324 
325   // Caches a (possibly annotated) CallsiteId (callstack) to the list of
326   // locations emitted to the profile.
327   struct MaybeAnnotatedCallsiteId {
328     struct Hash {
operatorMaybeAnnotatedCallsiteId::Hash329       size_t operator()(const MaybeAnnotatedCallsiteId& id) const {
330         return static_cast<size_t>(
331             perfetto::base::Hasher::Combine(id.callsite_id.value, id.annotate));
332       }
333     };
334 
335     CallsiteId callsite_id;
336     bool annotate;
337 
338     bool operator==(const MaybeAnnotatedCallsiteId& other) const {
339       return callsite_id == other.callsite_id && annotate == other.annotate;
340     }
341   };
342   std::unordered_map<MaybeAnnotatedCallsiteId,
343                      protozero::PackedVarInt,
344                      MaybeAnnotatedCallsiteId::Hash>
345       cached_location_ids_;
346 
347   // Helpers to map TraceProcessor rows to already written Profile entities
348   // (their ids).
349   std::unordered_map<AnnotatedFrameId, uint64_t, AnnotatedFrameId::Hash>
350       seen_locations_;
351   std::unordered_map<AnnotatedFrameId, uint64_t, AnnotatedFrameId::Hash>
352       seen_functions_;
353   std::unordered_map<MappingId, uint64_t> seen_mappings_;
354   std::unordered_map<int64_t, uint64_t> seen_fake_locations_;
355 
356   // Helpers to deduplicate entries. Map entity to its id. These also serve as a
357   // staging area until written out to the profile proto during `Finalize`. Ids
358   // are consecutive integers starting at 1. (Ids with value 0 are not allowed).
359   // Ids are not unique across entities (i.e. there can be a mapping_id = 1 and
360   // a function_id = 1)
361   std::unordered_map<Location, uint64_t, Location::Hash> locations_;
362   std::unordered_map<MappingKey, uint64_t, MappingKey::Hash> mapping_keys_;
363   std::unordered_map<Function, uint64_t, Function::Hash> functions_;
364   // Staging area for Mappings. mapping_id - 1 = index in the vector.
365   std::vector<Mapping> mappings_;
366 };
367 
368 }  // namespace trace_processor
369 }  // namespace perfetto
370 
371 #endif  // SRC_TRACE_PROCESSOR_UTIL_PROFILE_BUILDER_H_
372