• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_PROFILING_MEMORY_BOOKKEEPING_H_
18 #define SRC_PROFILING_MEMORY_BOOKKEEPING_H_
19 
20 #include <functional>
21 #include <map>
22 #include <string>
23 #include <vector>
24 
25 #include "perfetto/base/lookup_set.h"
26 #include "perfetto/base/string_splitter.h"
27 #include "perfetto/base/time.h"
28 #include "perfetto/trace/profiling/profile_packet.pbzero.h"
29 #include "perfetto/trace/trace_packet.pbzero.h"
30 #include "perfetto/tracing/core/trace_writer.h"
31 #include "src/profiling/memory/interner.h"
32 #include "src/profiling/memory/unwound_messages.h"
33 
34 // Below is an illustration of the bookkeeping system state where
35 // PID 1 does the following allocations:
36 // 0x123: 128 bytes at [bar main]
37 // 0x234: 128 bytes at [bar main]
38 // 0xf00: 512 bytes at [foo main]
39 // PID 1 allocated but previously freed 1024 bytes at [bar main]
40 //
41 // PID 2 does the following allocations:
42 // 0x345: 512 bytes at [foo main]
43 // 0x456:  32 bytes at [foo main]
44 // PID 2 allocated but already freed 1235 bytes at [foo main]
45 // PID 2 allocated and freed 2048 bytes in main.
46 //
47 // +---------------------------------+   +-------------------+
48 // | +---------+    HeapTracker PID 1|   | GlobalCallstackTri|
49 // | |0x123 128+---+    +----------+ |   |           +---+   |
50 // | |         |   +---->alloc:1280+----------------->bar|   |
51 // | |0x234 128+---+    |free: 1024| |   |           +-^-+   |
52 // | |         |        +----------+ |   |   +---+     ^     |
53 // | |0xf00 512+---+                 | +----->foo|     |     |
54 // | +--------+|   |    +----------+ | | |   +-^-+     |     |
55 // |               +---->alloc: 512+---+ |     |       |     |
56 // |                    |free:    0| | | |     +--+----+     |
57 // |                    +----------+ | | |        |          |
58 // |                                 | | |      +-+--+       |
59 // +---------------------------------+ | |      |main|       |
60 //                                     | |      +--+-+       |
61 // +---------------------------------+ | |         ^         |
62 // | +---------+    HeapTracker PID 2| | +-------------------+
63 // | |0x345 512+---+    +----------+ | |           |
64 // | |         |   +---->alloc:1779+---+           |
65 // | |0x456  32+---+    |free: 1235| |             |
66 // | +---------+        +----------+ |             |
67 // |                                 |             |
68 // |                    +----------+ |             |
69 // |                    |alloc:2048+---------------+
70 // |                    |free: 2048| |
71 // |                    +----------+ |
72 // |                                 |
73 // +---------------------------------+
74 //   Allocation    CallstackAllocations        Node
75 //
76 // The active allocations are on the leftmost side, modeled as the class
77 // HeapTracker::Allocation.
78 //
79 // The total allocated and freed bytes per callsite are in the middle, modeled
80 // as the HeapTracker::CallstackAllocations class.
81 // Note that (1280 - 1024) = 256, so alloc - free is equal to the total of the
82 // currently active allocations.
83 // Note in PID 2 there is a CallstackAllocations with 2048 allocated and 2048
84 // freed bytes. This is not currently referenced by any Allocations (as it
85 // should, as 2048 - 2048 = 0, which would mean that the total size of the
86 // allocations referencing it should be 0). This is because we haven't dumped
87 // this state yet, so the CallstackAllocations will be kept around until the
88 // next dump, written to the trace, and then destroyed.
89 //
90 // On the right hand side is the GlobalCallstackTrie, with nodes representing
91 // distinct callstacks. They have no information about the currently allocated
92 // or freed bytes, they only contain a reference count to destroy them as
93 // soon as they are no longer referenced by a HeapTracker.
94 
95 namespace perfetto {
96 namespace profiling {
97 
98 class HeapTracker;
99 
100 struct Mapping {
MappingMapping101   Mapping(Interned<std::string> b) : build_id(std::move(b)) {}
102 
103   Interned<std::string> build_id;
104   uint64_t offset = 0;
105   uint64_t start = 0;
106   uint64_t end = 0;
107   uint64_t load_bias = 0;
108   std::vector<Interned<std::string>> path_components{};
109 
110   bool operator<(const Mapping& other) const {
111     return std::tie(build_id, offset, start, end, load_bias, path_components) <
112            std::tie(other.build_id, other.offset, other.start, other.end,
113                     other.load_bias, other.path_components);
114   }
115   bool operator==(const Mapping& other) const {
116     return std::tie(build_id, offset, start, end, load_bias, path_components) ==
117            std::tie(other.build_id, other.offset, other.start, other.end,
118                     other.load_bias, other.path_components);
119   }
120 };
121 
122 struct Frame {
FrameFrame123   Frame(Interned<Mapping> m, Interned<std::string> fn_name, uint64_t pc)
124       : mapping(m), function_name(fn_name), rel_pc(pc) {}
125   Interned<Mapping> mapping;
126   Interned<std::string> function_name;
127   uint64_t rel_pc;
128 
129   bool operator<(const Frame& other) const {
130     return std::tie(mapping, function_name, rel_pc) <
131            std::tie(other.mapping, other.function_name, other.rel_pc);
132   }
133 
134   bool operator==(const Frame& other) const {
135     return std::tie(mapping, function_name, rel_pc) ==
136            std::tie(other.mapping, other.function_name, other.rel_pc);
137   }
138 };
139 
140 // Graph of function callsites. This is shared between heap dumps for
141 // different processes. Each call site is represented by a
142 // GlobalCallstackTrie::Node that is owned by the parent (i.e. calling)
143 // callsite. It has a pointer to its parent, which means the function
144 // call-graph can be reconstructed from a GlobalCallstackTrie::Node by walking
145 // down the pointers to the parents.
146 class GlobalCallstackTrie {
147  public:
148   // Node in a tree of function traces that resulted in an allocation. For
149   // instance, if alloc_buf is called from foo and bar, which are called from
150   // main, the tree looks as following.
151   //
152   //            alloc_buf    alloc_buf
153   //                   |      |
154   //                  foo    bar
155   //                    \    /
156   //                      main
157   //                       |
158   //                   libc_init
159   //                       |
160   //                    [root_]
161   //
162   // allocations_ will hold a map from the pointers returned from malloc to
163   // alloc_buf to the leafs of this tree.
164   class Node {
165    public:
166     // This is opaque except to GlobalCallstackTrie.
167     friend class GlobalCallstackTrie;
168 
Node(Interned<Frame> frame)169     Node(Interned<Frame> frame) : Node(std::move(frame), nullptr) {}
Node(Interned<Frame> frame,Node * parent)170     Node(Interned<Frame> frame, Node* parent)
171         : parent_(parent), location_(std::move(frame)) {}
172 
id()173     uintptr_t id() const { return reinterpret_cast<uintptr_t>(this); }
174 
175    private:
176     Node* GetOrCreateChild(const Interned<Frame>& loc);
177 
178     uint64_t ref_count_ = 0;
179     Node* const parent_;
180     const Interned<Frame> location_;
181     base::LookupSet<Node, const Interned<Frame>, &Node::location_> children_;
182   };
183 
184   GlobalCallstackTrie() = default;
185   GlobalCallstackTrie(const GlobalCallstackTrie&) = delete;
186   GlobalCallstackTrie& operator=(const GlobalCallstackTrie&) = delete;
187 
188   Node* CreateCallsite(const std::vector<FrameData>& locs);
189   static void DecrementNode(Node* node);
190   static void IncrementNode(Node* node);
191 
192   std::vector<Interned<Frame>> BuildCallstack(const Node* node) const;
193 
194  private:
195   Interned<Frame> InternCodeLocation(const FrameData& loc);
196   Interned<Frame> MakeRootFrame();
197 
198   Interner<std::string> string_interner_;
199   Interner<Mapping> mapping_interner_;
200   Interner<Frame> frame_interner_;
201 
202   Node root_{MakeRootFrame()};
203 };
204 
205 struct DumpState {
DumpStateDumpState206   DumpState(TraceWriter* tw, uint64_t* ni) : trace_writer(tw), next_index(ni) {
207     last_written = trace_writer->written();
208 
209     current_trace_packet = trace_writer->NewTracePacket();
210     current_trace_packet->set_timestamp(
211         static_cast<uint64_t>(base::GetBootTimeNs().count()));
212     current_profile_packet = current_trace_packet->set_profile_packet();
213     current_profile_packet->set_index((*next_index)++);
214 
215     // Explicitly reserve intern ID 0 for the empty string, so unset string
216     // fields get mapped to this.
217     auto interned_string = current_profile_packet->add_strings();
218     constexpr const uint8_t kEmptyString[] = "";
219     interned_string->set_id(0);
220     interned_string->set_str(kEmptyString, 0);
221   }
222 
223   void WriteMap(const Interned<Mapping> map);
224   void WriteFrame(const Interned<Frame> frame);
225   void WriteString(const Interned<std::string>& str);
226 
227   std::set<InternID> dumped_strings;
228   std::set<InternID> dumped_frames;
229   std::set<InternID> dumped_mappings;
230 
231   std::set<GlobalCallstackTrie::Node*> callstacks_to_dump;
232 
233   TraceWriter* trace_writer;
234   protos::pbzero::ProfilePacket* current_profile_packet;
235   TraceWriter::TracePacketHandle current_trace_packet;
236   uint64_t* next_index;
237   uint64_t last_written = 0;
238 
NewProfilePacketDumpState239   void NewProfilePacket() {
240     PERFETTO_DLOG("New profile packet after %" PRIu64 " bytes. Total: %" PRIu64
241                   ", before %" PRIu64,
242                   trace_writer->written() - last_written,
243                   trace_writer->written(), last_written);
244     current_profile_packet->set_continued(true);
245     last_written = trace_writer->written();
246 
247     current_trace_packet->Finalize();
248     current_trace_packet = trace_writer->NewTracePacket();
249     current_trace_packet->set_timestamp(
250         static_cast<uint64_t>(base::GetBootTimeNs().count()));
251     current_profile_packet = current_trace_packet->set_profile_packet();
252     current_profile_packet->set_index((*next_index)++);
253   }
254 
currently_writtenDumpState255   uint64_t currently_written() {
256     return trace_writer->written() - last_written;
257   }
258 };
259 
260 // Snapshot for memory allocations of a particular process. Shares callsites
261 // with other processes.
262 class HeapTracker {
263  public:
264   // Caller needs to ensure that callsites outlives the HeapTracker.
HeapTracker(GlobalCallstackTrie * callsites)265   explicit HeapTracker(GlobalCallstackTrie* callsites)
266       : callsites_(callsites) {}
267 
268   void RecordMalloc(const std::vector<FrameData>& stack,
269                     uint64_t address,
270                     uint64_t size,
271                     uint64_t sequence_number,
272                     uint64_t timestamp);
273   void Dump(
274       std::function<void(protos::pbzero::ProfilePacket::ProcessHeapSamples*)>
275           fill_process_header,
276       DumpState* dump_state);
RecordFree(uint64_t address,uint64_t sequence_number,uint64_t timestamp)277   void RecordFree(uint64_t address,
278                   uint64_t sequence_number,
279                   uint64_t timestamp) {
280     RecordOperation(sequence_number, {address, timestamp});
281   }
282 
283   uint64_t GetSizeForTesting(const std::vector<FrameData>& stack);
GetTimestampForTesting()284   uint64_t GetTimestampForTesting() { return committed_timestamp_; }
285 
286  private:
287   // Sum of all the allocations for a given callstack.
288   struct CallstackAllocations {
CallstackAllocationsCallstackAllocations289     CallstackAllocations(GlobalCallstackTrie::Node* n) : node(n) {}
290 
291     uint64_t allocs = 0;
292 
293     uint64_t allocated = 0;
294     uint64_t freed = 0;
295     uint64_t allocation_count = 0;
296     uint64_t free_count = 0;
297 
298     GlobalCallstackTrie::Node* const node;
299 
~CallstackAllocationsCallstackAllocations300     ~CallstackAllocations() { GlobalCallstackTrie::DecrementNode(node); }
301 
302     bool operator<(const CallstackAllocations& other) const {
303       return node < other.node;
304     }
305   };
306 
307   struct Allocation {
AllocationAllocation308     Allocation(uint64_t size, uint64_t seq, CallstackAllocations* csa)
309         : total_size(size), sequence_number(seq), callstack_allocations(csa) {
310       callstack_allocations->allocs++;
311     }
312 
313     Allocation() = default;
314     Allocation(const Allocation&) = delete;
AllocationAllocation315     Allocation(Allocation&& other) noexcept {
316       total_size = other.total_size;
317       sequence_number = other.sequence_number;
318       callstack_allocations = other.callstack_allocations;
319       other.callstack_allocations = nullptr;
320     }
321 
AddToCallstackAllocationsAllocation322     void AddToCallstackAllocations() {
323       callstack_allocations->allocation_count++;
324       callstack_allocations->allocated += total_size;
325     }
326 
SubtractFromCallstackAllocationsAllocation327     void SubtractFromCallstackAllocations() {
328       callstack_allocations->free_count++;
329       callstack_allocations->freed += total_size;
330     }
331 
~AllocationAllocation332     ~Allocation() {
333       if (callstack_allocations)
334         callstack_allocations->allocs--;
335     }
336 
337     uint64_t total_size;
338     uint64_t sequence_number;
339     CallstackAllocations* callstack_allocations;
340   };
341 
342   struct PendingOperation {
343     uint64_t allocation_address;
344     uint64_t timestamp;
345   };
346 
MaybeCreateCallstackAllocations(GlobalCallstackTrie::Node * node)347   CallstackAllocations* MaybeCreateCallstackAllocations(
348       GlobalCallstackTrie::Node* node) {
349     auto callstack_allocations_it = callstack_allocations_.find(node);
350     if (callstack_allocations_it == callstack_allocations_.end()) {
351       GlobalCallstackTrie::IncrementNode(node);
352       bool inserted;
353       std::tie(callstack_allocations_it, inserted) =
354           callstack_allocations_.emplace(node, node);
355       PERFETTO_DCHECK(inserted);
356     }
357     return &callstack_allocations_it->second;
358   }
359 
360   void RecordOperation(uint64_t sequence_number,
361                        const PendingOperation& operation);
362 
363   // Commits a malloc or free operation.
364   // See comment of pending_operations_ for encoding of malloc and free
365   // operations.
366   //
367   // Committing a malloc operation: Add the allocations size to
368   // CallstackAllocation::allocated.
369   // Committing a free operation: Add the allocation's size to
370   // CallstackAllocation::freed and delete the allocation.
371   void CommitOperation(uint64_t sequence_number,
372                        const PendingOperation& operation);
373 
374   // We cannot use an interner here, because after the last allocation goes
375   // away, we still need to keep the CallstackAllocations around until the next
376   // dump.
377   std::map<GlobalCallstackTrie::Node*, CallstackAllocations>
378       callstack_allocations_;
379 
380   std::vector<std::pair<decltype(callstack_allocations_)::iterator, uint64_t>>
381       dead_callstack_allocations_;
382 
383   std::map<uint64_t /* allocation address */, Allocation> allocations_;
384 
385   // An operation is either a commit of an allocation or freeing of an
386   // allocation. An operation is a free if its seq_id is larger than
387   // the sequence_number of the corresponding allocation. It is a commit if its
388   // seq_id is equal to the sequence_number of the corresponding allocation.
389   //
390   // If its seq_id is less than the sequence_number of the corresponding
391   // allocation it could be either, but is ignored either way.
392   std::map<uint64_t /* seq_id */, PendingOperation /* allocation address */>
393       pending_operations_;
394 
395   uint64_t committed_timestamp_ = 0;
396   // The sequence number all mallocs and frees have been handled up to.
397   uint64_t committed_sequence_number_ = 0;
398   GlobalCallstackTrie* callsites_;
399 };
400 
401 }  // namespace profiling
402 }  // namespace perfetto
403 
404 namespace std {
405 template <>
406 struct hash<::perfetto::profiling::Mapping> {
407   using argument_type = ::perfetto::profiling::Mapping;
408   using result_type = size_t;
409   result_type operator()(const argument_type& mapping) {
410     size_t h =
411         std::hash<::perfetto::profiling::InternID>{}(mapping.build_id.id());
412     h ^= std::hash<uint64_t>{}(mapping.offset);
413     h ^= std::hash<uint64_t>{}(mapping.start);
414     h ^= std::hash<uint64_t>{}(mapping.end);
415     h ^= std::hash<uint64_t>{}(mapping.load_bias);
416     for (const auto& path : mapping.path_components)
417       h ^= std::hash<uint64_t>{}(path.id());
418     return h;
419   }
420 };
421 
422 template <>
423 struct hash<::perfetto::profiling::Frame> {
424   using argument_type = ::perfetto::profiling::Frame;
425   using result_type = size_t;
426   result_type operator()(const argument_type& frame) {
427     size_t h = std::hash<::perfetto::profiling::InternID>{}(frame.mapping.id());
428     h ^= std::hash<::perfetto::profiling::InternID>{}(frame.function_name.id());
429     h ^= std::hash<uint64_t>{}(frame.rel_pc);
430     return h;
431   }
432 };
433 }  // namespace std
434 
435 #endif  // SRC_PROFILING_MEMORY_BOOKKEEPING_H_
436