• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef SRC_PROFILING_MEMORY_BOOKKEEPING_H_
#define SRC_PROFILING_MEMORY_BOOKKEEPING_H_

#include <cstdint>
#include <map>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#include "perfetto/base/time.h"
#include "src/profiling/common/callstack_trie.h"
#include "src/profiling/common/interner.h"
#include "src/profiling/memory/unwound_messages.h"

// Below is an illustration of the bookkeeping system state where
// PID 1 does the following allocations:
// 0x123: 128 bytes at [bar main]
// 0x234: 128 bytes at [bar main]
// 0xf00: 512 bytes at [foo main]
// PID 1 allocated but previously freed 1024 bytes at [bar main]
//
// PID 2 does the following allocations:
// 0x345: 512 bytes at [foo main]
// 0x456:  32 bytes at [foo main]
// PID 2 allocated but already freed 1235 bytes at [foo main]
// PID 2 allocated and freed 2048 bytes in main.
//
// +---------------------------------+   +-------------------+
// | +---------+    HeapTracker PID 1|   | GlobalCallstackTri|
// | |0x123 128+---+    +----------+ |   |           +---+   |
// | |         |   +---->alloc:1280+----------------->bar|   |
// | |0x234 128+---+    |free: 1024| |   |           +-^-+   |
// | |         |        +----------+ |   |   +---+     ^     |
// | |0xf00 512+---+                 | +----->foo|     |     |
// | +---------+   |    +----------+ | | |   +-^-+     |     |
// |               +---->alloc: 512+---+ |     |       |     |
// |                    |free:    0| | | |     +--+----+     |
// |                    +----------+ | | |        |          |
// |                                 | | |      +-+--+       |
// +---------------------------------+ | |      |main|       |
//                                     | |      +--+-+       |
// +---------------------------------+ | |         ^         |
// | +---------+    HeapTracker PID 2| | +-------------------+
// | |0x345 512+---+    +----------+ | |           |
// | |         |   +---->alloc:1779+---+           |
// | |0x456  32+---+    |free: 1235| |             |
// | +---------+        +----------+ |             |
// |                                 |             |
// |                    +----------+ |             |
// |                    |alloc:2048+---------------+
// |                    |free: 2048| |
// |                    +----------+ |
// |                                 |
// +---------------------------------+
//   Allocation    CallstackAllocations        Node
//
// The active allocations are on the leftmost side, modeled as the class
// HeapTracker::Allocation.
//
// The total allocated and freed bytes per callsite are in the middle, modeled
// as the HeapTracker::CallstackAllocations class.
// Note that (1280 - 1024) = 256, so alloc - free is equal to the total of the
// currently active allocations.
// Note in PID 2 there is a CallstackAllocations with 2048 allocated and 2048
// freed bytes. This is not currently referenced by any Allocations (as it
// should, as 2048 - 2048 = 0, which would mean that the total size of the
// allocations referencing it should be 0). This is because we haven't dumped
// this state yet, so the CallstackAllocations will be kept around until the
// next dump, written to the trace, and then destroyed.
//
// On the right hand side is the GlobalCallstackTrie, with nodes representing
// distinct callstacks. They have no information about the currently allocated
// or freed bytes, they only contain a reference count to destroy them as
// soon as they are no longer referenced by a HeapTracker.

89 namespace perfetto {
90 namespace profiling {
91 
92 // Snapshot for memory allocations of a particular process. Shares callsites
93 // with other processes.
94 class HeapTracker {
95  public:
96   struct CallstackMaxAllocations {
97     uint64_t max;
98     uint64_t cur;
99     uint64_t max_count;
100     uint64_t cur_count;
101   };
102   struct CallstackTotalAllocations {
103     uint64_t allocated;
104     uint64_t freed;
105     uint64_t allocation_count;
106     uint64_t free_count;
107   };
108 
109   // Sum of all the allocations for a given callstack.
110   struct CallstackAllocations {
CallstackAllocationsCallstackAllocations111     explicit CallstackAllocations(GlobalCallstackTrie::Node* n) : node(n) {}
112 
113     uint64_t allocs = 0;
114 
115     union {
116       CallstackMaxAllocations retain_max;
117       CallstackTotalAllocations totals;
118     } value = {};
119 
120     GlobalCallstackTrie::Node* const node;
121 
~CallstackAllocationsCallstackAllocations122     ~CallstackAllocations() { GlobalCallstackTrie::DecrementNode(node); }
123 
124     bool operator<(const CallstackAllocations& other) const {
125       return node < other.node;
126     }
127   };
128 
129   // Caller needs to ensure that callsites outlives the HeapTracker.
HeapTracker(GlobalCallstackTrie * callsites,bool dump_at_max_mode)130   explicit HeapTracker(GlobalCallstackTrie* callsites, bool dump_at_max_mode)
131       : callsites_(callsites), dump_at_max_mode_(dump_at_max_mode) {}
132 
133   void RecordMalloc(const std::vector<unwindstack::FrameData>& callstack,
134                     const std::vector<std::string>& build_ids,
135                     uint64_t address,
136                     uint64_t sample_size,
137                     uint64_t alloc_size,
138                     uint64_t sequence_number,
139                     uint64_t timestamp);
140 
141   template <typename F>
GetCallstackAllocations(F fn)142   void GetCallstackAllocations(F fn) {
143     // There are two reasons we remove the unused callstack allocations on the
144     // next iteration of Dump:
145     // * We need to remove them after the callstacks were dumped, which
146     //   currently happens after the allocations are dumped.
147     // * This way, we do not destroy and recreate callstacks as frequently.
148     for (auto it_and_alloc : dead_callstack_allocations_) {
149       auto& it = it_and_alloc.first;
150       uint64_t allocated = it_and_alloc.second;
151       const CallstackAllocations& alloc = it->second;
152       // For non-dump-at-max, we need to check, even if there are still no
153       // allocations referencing this callstack, whether there were any
154       // allocations that happened but were freed again. If that was the case,
155       // we need to keep the callsite, because the next dump will indicate a
156       // different self_alloc and self_freed.
157       if (alloc.allocs == 0 &&
158           (dump_at_max_mode_ ||
159            alloc.value.totals.allocation_count == allocated)) {
160         // TODO(fmayer): We could probably be smarter than throw away
161         // our whole frames cache.
162         ClearFrameCache();
163         callstack_allocations_.erase(it);
164       }
165     }
166     dead_callstack_allocations_.clear();
167 
168     for (auto it = callstack_allocations_.begin();
169          it != callstack_allocations_.end(); ++it) {
170       const CallstackAllocations& alloc = it->second;
171       fn(alloc);
172 
173       if (alloc.allocs == 0)
174         dead_callstack_allocations_.emplace_back(
175             it, !dump_at_max_mode_ ? alloc.value.totals.allocation_count : 0);
176     }
177   }
178 
179   template <typename F>
GetAllocations(F fn)180   void GetAllocations(F fn) {
181     for (const auto& addr_and_allocation : allocations_) {
182       const Allocation& alloc = addr_and_allocation.second;
183       fn(addr_and_allocation.first, alloc.sample_size, alloc.alloc_size,
184          alloc.callstack_allocations()->node->id());
185     }
186   }
187 
RecordFree(uint64_t address,uint64_t sequence_number,uint64_t timestamp)188   void RecordFree(uint64_t address,
189                   uint64_t sequence_number,
190                   uint64_t timestamp) {
191     RecordOperation(sequence_number, {address, timestamp});
192   }
193 
ClearFrameCache()194   void ClearFrameCache() { frame_cache_.clear(); }
195 
dump_timestamp()196   uint64_t dump_timestamp() {
197     return dump_at_max_mode_ ? max_timestamp_ : committed_timestamp_;
198   }
199 
200   uint64_t GetSizeForTesting(const std::vector<unwindstack::FrameData>& stack,
201                              std::vector<std::string> build_ids);
202   uint64_t GetMaxForTesting(const std::vector<unwindstack::FrameData>& stack,
203                             std::vector<std::string> build_ids);
204   uint64_t GetMaxCountForTesting(
205       const std::vector<unwindstack::FrameData>& stack,
206       std::vector<std::string> build_ids);
207 
GetTimestampForTesting()208   uint64_t GetTimestampForTesting() { return committed_timestamp_; }
209 
210  private:
211   struct Allocation {
AllocationAllocation212     Allocation(uint64_t size,
213                uint64_t asize,
214                uint64_t seq,
215                CallstackAllocations* csa)
216         : sample_size(size), alloc_size(asize), sequence_number(seq) {
217       SetCallstackAllocations(csa);
218     }
219 
220     Allocation() = default;
221     Allocation(const Allocation&) = delete;
AllocationAllocation222     Allocation(Allocation&& other) noexcept {
223       sample_size = other.sample_size;
224       alloc_size = other.alloc_size;
225       sequence_number = other.sequence_number;
226       callstack_allocations_ = other.callstack_allocations_;
227       other.callstack_allocations_ = nullptr;
228     }
229 
~AllocationAllocation230     ~Allocation() { SetCallstackAllocations(nullptr); }
231 
SetCallstackAllocationsAllocation232     void SetCallstackAllocations(CallstackAllocations* callstack_allocations) {
233       if (callstack_allocations_)
234         callstack_allocations_->allocs--;
235       callstack_allocations_ = callstack_allocations;
236       if (callstack_allocations_)
237         callstack_allocations_->allocs++;
238     }
239 
callstack_allocationsAllocation240     CallstackAllocations* callstack_allocations() const {
241       return callstack_allocations_;
242     }
243 
244     uint64_t sample_size;
245     uint64_t alloc_size;
246     uint64_t sequence_number;
247 
248    private:
249     CallstackAllocations* callstack_allocations_ = nullptr;
250   };
251 
252   struct PendingOperation {
253     uint64_t allocation_address;
254     uint64_t timestamp;
255   };
256 
MaybeCreateCallstackAllocations(GlobalCallstackTrie::Node * node)257   CallstackAllocations* MaybeCreateCallstackAllocations(
258       GlobalCallstackTrie::Node* node) {
259     auto callstack_allocations_it = callstack_allocations_.find(node);
260     if (callstack_allocations_it == callstack_allocations_.end()) {
261       GlobalCallstackTrie::IncrementNode(node);
262       bool inserted;
263       std::tie(callstack_allocations_it, inserted) =
264           callstack_allocations_.emplace(node, node);
265       PERFETTO_DCHECK(inserted);
266     }
267     return &callstack_allocations_it->second;
268   }
269 
270   void RecordOperation(uint64_t sequence_number,
271                        const PendingOperation& operation);
272 
273   // Commits a malloc or free operation.
274   // See comment of pending_operations_ for encoding of malloc and free
275   // operations.
276   //
277   // Committing a malloc operation: Add the allocations size to
278   // CallstackAllocation::allocated.
279   // Committing a free operation: Add the allocation's size to
280   // CallstackAllocation::freed and delete the allocation.
281   void CommitOperation(uint64_t sequence_number,
282                        const PendingOperation& operation);
283 
AddToCallstackAllocations(uint64_t ts,const Allocation & alloc)284   void AddToCallstackAllocations(uint64_t ts, const Allocation& alloc) {
285     if (dump_at_max_mode_) {
286       current_unfreed_ += alloc.sample_size;
287       alloc.callstack_allocations()->value.retain_max.cur += alloc.sample_size;
288       alloc.callstack_allocations()->value.retain_max.cur_count++;
289 
290       if (current_unfreed_ <= max_unfreed_)
291         return;
292 
293       if (max_sequence_number_ == alloc.sequence_number - 1) {
294         // We know the only CallstackAllocation that has max != cur is the
295         // one we just updated.
296         alloc.callstack_allocations()->value.retain_max.max =
297             alloc.callstack_allocations()->value.retain_max.cur;
298         alloc.callstack_allocations()->value.retain_max.max_count =
299             alloc.callstack_allocations()->value.retain_max.cur_count;
300       } else {
301         for (auto& p : callstack_allocations_) {
302           // We need to reset max = cur for every CallstackAllocation, as we
303           // do not know which ones have changed since the last max.
304           // TODO(fmayer): Add an index to speed this up
305           CallstackAllocations& csa = p.second;
306           csa.value.retain_max.max = csa.value.retain_max.cur;
307           csa.value.retain_max.max_count = csa.value.retain_max.cur_count;
308         }
309       }
310       max_sequence_number_ = alloc.sequence_number;
311       max_unfreed_ = current_unfreed_;
312       max_timestamp_ = ts;
313     } else {
314       alloc.callstack_allocations()->value.totals.allocated +=
315           alloc.sample_size;
316       alloc.callstack_allocations()->value.totals.allocation_count++;
317     }
318   }
319 
SubtractFromCallstackAllocations(const Allocation & alloc)320   void SubtractFromCallstackAllocations(const Allocation& alloc) {
321     if (dump_at_max_mode_) {
322       current_unfreed_ -= alloc.sample_size;
323       alloc.callstack_allocations()->value.retain_max.cur -= alloc.sample_size;
324       alloc.callstack_allocations()->value.retain_max.cur_count--;
325     } else {
326       alloc.callstack_allocations()->value.totals.freed += alloc.sample_size;
327       alloc.callstack_allocations()->value.totals.free_count++;
328     }
329   }
330 
331   // We cannot use an interner here, because after the last allocation goes
332   // away, we still need to keep the CallstackAllocations around until the next
333   // dump.
334   std::map<GlobalCallstackTrie::Node*, CallstackAllocations>
335       callstack_allocations_;
336 
337   std::vector<std::pair<decltype(callstack_allocations_)::iterator, uint64_t>>
338       dead_callstack_allocations_;
339 
340   std::map<uint64_t /* allocation address */, Allocation> allocations_;
341 
342   // An operation is either a commit of an allocation or freeing of an
343   // allocation. An operation is a free if its seq_id is larger than
344   // the sequence_number of the corresponding allocation. It is a commit if its
345   // seq_id is equal to the sequence_number of the corresponding allocation.
346   //
347   // If its seq_id is less than the sequence_number of the corresponding
348   // allocation it could be either, but is ignored either way.
349   std::map<uint64_t /* seq_id */, PendingOperation /* allocation address */>
350       pending_operations_;
351 
352   uint64_t committed_timestamp_ = 0;
353   // The sequence number all mallocs and frees have been handled up to.
354   uint64_t committed_sequence_number_ = 0;
355   GlobalCallstackTrie* callsites_;
356 
357   const bool dump_at_max_mode_ = false;
358   // The following members are only used if dump_at_max_mode_ == true.
359   uint64_t max_sequence_number_ = 0;
360   uint64_t current_unfreed_ = 0;
361   uint64_t max_unfreed_ = 0;
362   uint64_t max_timestamp_ = 0;
363 
364   // We index by abspc, which is unique as long as the maps do not change.
365   // This is why we ClearFrameCache after we reparsed maps.
366   std::unordered_map<uint64_t /* abs pc */, Interned<Frame>> frame_cache_;
367 };
368 
369 }  // namespace profiling
370 }  // namespace perfetto

#endif  // SRC_PROFILING_MEMORY_BOOKKEEPING_H_
