• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_PROFILING_MEMORY_BOOKKEEPING_H_
18 #define SRC_PROFILING_MEMORY_BOOKKEEPING_H_
19 
20 #include <map>
21 #include <vector>
22 
23 #include "perfetto/base/time.h"
24 #include "src/profiling/common/callstack_trie.h"
25 #include "src/profiling/common/interner.h"
26 #include "src/profiling/memory/unwound_messages.h"
27 
28 // Below is an illustration of the bookkeeping system state where
29 // PID 1 does the following allocations:
30 // 0x123: 128 bytes at [bar main]
31 // 0x234: 128 bytes at [bar main]
32 // 0xf00: 512 bytes at [foo main]
33 // PID 1 allocated but previously freed 1024 bytes at [bar main]
34 //
35 // PID 2 does the following allocations:
36 // 0x345: 512 bytes at [foo main]
37 // 0x456:  32 bytes at [foo main]
38 // PID 2 allocated but already freed 1235 bytes at [foo main]
39 // PID 2 allocated and freed 2048 bytes in main.
40 //
41 // +---------------------------------+   +-------------------+
42 // | +---------+    HeapTracker PID 1|   | GlobalCallstackTri|
43 // | |0x123 128+---+    +----------+ |   |           +---+   |
44 // | |         |   +---->alloc:1280+----------------->bar|   |
45 // | |0x234 128+---+    |free: 1024| |   |           +-^-+   |
46 // | |         |        +----------+ |   |   +---+     ^     |
47 // | |0xf00 512+---+                 | +----->foo|     |     |
48 // | +--------+|   |    +----------+ | | |   +-^-+     |     |
49 // |               +---->alloc: 512+---+ |     |       |     |
50 // |                    |free:    0| | | |     +--+----+     |
51 // |                    +----------+ | | |        |          |
52 // |                                 | | |      +-+--+       |
53 // +---------------------------------+ | |      |main|       |
54 //                                     | |      +--+-+       |
55 // +---------------------------------+ | |         ^         |
56 // | +---------+    HeapTracker PID 2| | +-------------------+
57 // | |0x345 512+---+    +----------+ | |           |
58 // | |         |   +---->alloc:1779+---+           |
59 // | |0x456  32+---+    |free: 1235| |             |
60 // | +---------+        +----------+ |             |
61 // |                                 |             |
62 // |                    +----------+ |             |
63 // |                    |alloc:2048+---------------+
64 // |                    |free: 2048| |
65 // |                    +----------+ |
66 // |                                 |
67 // +---------------------------------+
68 //   Allocation    CallstackAllocations        Node
69 //
70 // The active allocations are on the leftmost side, modeled as the class
71 // HeapTracker::Allocation.
72 //
73 // The total allocated and freed bytes per callsite are in the middle, modeled
74 // as the HeapTracker::CallstackAllocations class.
75 // Note that (1280 - 1024) = 256, so alloc - free is equal to the total of the
76 // currently active allocations.
77 // Note in PID 2 there is a CallstackAllocations with 2048 allocated and 2048
78 // freed bytes. This is not currently referenced by any Allocations (as it
79 // should, as 2048 - 2048 = 0, which would mean that the total size of the
80 // allocations referencing it should be 0). This is because we haven't dumped
81 // this state yet, so the CallstackAllocations will be kept around until the
82 // next dump, written to the trace, and then destroyed.
83 //
84 // On the right hand side is the GlobalCallstackTrie, with nodes representing
85 // distinct callstacks. They have no information about the currently allocated
86 // or freed bytes, they only contain a reference count to destroy them as
87 // soon as they are no longer referenced by a HeapTracker.
88 
89 namespace perfetto {
90 namespace profiling {
91 
92 // Snapshot for memory allocations of a particular process. Shares callsites
93 // with other processes.
94 class HeapTracker {
95  public:
96   struct CallstackMaxAllocations {
97     uint64_t max;
98     uint64_t cur;
99   };
100   struct CallstackTotalAllocations {
101     uint64_t allocated;
102     uint64_t freed;
103   };
104 
105   // Sum of all the allocations for a given callstack.
106   struct CallstackAllocations {
CallstackAllocationsCallstackAllocations107     CallstackAllocations(GlobalCallstackTrie::Node* n) : node(n) {}
108 
109     uint64_t allocs = 0;
110     uint64_t allocation_count = 0;
111     uint64_t free_count = 0;
112     union {
113       CallstackMaxAllocations retain_max;
114       CallstackTotalAllocations totals;
115     } value = {};
116 
117     GlobalCallstackTrie::Node* const node;
118 
~CallstackAllocationsCallstackAllocations119     ~CallstackAllocations() { GlobalCallstackTrie::DecrementNode(node); }
120 
121     bool operator<(const CallstackAllocations& other) const {
122       return node < other.node;
123     }
124   };
125 
126   // Caller needs to ensure that callsites outlives the HeapTracker.
HeapTracker(GlobalCallstackTrie * callsites,bool dump_at_max_mode)127   explicit HeapTracker(GlobalCallstackTrie* callsites, bool dump_at_max_mode)
128       : callsites_(callsites), dump_at_max_mode_(dump_at_max_mode) {}
129 
130   void RecordMalloc(const std::vector<FrameData>& stack,
131                     uint64_t address,
132                     uint64_t sample_size,
133                     uint64_t alloc_size,
134                     uint64_t sequence_number,
135                     uint64_t timestamp);
136 
137   template <typename F>
GetCallstackAllocations(F fn)138   void GetCallstackAllocations(F fn) {
139     // There are two reasons we remove the unused callstack allocations on the
140     // next iteration of Dump:
141     // * We need to remove them after the callstacks were dumped, which
142     //   currently happens after the allocations are dumped.
143     // * This way, we do not destroy and recreate callstacks as frequently.
144     for (auto it_and_alloc : dead_callstack_allocations_) {
145       auto& it = it_and_alloc.first;
146       uint64_t allocated = it_and_alloc.second;
147       const CallstackAllocations& alloc = it->second;
148       if (alloc.allocs == 0 && alloc.allocation_count == allocated) {
149         // TODO(fmayer): We could probably be smarter than throw away
150         // our whole frames cache.
151         ClearFrameCache();
152         callstack_allocations_.erase(it);
153       }
154     }
155     dead_callstack_allocations_.clear();
156 
157     for (auto it = callstack_allocations_.begin();
158          it != callstack_allocations_.end(); ++it) {
159       const CallstackAllocations& alloc = it->second;
160       fn(alloc);
161 
162       if (alloc.allocs == 0)
163         dead_callstack_allocations_.emplace_back(it, alloc.allocation_count);
164     }
165   }
166 
167   template <typename F>
GetAllocations(F fn)168   void GetAllocations(F fn) {
169     for (const auto& addr_and_allocation : allocations_) {
170       const Allocation& alloc = addr_and_allocation.second;
171       fn(addr_and_allocation.first, alloc.sample_size, alloc.alloc_size,
172          alloc.callstack_allocations()->node->id());
173     }
174   }
175 
RecordFree(uint64_t address,uint64_t sequence_number,uint64_t timestamp)176   void RecordFree(uint64_t address,
177                   uint64_t sequence_number,
178                   uint64_t timestamp) {
179     RecordOperation(sequence_number, {address, timestamp});
180   }
181 
ClearFrameCache()182   void ClearFrameCache() { frame_cache_.clear(); }
183 
committed_timestamp()184   uint64_t committed_timestamp() { return committed_timestamp_; }
max_timestamp()185   uint64_t max_timestamp() { return max_timestamp_; }
186 
187   uint64_t GetSizeForTesting(const std::vector<FrameData>& stack);
188   uint64_t GetMaxForTesting(const std::vector<FrameData>& stack);
GetTimestampForTesting()189   uint64_t GetTimestampForTesting() { return committed_timestamp_; }
190 
191  private:
192   struct Allocation {
AllocationAllocation193     Allocation(uint64_t size,
194                uint64_t asize,
195                uint64_t seq,
196                CallstackAllocations* csa)
197         : sample_size(size), alloc_size(asize), sequence_number(seq) {
198       SetCallstackAllocations(csa);
199     }
200 
201     Allocation() = default;
202     Allocation(const Allocation&) = delete;
AllocationAllocation203     Allocation(Allocation&& other) noexcept {
204       sample_size = other.sample_size;
205       alloc_size = other.alloc_size;
206       sequence_number = other.sequence_number;
207       callstack_allocations_ = other.callstack_allocations_;
208       other.callstack_allocations_ = nullptr;
209     }
210 
~AllocationAllocation211     ~Allocation() { SetCallstackAllocations(nullptr); }
212 
SetCallstackAllocationsAllocation213     void SetCallstackAllocations(CallstackAllocations* callstack_allocations) {
214       if (callstack_allocations_)
215         callstack_allocations_->allocs--;
216       callstack_allocations_ = callstack_allocations;
217       if (callstack_allocations_)
218         callstack_allocations_->allocs++;
219     }
220 
callstack_allocationsAllocation221     CallstackAllocations* callstack_allocations() const {
222       return callstack_allocations_;
223     }
224 
225     uint64_t sample_size;
226     uint64_t alloc_size;
227     uint64_t sequence_number;
228 
229    private:
230     CallstackAllocations* callstack_allocations_ = nullptr;
231   };
232 
233   struct PendingOperation {
234     uint64_t allocation_address;
235     uint64_t timestamp;
236   };
237 
MaybeCreateCallstackAllocations(GlobalCallstackTrie::Node * node)238   CallstackAllocations* MaybeCreateCallstackAllocations(
239       GlobalCallstackTrie::Node* node) {
240     auto callstack_allocations_it = callstack_allocations_.find(node);
241     if (callstack_allocations_it == callstack_allocations_.end()) {
242       GlobalCallstackTrie::IncrementNode(node);
243       bool inserted;
244       std::tie(callstack_allocations_it, inserted) =
245           callstack_allocations_.emplace(node, node);
246       PERFETTO_DCHECK(inserted);
247     }
248     return &callstack_allocations_it->second;
249   }
250 
251   void RecordOperation(uint64_t sequence_number,
252                        const PendingOperation& operation);
253 
254   // Commits a malloc or free operation.
255   // See comment of pending_operations_ for encoding of malloc and free
256   // operations.
257   //
258   // Committing a malloc operation: Add the allocations size to
259   // CallstackAllocation::allocated.
260   // Committing a free operation: Add the allocation's size to
261   // CallstackAllocation::freed and delete the allocation.
262   void CommitOperation(uint64_t sequence_number,
263                        const PendingOperation& operation);
264 
AddToCallstackAllocations(uint64_t ts,const Allocation & alloc)265   void AddToCallstackAllocations(uint64_t ts, const Allocation& alloc) {
266     alloc.callstack_allocations()->allocation_count++;
267     if (dump_at_max_mode_) {
268       current_unfreed_ += alloc.sample_size;
269       alloc.callstack_allocations()->value.retain_max.cur += alloc.sample_size;
270 
271       if (current_unfreed_ <= max_unfreed_)
272         return;
273 
274       if (max_sequence_number_ == alloc.sequence_number - 1) {
275         alloc.callstack_allocations()->value.retain_max.max =
276             // We know the only CallstackAllocation that has max != cur is the
277             // one we just updated.
278             alloc.callstack_allocations()->value.retain_max.cur;
279       } else {
280         for (auto& p : callstack_allocations_) {
281           // We need to reset max = cur for every CallstackAllocation, as we
282           // do not know which ones have changed since the last max.
283           // TODO(fmayer): Add an index to speed this up
284           CallstackAllocations& csa = p.second;
285           csa.value.retain_max.max = csa.value.retain_max.cur;
286         }
287       }
288       max_sequence_number_ = alloc.sequence_number;
289       max_unfreed_ = current_unfreed_;
290       max_timestamp_ = ts;
291     } else {
292       alloc.callstack_allocations()->value.totals.allocated +=
293           alloc.sample_size;
294     }
295   }
296 
SubtractFromCallstackAllocations(const Allocation & alloc)297   void SubtractFromCallstackAllocations(const Allocation& alloc) {
298     alloc.callstack_allocations()->free_count++;
299     if (dump_at_max_mode_) {
300       current_unfreed_ -= alloc.sample_size;
301       alloc.callstack_allocations()->value.retain_max.cur -= alloc.sample_size;
302     } else {
303       alloc.callstack_allocations()->value.totals.freed += alloc.sample_size;
304     }
305   }
306 
307   // We cannot use an interner here, because after the last allocation goes
308   // away, we still need to keep the CallstackAllocations around until the next
309   // dump.
310   std::map<GlobalCallstackTrie::Node*, CallstackAllocations>
311       callstack_allocations_;
312 
313   std::vector<std::pair<decltype(callstack_allocations_)::iterator, uint64_t>>
314       dead_callstack_allocations_;
315 
316   std::map<uint64_t /* allocation address */, Allocation> allocations_;
317 
318   // An operation is either a commit of an allocation or freeing of an
319   // allocation. An operation is a free if its seq_id is larger than
320   // the sequence_number of the corresponding allocation. It is a commit if its
321   // seq_id is equal to the sequence_number of the corresponding allocation.
322   //
323   // If its seq_id is less than the sequence_number of the corresponding
324   // allocation it could be either, but is ignored either way.
325   std::map<uint64_t /* seq_id */, PendingOperation /* allocation address */>
326       pending_operations_;
327 
328   uint64_t committed_timestamp_ = 0;
329   // The sequence number all mallocs and frees have been handled up to.
330   uint64_t committed_sequence_number_ = 0;
331   GlobalCallstackTrie* callsites_;
332 
333   const bool dump_at_max_mode_ = false;
334   // The following members are only used if dump_at_max_mode_ == true.
335   uint64_t max_sequence_number_ = 0;
336   uint64_t current_unfreed_ = 0;
337   uint64_t max_unfreed_ = 0;
338   uint64_t max_timestamp_ = 0;
339 
340   // We index by abspc, which is unique as long as the maps do not change.
341   // This is why we ClearFrameCache after we reparsed maps.
342   std::unordered_map<uint64_t /* abs pc */, Interned<Frame>> frame_cache_;
343 };
344 
345 }  // namespace profiling
346 }  // namespace perfetto
347 
348 #endif  // SRC_PROFILING_MEMORY_BOOKKEEPING_H_
349