1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_PROFILING_MEMORY_BOOKKEEPING_H_ 18 #define SRC_PROFILING_MEMORY_BOOKKEEPING_H_ 19 20 #include <map> 21 #include <vector> 22 23 #include "perfetto/base/time.h" 24 #include "src/profiling/common/callstack_trie.h" 25 #include "src/profiling/common/interner.h" 26 #include "src/profiling/memory/unwound_messages.h" 27 28 // Below is an illustration of the bookkeeping system state where 29 // PID 1 does the following allocations: 30 // 0x123: 128 bytes at [bar main] 31 // 0x234: 128 bytes at [bar main] 32 // 0xf00: 512 bytes at [foo main] 33 // PID 1 allocated but previously freed 1024 bytes at [bar main] 34 // 35 // PID 2 does the following allocations: 36 // 0x345: 512 bytes at [foo main] 37 // 0x456: 32 bytes at [foo main] 38 // PID 2 allocated but already freed 1235 bytes at [foo main] 39 // PID 2 allocated and freed 2048 bytes in main. 40 // 41 // +---------------------------------+ +-------------------+ 42 // | +---------+ HeapTracker PID 1| | GlobalCallstackTri| 43 // | |0x123 128+---+ +----------+ | | +---+ | 44 // | | | +---->alloc:1280+----------------->bar| | 45 // | |0x234 128+---+ |free: 1024| | | +-^-+ | 46 // | | | +----------+ | | +---+ ^ | 47 // | |0xf00 512+---+ | +----->foo| | | 48 // | +--------+| | +----------+ | | | +-^-+ | | 49 // | +---->alloc: 512+---+ | | | | 50 // | |free: 0| | | | +--+----+ | 51 // | +----------+ | | | | | 52 // | | | | +-+--+ | 53 // +---------------------------------+ | | |main| | 54 // | | +--+-+ | 55 // +---------------------------------+ | | ^ | 56 // | +---------+ HeapTracker PID 2| | +-------------------+ 57 // | |0x345 512+---+ +----------+ | | | 58 // | | | +---->alloc:1779+---+ | 59 // | |0x456 32+---+ |free: 1235| | | 60 // | +---------+ +----------+ | | 61 // | | | 62 // | +----------+ | | 63 // | |alloc:2048+---------------+ 64 // | |free: 2048| | 65 // | +----------+ | 66 // | | 67 // +---------------------------------+ 68 // Allocation CallstackAllocations Node 69 // 70 // The active allocations are on the leftmost side, modeled as the class 71 // HeapTracker::Allocation. 72 // 73 // The total allocated and freed bytes per callsite are in the middle, modeled 74 // as the HeapTracker::CallstackAllocations class. 75 // Note that (1280 - 1024) = 256, so alloc - free is equal to the total of the 76 // currently active allocations. 77 // Note in PID 2 there is a CallstackAllocations with 2048 allocated and 2048 78 // freed bytes. This is not currently referenced by any Allocations (as it 79 // should, as 2048 - 2048 = 0, which would mean that the total size of the 80 // allocations referencing it should be 0). This is because we haven't dumped 81 // this state yet, so the CallstackAllocations will be kept around until the 82 // next dump, written to the trace, and then destroyed. 83 // 84 // On the right hand side is the GlobalCallstackTrie, with nodes representing 85 // distinct callstacks. They have no information about the currently allocated 86 // or freed bytes, they only contain a reference count to destroy them as 87 // soon as they are no longer referenced by a HeapTracker. 88 89 namespace perfetto { 90 namespace profiling { 91 92 // Snapshot for memory allocations of a particular process. Shares callsites 93 // with other processes. 94 class HeapTracker { 95 public: 96 struct CallstackMaxAllocations { 97 uint64_t max; 98 uint64_t cur; 99 }; 100 struct CallstackTotalAllocations { 101 uint64_t allocated; 102 uint64_t freed; 103 }; 104 105 // Sum of all the allocations for a given callstack. 106 struct CallstackAllocations { CallstackAllocationsCallstackAllocations107 CallstackAllocations(GlobalCallstackTrie::Node* n) : node(n) {} 108 109 uint64_t allocs = 0; 110 uint64_t allocation_count = 0; 111 uint64_t free_count = 0; 112 union { 113 CallstackMaxAllocations retain_max; 114 CallstackTotalAllocations totals; 115 } value = {}; 116 117 GlobalCallstackTrie::Node* const node; 118 ~CallstackAllocationsCallstackAllocations119 ~CallstackAllocations() { GlobalCallstackTrie::DecrementNode(node); } 120 121 bool operator<(const CallstackAllocations& other) const { 122 return node < other.node; 123 } 124 }; 125 126 // Caller needs to ensure that callsites outlives the HeapTracker. HeapTracker(GlobalCallstackTrie * callsites,bool dump_at_max_mode)127 explicit HeapTracker(GlobalCallstackTrie* callsites, bool dump_at_max_mode) 128 : callsites_(callsites), dump_at_max_mode_(dump_at_max_mode) {} 129 130 void RecordMalloc(const std::vector<FrameData>& stack, 131 uint64_t address, 132 uint64_t sample_size, 133 uint64_t alloc_size, 134 uint64_t sequence_number, 135 uint64_t timestamp); 136 137 template <typename F> GetCallstackAllocations(F fn)138 void GetCallstackAllocations(F fn) { 139 // There are two reasons we remove the unused callstack allocations on the 140 // next iteration of Dump: 141 // * We need to remove them after the callstacks were dumped, which 142 // currently happens after the allocations are dumped. 143 // * This way, we do not destroy and recreate callstacks as frequently. 144 for (auto it_and_alloc : dead_callstack_allocations_) { 145 auto& it = it_and_alloc.first; 146 uint64_t allocated = it_and_alloc.second; 147 const CallstackAllocations& alloc = it->second; 148 if (alloc.allocs == 0 && alloc.allocation_count == allocated) { 149 // TODO(fmayer): We could probably be smarter than throw away 150 // our whole frames cache. 151 ClearFrameCache(); 152 callstack_allocations_.erase(it); 153 } 154 } 155 dead_callstack_allocations_.clear(); 156 157 for (auto it = callstack_allocations_.begin(); 158 it != callstack_allocations_.end(); ++it) { 159 const CallstackAllocations& alloc = it->second; 160 fn(alloc); 161 162 if (alloc.allocs == 0) 163 dead_callstack_allocations_.emplace_back(it, alloc.allocation_count); 164 } 165 } 166 167 template <typename F> GetAllocations(F fn)168 void GetAllocations(F fn) { 169 for (const auto& addr_and_allocation : allocations_) { 170 const Allocation& alloc = addr_and_allocation.second; 171 fn(addr_and_allocation.first, alloc.sample_size, alloc.alloc_size, 172 alloc.callstack_allocations()->node->id()); 173 } 174 } 175 RecordFree(uint64_t address,uint64_t sequence_number,uint64_t timestamp)176 void RecordFree(uint64_t address, 177 uint64_t sequence_number, 178 uint64_t timestamp) { 179 RecordOperation(sequence_number, {address, timestamp}); 180 } 181 ClearFrameCache()182 void ClearFrameCache() { frame_cache_.clear(); } 183 committed_timestamp()184 uint64_t committed_timestamp() { return committed_timestamp_; } max_timestamp()185 uint64_t max_timestamp() { return max_timestamp_; } 186 187 uint64_t GetSizeForTesting(const std::vector<FrameData>& stack); 188 uint64_t GetMaxForTesting(const std::vector<FrameData>& stack); GetTimestampForTesting()189 uint64_t GetTimestampForTesting() { return committed_timestamp_; } 190 191 private: 192 struct Allocation { AllocationAllocation193 Allocation(uint64_t size, 194 uint64_t asize, 195 uint64_t seq, 196 CallstackAllocations* csa) 197 : sample_size(size), alloc_size(asize), sequence_number(seq) { 198 SetCallstackAllocations(csa); 199 } 200 201 Allocation() = default; 202 Allocation(const Allocation&) = delete; AllocationAllocation203 Allocation(Allocation&& other) noexcept { 204 sample_size = other.sample_size; 205 alloc_size = other.alloc_size; 206 sequence_number = other.sequence_number; 207 callstack_allocations_ = other.callstack_allocations_; 208 other.callstack_allocations_ = nullptr; 209 } 210 ~AllocationAllocation211 ~Allocation() { SetCallstackAllocations(nullptr); } 212 SetCallstackAllocationsAllocation213 void SetCallstackAllocations(CallstackAllocations* callstack_allocations) { 214 if (callstack_allocations_) 215 callstack_allocations_->allocs--; 216 callstack_allocations_ = callstack_allocations; 217 if (callstack_allocations_) 218 callstack_allocations_->allocs++; 219 } 220 callstack_allocationsAllocation221 CallstackAllocations* callstack_allocations() const { 222 return callstack_allocations_; 223 } 224 225 uint64_t sample_size; 226 uint64_t alloc_size; 227 uint64_t sequence_number; 228 229 private: 230 CallstackAllocations* callstack_allocations_ = nullptr; 231 }; 232 233 struct PendingOperation { 234 uint64_t allocation_address; 235 uint64_t timestamp; 236 }; 237 MaybeCreateCallstackAllocations(GlobalCallstackTrie::Node * node)238 CallstackAllocations* MaybeCreateCallstackAllocations( 239 GlobalCallstackTrie::Node* node) { 240 auto callstack_allocations_it = callstack_allocations_.find(node); 241 if (callstack_allocations_it == callstack_allocations_.end()) { 242 GlobalCallstackTrie::IncrementNode(node); 243 bool inserted; 244 std::tie(callstack_allocations_it, inserted) = 245 callstack_allocations_.emplace(node, node); 246 PERFETTO_DCHECK(inserted); 247 } 248 return &callstack_allocations_it->second; 249 } 250 251 void RecordOperation(uint64_t sequence_number, 252 const PendingOperation& operation); 253 254 // Commits a malloc or free operation. 255 // See comment of pending_operations_ for encoding of malloc and free 256 // operations. 257 // 258 // Committing a malloc operation: Add the allocations size to 259 // CallstackAllocation::allocated. 260 // Committing a free operation: Add the allocation's size to 261 // CallstackAllocation::freed and delete the allocation. 262 void CommitOperation(uint64_t sequence_number, 263 const PendingOperation& operation); 264 AddToCallstackAllocations(uint64_t ts,const Allocation & alloc)265 void AddToCallstackAllocations(uint64_t ts, const Allocation& alloc) { 266 alloc.callstack_allocations()->allocation_count++; 267 if (dump_at_max_mode_) { 268 current_unfreed_ += alloc.sample_size; 269 alloc.callstack_allocations()->value.retain_max.cur += alloc.sample_size; 270 271 if (current_unfreed_ <= max_unfreed_) 272 return; 273 274 if (max_sequence_number_ == alloc.sequence_number - 1) { 275 alloc.callstack_allocations()->value.retain_max.max = 276 // We know the only CallstackAllocation that has max != cur is the 277 // one we just updated. 278 alloc.callstack_allocations()->value.retain_max.cur; 279 } else { 280 for (auto& p : callstack_allocations_) { 281 // We need to reset max = cur for every CallstackAllocation, as we 282 // do not know which ones have changed since the last max. 283 // TODO(fmayer): Add an index to speed this up 284 CallstackAllocations& csa = p.second; 285 csa.value.retain_max.max = csa.value.retain_max.cur; 286 } 287 } 288 max_sequence_number_ = alloc.sequence_number; 289 max_unfreed_ = current_unfreed_; 290 max_timestamp_ = ts; 291 } else { 292 alloc.callstack_allocations()->value.totals.allocated += 293 alloc.sample_size; 294 } 295 } 296 SubtractFromCallstackAllocations(const Allocation & alloc)297 void SubtractFromCallstackAllocations(const Allocation& alloc) { 298 alloc.callstack_allocations()->free_count++; 299 if (dump_at_max_mode_) { 300 current_unfreed_ -= alloc.sample_size; 301 alloc.callstack_allocations()->value.retain_max.cur -= alloc.sample_size; 302 } else { 303 alloc.callstack_allocations()->value.totals.freed += alloc.sample_size; 304 } 305 } 306 307 // We cannot use an interner here, because after the last allocation goes 308 // away, we still need to keep the CallstackAllocations around until the next 309 // dump. 310 std::map<GlobalCallstackTrie::Node*, CallstackAllocations> 311 callstack_allocations_; 312 313 std::vector<std::pair<decltype(callstack_allocations_)::iterator, uint64_t>> 314 dead_callstack_allocations_; 315 316 std::map<uint64_t /* allocation address */, Allocation> allocations_; 317 318 // An operation is either a commit of an allocation or freeing of an 319 // allocation. An operation is a free if its seq_id is larger than 320 // the sequence_number of the corresponding allocation. It is a commit if its 321 // seq_id is equal to the sequence_number of the corresponding allocation. 322 // 323 // If its seq_id is less than the sequence_number of the corresponding 324 // allocation it could be either, but is ignored either way. 325 std::map<uint64_t /* seq_id */, PendingOperation /* allocation address */> 326 pending_operations_; 327 328 uint64_t committed_timestamp_ = 0; 329 // The sequence number all mallocs and frees have been handled up to. 330 uint64_t committed_sequence_number_ = 0; 331 GlobalCallstackTrie* callsites_; 332 333 const bool dump_at_max_mode_ = false; 334 // The following members are only used if dump_at_max_mode_ == true. 335 uint64_t max_sequence_number_ = 0; 336 uint64_t current_unfreed_ = 0; 337 uint64_t max_unfreed_ = 0; 338 uint64_t max_timestamp_ = 0; 339 340 // We index by abspc, which is unique as long as the maps do not change. 341 // This is why we ClearFrameCache after we reparsed maps. 342 std::unordered_map<uint64_t /* abs pc */, Interned<Frame>> frame_cache_; 343 }; 344 345 } // namespace profiling 346 } // namespace perfetto 347 348 #endif // SRC_PROFILING_MEMORY_BOOKKEEPING_H_ 349