/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef SRC_PROFILING_MEMORY_BOOKKEEPING_H_
#define SRC_PROFILING_MEMORY_BOOKKEEPING_H_

#include <cstdint>
#include <map>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#include "perfetto/base/time.h"
#include "src/profiling/common/callstack_trie.h"
#include "src/profiling/common/interner.h"
#include "src/profiling/memory/unwound_messages.h"

// Below is an illustration of the bookkeeping system state where
// PID 1 does the following allocations:
// 0x123: 128 bytes at [bar main]
// 0x234: 128 bytes at [bar main]
// 0xf00: 512 bytes at [foo main]
// PID 1 allocated but previously freed 1024 bytes at [bar main]
//
// PID 2 does the following allocations:
// 0x345: 512 bytes at [foo main]
// 0x456: 32 bytes at [foo main]
// PID 2 allocated but already freed 1235 bytes at [foo main]
// PID 2 allocated and freed 2048 bytes in main.
40 // 41 // +---------------------------------+ +-------------------+ 42 // | +---------+ HeapTracker PID 1| | GlobalCallstackTri| 43 // | |0x123 128+---+ +----------+ | | +---+ | 44 // | | | +---->alloc:1280+----------------->bar| | 45 // | |0x234 128+---+ |free: 1024| | | +-^-+ | 46 // | | | +----------+ | | +---+ ^ | 47 // | |0xf00 512+---+ | +----->foo| | | 48 // | +--------+| | +----------+ | | | +-^-+ | | 49 // | +---->alloc: 512+---+ | | | | 50 // | |free: 0| | | | +--+----+ | 51 // | +----------+ | | | | | 52 // | | | | +-+--+ | 53 // +---------------------------------+ | | |main| | 54 // | | +--+-+ | 55 // +---------------------------------+ | | ^ | 56 // | +---------+ HeapTracker PID 2| | +-------------------+ 57 // | |0x345 512+---+ +----------+ | | | 58 // | | | +---->alloc:1779+---+ | 59 // | |0x456 32+---+ |free: 1235| | | 60 // | +---------+ +----------+ | | 61 // | | | 62 // | +----------+ | | 63 // | |alloc:2048+---------------+ 64 // | |free: 2048| | 65 // | +----------+ | 66 // | | 67 // +---------------------------------+ 68 // Allocation CallstackAllocations Node 69 // 70 // The active allocations are on the leftmost side, modeled as the class 71 // HeapTracker::Allocation. 72 // 73 // The total allocated and freed bytes per callsite are in the middle, modeled 74 // as the HeapTracker::CallstackAllocations class. 75 // Note that (1280 - 1024) = 256, so alloc - free is equal to the total of the 76 // currently active allocations. 77 // Note in PID 2 there is a CallstackAllocations with 2048 allocated and 2048 78 // freed bytes. This is not currently referenced by any Allocations (as it 79 // should, as 2048 - 2048 = 0, which would mean that the total size of the 80 // allocations referencing it should be 0). This is because we haven't dumped 81 // this state yet, so the CallstackAllocations will be kept around until the 82 // next dump, written to the trace, and then destroyed. 
83 // 84 // On the right hand side is the GlobalCallstackTrie, with nodes representing 85 // distinct callstacks. They have no information about the currently allocated 86 // or freed bytes, they only contain a reference count to destroy them as 87 // soon as they are no longer referenced by a HeapTracker. 88 89 namespace perfetto { 90 namespace profiling { 91 92 // Snapshot for memory allocations of a particular process. Shares callsites 93 // with other processes. 94 class HeapTracker { 95 public: 96 struct CallstackMaxAllocations { 97 uint64_t max; 98 uint64_t cur; 99 uint64_t max_count; 100 uint64_t cur_count; 101 }; 102 struct CallstackTotalAllocations { 103 uint64_t allocated; 104 uint64_t freed; 105 uint64_t allocation_count; 106 uint64_t free_count; 107 }; 108 109 // Sum of all the allocations for a given callstack. 110 struct CallstackAllocations { CallstackAllocationsCallstackAllocations111 explicit CallstackAllocations(GlobalCallstackTrie::Node* n) : node(n) {} 112 113 uint64_t allocs = 0; 114 115 union { 116 CallstackMaxAllocations retain_max; 117 CallstackTotalAllocations totals; 118 } value = {}; 119 120 GlobalCallstackTrie::Node* const node; 121 ~CallstackAllocationsCallstackAllocations122 ~CallstackAllocations() { GlobalCallstackTrie::DecrementNode(node); } 123 124 bool operator<(const CallstackAllocations& other) const { 125 return node < other.node; 126 } 127 }; 128 129 // Caller needs to ensure that callsites outlives the HeapTracker. 
HeapTracker(GlobalCallstackTrie * callsites,bool dump_at_max_mode)130 explicit HeapTracker(GlobalCallstackTrie* callsites, bool dump_at_max_mode) 131 : callsites_(callsites), dump_at_max_mode_(dump_at_max_mode) {} 132 133 void RecordMalloc(const std::vector<unwindstack::FrameData>& callstack, 134 const std::vector<std::string>& build_ids, 135 uint64_t address, 136 uint64_t sample_size, 137 uint64_t alloc_size, 138 uint64_t sequence_number, 139 uint64_t timestamp); 140 141 template <typename F> GetCallstackAllocations(F fn)142 void GetCallstackAllocations(F fn) { 143 // There are two reasons we remove the unused callstack allocations on the 144 // next iteration of Dump: 145 // * We need to remove them after the callstacks were dumped, which 146 // currently happens after the allocations are dumped. 147 // * This way, we do not destroy and recreate callstacks as frequently. 148 for (auto it_and_alloc : dead_callstack_allocations_) { 149 auto& it = it_and_alloc.first; 150 uint64_t allocated = it_and_alloc.second; 151 const CallstackAllocations& alloc = it->second; 152 // For non-dump-at-max, we need to check, even if there are still no 153 // allocations referencing this callstack, whether there were any 154 // allocations that happened but were freed again. If that was the case, 155 // we need to keep the callsite, because the next dump will indicate a 156 // different self_alloc and self_freed. 157 if (alloc.allocs == 0 && 158 (dump_at_max_mode_ || 159 alloc.value.totals.allocation_count == allocated)) { 160 // TODO(fmayer): We could probably be smarter than throw away 161 // our whole frames cache. 
162 ClearFrameCache(); 163 callstack_allocations_.erase(it); 164 } 165 } 166 dead_callstack_allocations_.clear(); 167 168 for (auto it = callstack_allocations_.begin(); 169 it != callstack_allocations_.end(); ++it) { 170 const CallstackAllocations& alloc = it->second; 171 fn(alloc); 172 173 if (alloc.allocs == 0) 174 dead_callstack_allocations_.emplace_back( 175 it, !dump_at_max_mode_ ? alloc.value.totals.allocation_count : 0); 176 } 177 } 178 179 template <typename F> GetAllocations(F fn)180 void GetAllocations(F fn) { 181 for (const auto& addr_and_allocation : allocations_) { 182 const Allocation& alloc = addr_and_allocation.second; 183 fn(addr_and_allocation.first, alloc.sample_size, alloc.alloc_size, 184 alloc.callstack_allocations()->node->id()); 185 } 186 } 187 RecordFree(uint64_t address,uint64_t sequence_number,uint64_t timestamp)188 void RecordFree(uint64_t address, 189 uint64_t sequence_number, 190 uint64_t timestamp) { 191 RecordOperation(sequence_number, {address, timestamp}); 192 } 193 ClearFrameCache()194 void ClearFrameCache() { frame_cache_.clear(); } 195 dump_timestamp()196 uint64_t dump_timestamp() { 197 return dump_at_max_mode_ ? 
max_timestamp_ : committed_timestamp_; 198 } 199 200 uint64_t GetSizeForTesting(const std::vector<unwindstack::FrameData>& stack, 201 std::vector<std::string> build_ids); 202 uint64_t GetMaxForTesting(const std::vector<unwindstack::FrameData>& stack, 203 std::vector<std::string> build_ids); 204 uint64_t GetMaxCountForTesting( 205 const std::vector<unwindstack::FrameData>& stack, 206 std::vector<std::string> build_ids); 207 GetTimestampForTesting()208 uint64_t GetTimestampForTesting() { return committed_timestamp_; } 209 210 private: 211 struct Allocation { AllocationAllocation212 Allocation(uint64_t size, 213 uint64_t asize, 214 uint64_t seq, 215 CallstackAllocations* csa) 216 : sample_size(size), alloc_size(asize), sequence_number(seq) { 217 SetCallstackAllocations(csa); 218 } 219 220 Allocation() = default; 221 Allocation(const Allocation&) = delete; AllocationAllocation222 Allocation(Allocation&& other) noexcept { 223 sample_size = other.sample_size; 224 alloc_size = other.alloc_size; 225 sequence_number = other.sequence_number; 226 callstack_allocations_ = other.callstack_allocations_; 227 other.callstack_allocations_ = nullptr; 228 } 229 ~AllocationAllocation230 ~Allocation() { SetCallstackAllocations(nullptr); } 231 SetCallstackAllocationsAllocation232 void SetCallstackAllocations(CallstackAllocations* callstack_allocations) { 233 if (callstack_allocations_) 234 callstack_allocations_->allocs--; 235 callstack_allocations_ = callstack_allocations; 236 if (callstack_allocations_) 237 callstack_allocations_->allocs++; 238 } 239 callstack_allocationsAllocation240 CallstackAllocations* callstack_allocations() const { 241 return callstack_allocations_; 242 } 243 244 uint64_t sample_size; 245 uint64_t alloc_size; 246 uint64_t sequence_number; 247 248 private: 249 CallstackAllocations* callstack_allocations_ = nullptr; 250 }; 251 252 struct PendingOperation { 253 uint64_t allocation_address; 254 uint64_t timestamp; 255 }; 256 
MaybeCreateCallstackAllocations(GlobalCallstackTrie::Node * node)257 CallstackAllocations* MaybeCreateCallstackAllocations( 258 GlobalCallstackTrie::Node* node) { 259 auto callstack_allocations_it = callstack_allocations_.find(node); 260 if (callstack_allocations_it == callstack_allocations_.end()) { 261 GlobalCallstackTrie::IncrementNode(node); 262 bool inserted; 263 std::tie(callstack_allocations_it, inserted) = 264 callstack_allocations_.emplace(node, node); 265 PERFETTO_DCHECK(inserted); 266 } 267 return &callstack_allocations_it->second; 268 } 269 270 void RecordOperation(uint64_t sequence_number, 271 const PendingOperation& operation); 272 273 // Commits a malloc or free operation. 274 // See comment of pending_operations_ for encoding of malloc and free 275 // operations. 276 // 277 // Committing a malloc operation: Add the allocations size to 278 // CallstackAllocation::allocated. 279 // Committing a free operation: Add the allocation's size to 280 // CallstackAllocation::freed and delete the allocation. 281 void CommitOperation(uint64_t sequence_number, 282 const PendingOperation& operation); 283 AddToCallstackAllocations(uint64_t ts,const Allocation & alloc)284 void AddToCallstackAllocations(uint64_t ts, const Allocation& alloc) { 285 if (dump_at_max_mode_) { 286 current_unfreed_ += alloc.sample_size; 287 alloc.callstack_allocations()->value.retain_max.cur += alloc.sample_size; 288 alloc.callstack_allocations()->value.retain_max.cur_count++; 289 290 if (current_unfreed_ <= max_unfreed_) 291 return; 292 293 if (max_sequence_number_ == alloc.sequence_number - 1) { 294 // We know the only CallstackAllocation that has max != cur is the 295 // one we just updated. 
296 alloc.callstack_allocations()->value.retain_max.max = 297 alloc.callstack_allocations()->value.retain_max.cur; 298 alloc.callstack_allocations()->value.retain_max.max_count = 299 alloc.callstack_allocations()->value.retain_max.cur_count; 300 } else { 301 for (auto& p : callstack_allocations_) { 302 // We need to reset max = cur for every CallstackAllocation, as we 303 // do not know which ones have changed since the last max. 304 // TODO(fmayer): Add an index to speed this up 305 CallstackAllocations& csa = p.second; 306 csa.value.retain_max.max = csa.value.retain_max.cur; 307 csa.value.retain_max.max_count = csa.value.retain_max.cur_count; 308 } 309 } 310 max_sequence_number_ = alloc.sequence_number; 311 max_unfreed_ = current_unfreed_; 312 max_timestamp_ = ts; 313 } else { 314 alloc.callstack_allocations()->value.totals.allocated += 315 alloc.sample_size; 316 alloc.callstack_allocations()->value.totals.allocation_count++; 317 } 318 } 319 SubtractFromCallstackAllocations(const Allocation & alloc)320 void SubtractFromCallstackAllocations(const Allocation& alloc) { 321 if (dump_at_max_mode_) { 322 current_unfreed_ -= alloc.sample_size; 323 alloc.callstack_allocations()->value.retain_max.cur -= alloc.sample_size; 324 alloc.callstack_allocations()->value.retain_max.cur_count--; 325 } else { 326 alloc.callstack_allocations()->value.totals.freed += alloc.sample_size; 327 alloc.callstack_allocations()->value.totals.free_count++; 328 } 329 } 330 331 // We cannot use an interner here, because after the last allocation goes 332 // away, we still need to keep the CallstackAllocations around until the next 333 // dump. 
334 std::map<GlobalCallstackTrie::Node*, CallstackAllocations> 335 callstack_allocations_; 336 337 std::vector<std::pair<decltype(callstack_allocations_)::iterator, uint64_t>> 338 dead_callstack_allocations_; 339 340 std::map<uint64_t /* allocation address */, Allocation> allocations_; 341 342 // An operation is either a commit of an allocation or freeing of an 343 // allocation. An operation is a free if its seq_id is larger than 344 // the sequence_number of the corresponding allocation. It is a commit if its 345 // seq_id is equal to the sequence_number of the corresponding allocation. 346 // 347 // If its seq_id is less than the sequence_number of the corresponding 348 // allocation it could be either, but is ignored either way. 349 std::map<uint64_t /* seq_id */, PendingOperation /* allocation address */> 350 pending_operations_; 351 352 uint64_t committed_timestamp_ = 0; 353 // The sequence number all mallocs and frees have been handled up to. 354 uint64_t committed_sequence_number_ = 0; 355 GlobalCallstackTrie* callsites_; 356 357 const bool dump_at_max_mode_ = false; 358 // The following members are only used if dump_at_max_mode_ == true. 359 uint64_t max_sequence_number_ = 0; 360 uint64_t current_unfreed_ = 0; 361 uint64_t max_unfreed_ = 0; 362 uint64_t max_timestamp_ = 0; 363 364 // We index by abspc, which is unique as long as the maps do not change. 365 // This is why we ClearFrameCache after we reparsed maps. 366 std::unordered_map<uint64_t /* abs pc */, Interned<Frame>> frame_cache_; 367 }; 368 369 } // namespace profiling 370 } // namespace perfetto 371 372 #endif // SRC_PROFILING_MEMORY_BOOKKEEPING_H_ 373