1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_PROFILING_MEMORY_BOOKKEEPING_H_ 18 #define SRC_PROFILING_MEMORY_BOOKKEEPING_H_ 19 20 #include <functional> 21 #include <map> 22 #include <string> 23 #include <vector> 24 25 #include "perfetto/base/lookup_set.h" 26 #include "perfetto/base/string_splitter.h" 27 #include "perfetto/base/time.h" 28 #include "perfetto/trace/profiling/profile_packet.pbzero.h" 29 #include "perfetto/trace/trace_packet.pbzero.h" 30 #include "perfetto/tracing/core/trace_writer.h" 31 #include "src/profiling/memory/interner.h" 32 #include "src/profiling/memory/unwound_messages.h" 33 34 // Below is an illustration of the bookkeeping system state where 35 // PID 1 does the following allocations: 36 // 0x123: 128 bytes at [bar main] 37 // 0x234: 128 bytes at [bar main] 38 // 0xf00: 512 bytes at [foo main] 39 // PID 1 allocated but previously freed 1024 bytes at [bar main] 40 // 41 // PID 2 does the following allocations: 42 // 0x345: 512 bytes at [foo main] 43 // 0x456: 32 bytes at [foo main] 44 // PID 2 allocated but already freed 1235 bytes at [foo main] 45 // PID 2 allocated and freed 2048 bytes in main. 46 // 47 // +---------------------------------+ +-------------------+ 48 // | +---------+ HeapTracker PID 1| | GlobalCallstackTri| 49 // | |0x123 128+---+ +----------+ | | +---+ | 50 // | | | +---->alloc:1280+----------------->bar| | 51 // | |0x234 128+---+ |free: 1024| | | +-^-+ | 52 // | | | +----------+ | | +---+ ^ | 53 // | |0xf00 512+---+ | +----->foo| | | 54 // | +--------+| | +----------+ | | | +-^-+ | | 55 // | +---->alloc: 512+---+ | | | | 56 // | |free: 0| | | | +--+----+ | 57 // | +----------+ | | | | | 58 // | | | | +-+--+ | 59 // +---------------------------------+ | | |main| | 60 // | | +--+-+ | 61 // +---------------------------------+ | | ^ | 62 // | +---------+ HeapTracker PID 2| | +-------------------+ 63 // | |0x345 512+---+ +----------+ | | | 64 // | | | +---->alloc:1779+---+ | 65 // | |0x456 32+---+ |free: 1235| | | 66 // | +---------+ +----------+ | | 67 // | | | 68 // | +----------+ | | 69 // | |alloc:2048+---------------+ 70 // | |free: 2048| | 71 // | +----------+ | 72 // | | 73 // +---------------------------------+ 74 // Allocation CallstackAllocations Node 75 // 76 // The active allocations are on the leftmost side, modeled as the class 77 // HeapTracker::Allocation. 78 // 79 // The total allocated and freed bytes per callsite are in the middle, modeled 80 // as the HeapTracker::CallstackAllocations class. 81 // Note that (1280 - 1024) = 256, so alloc - free is equal to the total of the 82 // currently active allocations. 83 // Note in PID 2 there is a CallstackAllocations with 2048 allocated and 2048 84 // freed bytes. This is not currently referenced by any Allocations (as it 85 // should, as 2048 - 2048 = 0, which would mean that the total size of the 86 // allocations referencing it should be 0). This is because we haven't dumped 87 // this state yet, so the CallstackAllocations will be kept around until the 88 // next dump, written to the trace, and then destroyed. 89 // 90 // On the right hand side is the GlobalCallstackTrie, with nodes representing 91 // distinct callstacks. They have no information about the currently allocated 92 // or freed bytes, they only contain a reference count to destroy them as 93 // soon as they are no longer referenced by a HeapTracker. 94 95 namespace perfetto { 96 namespace profiling { 97 98 class HeapTracker; 99 100 struct Mapping { MappingMapping101 Mapping(Interned<std::string> b) : build_id(std::move(b)) {} 102 103 Interned<std::string> build_id; 104 uint64_t offset = 0; 105 uint64_t start = 0; 106 uint64_t end = 0; 107 uint64_t load_bias = 0; 108 std::vector<Interned<std::string>> path_components{}; 109 110 bool operator<(const Mapping& other) const { 111 return std::tie(build_id, offset, start, end, load_bias, path_components) < 112 std::tie(other.build_id, other.offset, other.start, other.end, 113 other.load_bias, other.path_components); 114 } 115 bool operator==(const Mapping& other) const { 116 return std::tie(build_id, offset, start, end, load_bias, path_components) == 117 std::tie(other.build_id, other.offset, other.start, other.end, 118 other.load_bias, other.path_components); 119 } 120 }; 121 122 struct Frame { FrameFrame123 Frame(Interned<Mapping> m, Interned<std::string> fn_name, uint64_t pc) 124 : mapping(m), function_name(fn_name), rel_pc(pc) {} 125 Interned<Mapping> mapping; 126 Interned<std::string> function_name; 127 uint64_t rel_pc; 128 129 bool operator<(const Frame& other) const { 130 return std::tie(mapping, function_name, rel_pc) < 131 std::tie(other.mapping, other.function_name, other.rel_pc); 132 } 133 134 bool operator==(const Frame& other) const { 135 return std::tie(mapping, function_name, rel_pc) == 136 std::tie(other.mapping, other.function_name, other.rel_pc); 137 } 138 }; 139 140 // Graph of function callsites. This is shared between heap dumps for 141 // different processes. Each call site is represented by a 142 // GlobalCallstackTrie::Node that is owned by the parent (i.e. calling) 143 // callsite. It has a pointer to its parent, which means the function 144 // call-graph can be reconstructed from a GlobalCallstackTrie::Node by walking 145 // down the pointers to the parents. 146 class GlobalCallstackTrie { 147 public: 148 // Node in a tree of function traces that resulted in an allocation. For 149 // instance, if alloc_buf is called from foo and bar, which are called from 150 // main, the tree looks as following. 151 // 152 // alloc_buf alloc_buf 153 // | | 154 // foo bar 155 // \ / 156 // main 157 // | 158 // libc_init 159 // | 160 // [root_] 161 // 162 // allocations_ will hold a map from the pointers returned from malloc to 163 // alloc_buf to the leafs of this tree. 164 class Node { 165 public: 166 // This is opaque except to GlobalCallstackTrie. 167 friend class GlobalCallstackTrie; 168 Node(Interned<Frame> frame)169 Node(Interned<Frame> frame) : Node(std::move(frame), nullptr) {} Node(Interned<Frame> frame,Node * parent)170 Node(Interned<Frame> frame, Node* parent) 171 : parent_(parent), location_(std::move(frame)) {} 172 id()173 uintptr_t id() const { return reinterpret_cast<uintptr_t>(this); } 174 175 private: 176 Node* GetOrCreateChild(const Interned<Frame>& loc); 177 178 uint64_t ref_count_ = 0; 179 Node* const parent_; 180 const Interned<Frame> location_; 181 base::LookupSet<Node, const Interned<Frame>, &Node::location_> children_; 182 }; 183 184 GlobalCallstackTrie() = default; 185 GlobalCallstackTrie(const GlobalCallstackTrie&) = delete; 186 GlobalCallstackTrie& operator=(const GlobalCallstackTrie&) = delete; 187 188 Node* CreateCallsite(const std::vector<FrameData>& locs); 189 static void DecrementNode(Node* node); 190 static void IncrementNode(Node* node); 191 192 std::vector<Interned<Frame>> BuildCallstack(const Node* node) const; 193 194 private: 195 Interned<Frame> InternCodeLocation(const FrameData& loc); 196 Interned<Frame> MakeRootFrame(); 197 198 Interner<std::string> string_interner_; 199 Interner<Mapping> mapping_interner_; 200 Interner<Frame> frame_interner_; 201 202 Node root_{MakeRootFrame()}; 203 }; 204 205 struct DumpState { DumpStateDumpState206 DumpState(TraceWriter* tw, uint64_t* ni) : trace_writer(tw), next_index(ni) { 207 last_written = trace_writer->written(); 208 209 current_trace_packet = trace_writer->NewTracePacket(); 210 current_trace_packet->set_timestamp( 211 static_cast<uint64_t>(base::GetBootTimeNs().count())); 212 current_profile_packet = current_trace_packet->set_profile_packet(); 213 current_profile_packet->set_index((*next_index)++); 214 215 // Explicitly reserve intern ID 0 for the empty string, so unset string 216 // fields get mapped to this. 217 auto interned_string = current_profile_packet->add_strings(); 218 constexpr const uint8_t kEmptyString[] = ""; 219 interned_string->set_id(0); 220 interned_string->set_str(kEmptyString, 0); 221 } 222 223 void WriteMap(const Interned<Mapping> map); 224 void WriteFrame(const Interned<Frame> frame); 225 void WriteString(const Interned<std::string>& str); 226 227 std::set<InternID> dumped_strings; 228 std::set<InternID> dumped_frames; 229 std::set<InternID> dumped_mappings; 230 231 std::set<GlobalCallstackTrie::Node*> callstacks_to_dump; 232 233 TraceWriter* trace_writer; 234 protos::pbzero::ProfilePacket* current_profile_packet; 235 TraceWriter::TracePacketHandle current_trace_packet; 236 uint64_t* next_index; 237 uint64_t last_written = 0; 238 NewProfilePacketDumpState239 void NewProfilePacket() { 240 PERFETTO_DLOG("New profile packet after %" PRIu64 " bytes. Total: %" PRIu64 241 ", before %" PRIu64, 242 trace_writer->written() - last_written, 243 trace_writer->written(), last_written); 244 current_profile_packet->set_continued(true); 245 last_written = trace_writer->written(); 246 247 current_trace_packet->Finalize(); 248 current_trace_packet = trace_writer->NewTracePacket(); 249 current_trace_packet->set_timestamp( 250 static_cast<uint64_t>(base::GetBootTimeNs().count())); 251 current_profile_packet = current_trace_packet->set_profile_packet(); 252 current_profile_packet->set_index((*next_index)++); 253 } 254 currently_writtenDumpState255 uint64_t currently_written() { 256 return trace_writer->written() - last_written; 257 } 258 }; 259 260 // Snapshot for memory allocations of a particular process. Shares callsites 261 // with other processes. 262 class HeapTracker { 263 public: 264 // Caller needs to ensure that callsites outlives the HeapTracker. HeapTracker(GlobalCallstackTrie * callsites)265 explicit HeapTracker(GlobalCallstackTrie* callsites) 266 : callsites_(callsites) {} 267 268 void RecordMalloc(const std::vector<FrameData>& stack, 269 uint64_t address, 270 uint64_t size, 271 uint64_t sequence_number, 272 uint64_t timestamp); 273 void Dump( 274 std::function<void(protos::pbzero::ProfilePacket::ProcessHeapSamples*)> 275 fill_process_header, 276 DumpState* dump_state); RecordFree(uint64_t address,uint64_t sequence_number,uint64_t timestamp)277 void RecordFree(uint64_t address, 278 uint64_t sequence_number, 279 uint64_t timestamp) { 280 RecordOperation(sequence_number, {address, timestamp}); 281 } 282 283 uint64_t GetSizeForTesting(const std::vector<FrameData>& stack); GetTimestampForTesting()284 uint64_t GetTimestampForTesting() { return committed_timestamp_; } 285 286 private: 287 // Sum of all the allocations for a given callstack. 288 struct CallstackAllocations { CallstackAllocationsCallstackAllocations289 CallstackAllocations(GlobalCallstackTrie::Node* n) : node(n) {} 290 291 uint64_t allocs = 0; 292 293 uint64_t allocated = 0; 294 uint64_t freed = 0; 295 uint64_t allocation_count = 0; 296 uint64_t free_count = 0; 297 298 GlobalCallstackTrie::Node* const node; 299 ~CallstackAllocationsCallstackAllocations300 ~CallstackAllocations() { GlobalCallstackTrie::DecrementNode(node); } 301 302 bool operator<(const CallstackAllocations& other) const { 303 return node < other.node; 304 } 305 }; 306 307 struct Allocation { AllocationAllocation308 Allocation(uint64_t size, uint64_t seq, CallstackAllocations* csa) 309 : total_size(size), sequence_number(seq), callstack_allocations(csa) { 310 callstack_allocations->allocs++; 311 } 312 313 Allocation() = default; 314 Allocation(const Allocation&) = delete; AllocationAllocation315 Allocation(Allocation&& other) noexcept { 316 total_size = other.total_size; 317 sequence_number = other.sequence_number; 318 callstack_allocations = other.callstack_allocations; 319 other.callstack_allocations = nullptr; 320 } 321 AddToCallstackAllocationsAllocation322 void AddToCallstackAllocations() { 323 callstack_allocations->allocation_count++; 324 callstack_allocations->allocated += total_size; 325 } 326 SubtractFromCallstackAllocationsAllocation327 void SubtractFromCallstackAllocations() { 328 callstack_allocations->free_count++; 329 callstack_allocations->freed += total_size; 330 } 331 ~AllocationAllocation332 ~Allocation() { 333 if (callstack_allocations) 334 callstack_allocations->allocs--; 335 } 336 337 uint64_t total_size; 338 uint64_t sequence_number; 339 CallstackAllocations* callstack_allocations; 340 }; 341 342 struct PendingOperation { 343 uint64_t allocation_address; 344 uint64_t timestamp; 345 }; 346 MaybeCreateCallstackAllocations(GlobalCallstackTrie::Node * node)347 CallstackAllocations* MaybeCreateCallstackAllocations( 348 GlobalCallstackTrie::Node* node) { 349 auto callstack_allocations_it = callstack_allocations_.find(node); 350 if (callstack_allocations_it == callstack_allocations_.end()) { 351 GlobalCallstackTrie::IncrementNode(node); 352 bool inserted; 353 std::tie(callstack_allocations_it, inserted) = 354 callstack_allocations_.emplace(node, node); 355 PERFETTO_DCHECK(inserted); 356 } 357 return &callstack_allocations_it->second; 358 } 359 360 void RecordOperation(uint64_t sequence_number, 361 const PendingOperation& operation); 362 363 // Commits a malloc or free operation. 364 // See comment of pending_operations_ for encoding of malloc and free 365 // operations. 366 // 367 // Committing a malloc operation: Add the allocations size to 368 // CallstackAllocation::allocated. 369 // Committing a free operation: Add the allocation's size to 370 // CallstackAllocation::freed and delete the allocation. 371 void CommitOperation(uint64_t sequence_number, 372 const PendingOperation& operation); 373 374 // We cannot use an interner here, because after the last allocation goes 375 // away, we still need to keep the CallstackAllocations around until the next 376 // dump. 377 std::map<GlobalCallstackTrie::Node*, CallstackAllocations> 378 callstack_allocations_; 379 380 std::vector<std::pair<decltype(callstack_allocations_)::iterator, uint64_t>> 381 dead_callstack_allocations_; 382 383 std::map<uint64_t /* allocation address */, Allocation> allocations_; 384 385 // An operation is either a commit of an allocation or freeing of an 386 // allocation. An operation is a free if its seq_id is larger than 387 // the sequence_number of the corresponding allocation. It is a commit if its 388 // seq_id is equal to the sequence_number of the corresponding allocation. 389 // 390 // If its seq_id is less than the sequence_number of the corresponding 391 // allocation it could be either, but is ignored either way. 392 std::map<uint64_t /* seq_id */, PendingOperation /* allocation address */> 393 pending_operations_; 394 395 uint64_t committed_timestamp_ = 0; 396 // The sequence number all mallocs and frees have been handled up to. 397 uint64_t committed_sequence_number_ = 0; 398 GlobalCallstackTrie* callsites_; 399 }; 400 401 } // namespace profiling 402 } // namespace perfetto 403 404 namespace std { 405 template <> 406 struct hash<::perfetto::profiling::Mapping> { 407 using argument_type = ::perfetto::profiling::Mapping; 408 using result_type = size_t; 409 result_type operator()(const argument_type& mapping) { 410 size_t h = 411 std::hash<::perfetto::profiling::InternID>{}(mapping.build_id.id()); 412 h ^= std::hash<uint64_t>{}(mapping.offset); 413 h ^= std::hash<uint64_t>{}(mapping.start); 414 h ^= std::hash<uint64_t>{}(mapping.end); 415 h ^= std::hash<uint64_t>{}(mapping.load_bias); 416 for (const auto& path : mapping.path_components) 417 h ^= std::hash<uint64_t>{}(path.id()); 418 return h; 419 } 420 }; 421 422 template <> 423 struct hash<::perfetto::profiling::Frame> { 424 using argument_type = ::perfetto::profiling::Frame; 425 using result_type = size_t; 426 result_type operator()(const argument_type& frame) { 427 size_t h = std::hash<::perfetto::profiling::InternID>{}(frame.mapping.id()); 428 h ^= std::hash<::perfetto::profiling::InternID>{}(frame.function_name.id()); 429 h ^= std::hash<uint64_t>{}(frame.rel_pc); 430 return h; 431 } 432 }; 433 } // namespace std 434 435 #endif // SRC_PROFILING_MEMORY_BOOKKEEPING_H_ 436