/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/profiling/memory/bookkeeping.h"

#include <fcntl.h>
#include <inttypes.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "perfetto/base/file_utils.h"
#include "perfetto/base/logging.h"
#include "perfetto/base/scoped_file.h"

namespace perfetto {
namespace profiling {
namespace {
using ::perfetto::protos::pbzero::ProfilePacket;
// This needs to be lower than the maximum acceptable chunk size, because this
// is checked *before* writing another submessage. We conservatively assume
// submessages can be up to 100k here for a 500k chunk size.
// DropBox has a 500k chunk limit, and each chunk needs to parse as a proto.
uint32_t kPacketSizeThreshold = 400000;
}  // namespace

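// Returns the child of this node for |loc|, creating and inserting it into
// |children_| if it does not exist yet.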
GlobalCallstackTrie::Node* GlobalCallstackTrie::Node::GetOrCreateChild(
    const Interned<Frame>& loc) {
  Node* child = children_.Get(loc);
  if (!child)
    child = children_.Emplace(loc, this);
  return child;
}

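// Records an allocation of |size| bytes at |address|, attributed to
// |callstack|. If an allocation with a lower sequence number is already
// tracked at |address|, it is treated as implicitly freed and replaced. The
// operation itself is committed in strict sequence-number order via
// RecordOperation.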
void HeapTracker::RecordMalloc(const std::vector<FrameData>& callstack,
                               uint64_t address,
                               uint64_t size,
                               uint64_t sequence_number,
                               uint64_t timestamp) {
  auto it = allocations_.find(address);
  if (it != allocations_.end()) {
    Allocation& alloc = it->second;
    PERFETTO_DCHECK(alloc.sequence_number != sequence_number);
    if (alloc.sequence_number < sequence_number) {
      // As we are overwriting the previous allocation, the previous allocation
      // must have been freed.
      //
      // This makes the sequencing a bit incorrect. We are overwriting this
      // allocation, so we pretend that both the alloc and the free for it have
      // already happened at committed_sequence_number_, while in fact the free
      // might not have happened until right before this operation.

      if (alloc.sequence_number > committed_sequence_number_) {
        // Only count the previous allocation if it hasn't already been
        // committed, to avoid double counting it.
        alloc.AddToCallstackAllocations();
      }

      alloc.SubtractFromCallstackAllocations();
      GlobalCallstackTrie::Node* node = callsites_->CreateCallsite(callstack);
      alloc.total_size = size;
      alloc.sequence_number = sequence_number;
      alloc.callstack_allocations = MaybeCreateCallstackAllocations(node);
    }
  } else {
    GlobalCallstackTrie::Node* node = callsites_->CreateCallsite(callstack);
    allocations_.emplace(address,
                         Allocation(size, sequence_number,
                                    MaybeCreateCallstackAllocations(node)));
  }

  RecordOperation(sequence_number, {address, timestamp});
}

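// Commits |operation| immediately if it is the next operation in sequence
// order; otherwise it is queued in |pending_operations_| until all operations
// with lower sequence numbers have been committed.
//
// Example (operations arriving out of order as 1, 3, 2):
//   RecordOperation(1, op1);  // commits op1, committed_sequence_number_ == 1.
//   RecordOperation(3, op3);  // queued, as 2 has not been committed yet.
//   RecordOperation(2, op2);  // commits op2, then drains op3 from the queue.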
void HeapTracker::RecordOperation(uint64_t sequence_number,
                                  const PendingOperation& operation) {
  if (sequence_number != committed_sequence_number_ + 1) {
    pending_operations_.emplace(sequence_number, operation);
    return;
  }

  CommitOperation(sequence_number, operation);

  // At this point some other pending operations might be eligible to be
  // committed.
  auto it = pending_operations_.begin();
  while (it != pending_operations_.end() &&
         it->first == committed_sequence_number_ + 1) {
    CommitOperation(it->first, it->second);
    it = pending_operations_.erase(it);
  }
}

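// Applies a single malloc or free operation to the bookkeeping state. For the
// allocation currently tracked at the operation's address: a matching sequence
// number commits the malloc, a lower tracked sequence number means the
// allocation has since been freed and is removed, and a higher tracked
// sequence number makes this operation a no-op (see comment below).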
void HeapTracker::CommitOperation(uint64_t sequence_number,
                                  const PendingOperation& operation) {
  committed_sequence_number_++;
  committed_timestamp_ = operation.timestamp;

  uint64_t address = operation.allocation_address;

  // We will see many frees for addresses we do not know about.
  auto leaf_it = allocations_.find(address);
  if (leaf_it == allocations_.end())
    return;

  Allocation& value = leaf_it->second;
  if (value.sequence_number == sequence_number) {
    value.AddToCallstackAllocations();
  } else if (value.sequence_number < sequence_number) {
    value.SubtractFromCallstackAllocations();
    allocations_.erase(leaf_it);
  }
  // else (value.sequence_number > sequence_number):
  //   This allocation has been replaced by a newer one in RecordMalloc.
  //   That code already accounted for the previous allocation's malloc (and
  //   the implicit free that must have happened, as there is now a new
  //   allocation at the same address). This means that this operation, be it
  //   a malloc or a free, must be treated as a no-op.
}

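// Emits this process' heap samples into |dump_state|'s current ProfilePacket.
// |fill_process_header| is invoked on every ProcessHeapSamples submessage
// created here, so the caller can attach per-process metadata. Output is split
// across multiple ProfilePackets once the current one exceeds
// kPacketSizeThreshold.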
void HeapTracker::Dump(
    std::function<void(ProfilePacket::ProcessHeapSamples*)> fill_process_header,
    DumpState* dump_state) {
  // There are two reasons we remove the unused callstack allocations on the
  // next iteration of Dump:
  // * We need to remove them after the callstacks were dumped, which currently
  //   happens after the allocations are dumped.
  // * This way, we do not destroy and recreate callstacks as frequently.
  for (auto it_and_alloc : dead_callstack_allocations_) {
    auto& it = it_and_alloc.first;
    uint64_t allocated = it_and_alloc.second;
    const CallstackAllocations& alloc = it->second;
    if (alloc.allocs == 0 && alloc.allocation_count == allocated)
      callstack_allocations_.erase(it);
  }
  dead_callstack_allocations_.clear();

  if (dump_state->currently_written() > kPacketSizeThreshold)
    dump_state->NewProfilePacket();

  ProfilePacket::ProcessHeapSamples* proto =
      dump_state->current_profile_packet->add_process_dumps();
  fill_process_header(proto);
  proto->set_timestamp(committed_timestamp_);
  for (auto it = callstack_allocations_.begin();
       it != callstack_allocations_.end(); ++it) {
    if (dump_state->currently_written() > kPacketSizeThreshold) {
      dump_state->NewProfilePacket();
      proto = dump_state->current_profile_packet->add_process_dumps();
      fill_process_header(proto);
      proto->set_timestamp(committed_timestamp_);
    }

    const CallstackAllocations& alloc = it->second;
    dump_state->callstacks_to_dump.emplace(alloc.node);
    ProfilePacket::HeapSample* sample = proto->add_samples();
    sample->set_callstack_id(alloc.node->id());
    sample->set_self_allocated(alloc.allocated);
    sample->set_self_freed(alloc.freed);
    sample->set_alloc_count(alloc.allocation_count);
    sample->set_free_count(alloc.free_count);

    if (alloc.allocs == 0)
      dead_callstack_allocations_.emplace_back(it, alloc.allocation_count);
  }
}

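// Returns the number of bytes currently attributed to |stack|. Test-only.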
uint64_t HeapTracker::GetSizeForTesting(const std::vector<FrameData>& stack) {
  GlobalCallstackTrie::Node* node = callsites_->CreateCallsite(stack);
  // Hack to make the node go away again if it wasn't used before.
  // This is only acceptable because this function is used in tests only.
  GlobalCallstackTrie::IncrementNode(node);
  GlobalCallstackTrie::DecrementNode(node);
  auto it = callstack_allocations_.find(node);
  if (it == callstack_allocations_.end()) {
    return 0;
  }
  const CallstackAllocations& alloc = it->second;
  return alloc.allocated - alloc.freed;
}

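// Reconstructs the interned frames for |node| by walking its parent chain up
// to (but not including) the root. The result is ordered leaf-first.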
std::vector<Interned<Frame>> GlobalCallstackTrie::BuildCallstack(
    const Node* node) const {
  std::vector<Interned<Frame>> res;
  while (node != &root_) {
    res.emplace_back(node->location_);
    node = node->parent_;
  }
  return res;
}

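// Interns every frame of |callstack| and walks the corresponding path in the
// trie starting at the root, creating nodes as necessary. Returns the leaf
// node representing the full callstack.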
GlobalCallstackTrie::Node* GlobalCallstackTrie::CreateCallsite(
    const std::vector<FrameData>& callstack) {
  Node* node = &root_;
  for (const FrameData& loc : callstack) {
    node = node->GetOrCreateChild(InternCodeLocation(loc));
  }
  return node;
}

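// Increments the reference count of |node| and all of its ancestors.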
void GlobalCallstackTrie::IncrementNode(Node* node) {
  while (node != nullptr) {
    node->ref_count_ += 1;
    node = node->parent_;
  }
}

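// Decrements the reference count of |node| and all of its ancestors. Any node
// whose reference count drops to zero is removed from its parent's children
// on the way up.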
void GlobalCallstackTrie::DecrementNode(Node* node) {
  PERFETTO_DCHECK(node->ref_count_ >= 1);

  bool delete_prev = false;
  Node* prev = nullptr;
  while (node != nullptr) {
    if (delete_prev)
      node->children_.Remove(*prev);
    node->ref_count_ -= 1;
    delete_prev = node->ref_count_ == 0;
    prev = node;
    node = node->parent_;
  }
}

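// Translates an unwound FrameData into an interned Frame: the mapping, its
// path components, the function name and the frame itself are deduplicated
// through the respective interners.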
Interned<Frame> GlobalCallstackTrie::InternCodeLocation(const FrameData& loc) {
  Mapping map(string_interner_.Intern(loc.build_id));
  map.offset = loc.frame.map_elf_start_offset;
  map.start = loc.frame.map_start;
  map.end = loc.frame.map_end;
  map.load_bias = loc.frame.map_load_bias;
  base::StringSplitter sp(loc.frame.map_name, '/');
  while (sp.Next())
    map.path_components.emplace_back(string_interner_.Intern(sp.cur_token()));

  Frame frame(mapping_interner_.Intern(std::move(map)),
              string_interner_.Intern(loc.frame.function_name),
              loc.frame.rel_pc);

  return frame_interner_.Intern(frame);
}

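// Builds the frame used for the root node of the trie: an empty mapping, an
// empty function name and a relative PC of 0.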
Interned<Frame> GlobalCallstackTrie::MakeRootFrame() {
  Mapping map(string_interner_.Intern(""));

  Frame frame(mapping_interner_.Intern(std::move(map)),
              string_interner_.Intern(""), 0);

  return frame_interner_.Intern(frame);
}

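// Writes |map| into the current ProfilePacket if it has not been dumped yet.
// The interned strings it references (path components and build id) are
// written first.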
void DumpState::WriteMap(const Interned<Mapping> map) {
  auto map_it_and_inserted = dumped_mappings.emplace(map.id());
  if (map_it_and_inserted.second) {
    for (const Interned<std::string>& str : map->path_components)
      WriteString(str);

    WriteString(map->build_id);

    if (currently_written() > kPacketSizeThreshold)
      NewProfilePacket();

    auto mapping = current_profile_packet->add_mappings();
    mapping->set_id(map.id());
    mapping->set_offset(map->offset);
    mapping->set_start(map->start);
    mapping->set_end(map->end);
    mapping->set_load_bias(map->load_bias);
    mapping->set_build_id(map->build_id.id());
    for (const Interned<std::string>& str : map->path_components)
      mapping->add_path_string_ids(str.id());
  }
}

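// Writes |frame| into the current ProfilePacket if it has not been dumped yet,
// after making sure its mapping and function name have been written.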
void DumpState::WriteFrame(Interned<Frame> frame) {
  WriteMap(frame->mapping);
  WriteString(frame->function_name);
  bool inserted;
  std::tie(std::ignore, inserted) = dumped_frames.emplace(frame.id());
  if (inserted) {
    if (currently_written() > kPacketSizeThreshold)
      NewProfilePacket();

    auto frame_proto = current_profile_packet->add_frames();
    frame_proto->set_id(frame.id());
    frame_proto->set_function_name_id(frame->function_name.id());
    frame_proto->set_mapping_id(frame->mapping.id());
    frame_proto->set_rel_pc(frame->rel_pc);
  }
}

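// Writes the interned string |str| into the current ProfilePacket if it has
// not been dumped yet.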
void DumpState::WriteString(const Interned<std::string>& str) {
  bool inserted;
  std::tie(std::ignore, inserted) = dumped_strings.emplace(str.id());
  if (inserted) {
    if (currently_written() > kPacketSizeThreshold)
      NewProfilePacket();

    auto interned_string = current_profile_packet->add_strings();
    interned_string->set_id(str.id());
    interned_string->set_str(reinterpret_cast<const uint8_t*>(str->c_str()),
                             str->size());
  }
}

}  // namespace profiling
}  // namespace perfetto