// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/sampling_heap_profiler/sampling_heap_profiler.h"

#include <algorithm>
#include <cmath>
#include <utility>

#include "base/allocator/allocator_shim.h"
#include "base/allocator/buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc.h"
#include "base/atomicops.h"
#include "base/debug/stack_trace.h"
#include "base/macros.h"
#include "base/no_destructor.h"
#include "base/partition_alloc_buildflags.h"
#include "base/rand_util.h"
#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include "base/threading/thread_local_storage.h"
#include "build/build_config.h"

#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
    defined(OFFICIAL_BUILD)
#include "base/trace_event/cfi_backtrace_android.h"
#endif

namespace base {

using base::allocator::AllocatorDispatch;
using base::subtle::Atomic32;
using base::subtle::AtomicWord;

namespace {
// Controls how many top frames to skip when recording a call stack.
// These frames correspond to the profiler's own frames.
const uint32_t kSkipBaseAllocatorFrames = 2;

const size_t kDefaultSamplingIntervalBytes = 128 * 1024;

// When true, sample intervals are not randomized. Used for testing.
bool g_deterministic;

// A positive value if profiling is running, otherwise it's zero.
Atomic32 g_running;

// Pointer to the current |LockFreeAddressHashSet|.
AtomicWord g_sampled_addresses_set;

// Sampling interval parameter, the mean value for intervals between samples.
AtomicWord g_sampling_interval = kDefaultSamplingIntervalBytes;

void (*g_hooks_install_callback)();
Atomic32 g_hooks_installed;

void* AllocFn(const AllocatorDispatch* self, size_t size, void* context) {
  void* address = self->next->alloc_function(self->next, size, context);
  SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
  return address;
}

void* AllocZeroInitializedFn(const AllocatorDispatch* self,
                             size_t n,
                             size_t size,
                             void* context) {
  void* address = self->next->alloc_zero_initialized_function(self->next, n,
                                                              size, context);
  SamplingHeapProfiler::RecordAlloc(address, n * size,
                                    kSkipBaseAllocatorFrames);
  return address;
}

void* AllocAlignedFn(const AllocatorDispatch* self,
                     size_t alignment,
                     size_t size,
                     void* context) {
  void* address =
      self->next->alloc_aligned_function(self->next, alignment, size, context);
  SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
  return address;
}

void* ReallocFn(const AllocatorDispatch* self,
                void* address,
                size_t size,
                void* context) {
  // Note: size == 0 actually performs free.
  SamplingHeapProfiler::RecordFree(address);
  address = self->next->realloc_function(self->next, address, size, context);
  SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
  return address;
}

void FreeFn(const AllocatorDispatch* self, void* address, void* context) {
  SamplingHeapProfiler::RecordFree(address);
  self->next->free_function(self->next, address, context);
}

size_t GetSizeEstimateFn(const AllocatorDispatch* self,
                         void* address,
                         void* context) {
  return self->next->get_size_estimate_function(self->next, address, context);
}

unsigned BatchMallocFn(const AllocatorDispatch* self,
                       size_t size,
                       void** results,
                       unsigned num_requested,
                       void* context) {
  unsigned num_allocated = self->next->batch_malloc_function(
      self->next, size, results, num_requested, context);
  for (unsigned i = 0; i < num_allocated; ++i) {
    SamplingHeapProfiler::RecordAlloc(results[i], size,
                                      kSkipBaseAllocatorFrames);
  }
  return num_allocated;
}

void BatchFreeFn(const AllocatorDispatch* self,
                 void** to_be_freed,
                 unsigned num_to_be_freed,
                 void* context) {
  for (unsigned i = 0; i < num_to_be_freed; ++i)
    SamplingHeapProfiler::RecordFree(to_be_freed[i]);
  self->next->batch_free_function(self->next, to_be_freed, num_to_be_freed,
                                  context);
}

void FreeDefiniteSizeFn(const AllocatorDispatch* self,
                        void* address,
                        size_t size,
                        void* context) {
  SamplingHeapProfiler::RecordFree(address);
  self->next->free_definite_size_function(self->next, address, size, context);
}

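// The dispatch table below records every allocation and free with the
// profiler and then forwards the call to the next dispatch in the shim chain.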
AllocatorDispatch g_allocator_dispatch = {&AllocFn,
                                          &AllocZeroInitializedFn,
                                          &AllocAlignedFn,
                                          &ReallocFn,
                                          &FreeFn,
                                          &GetSizeEstimateFn,
                                          &BatchMallocFn,
                                          &BatchFreeFn,
                                          &FreeDefiniteSizeFn,
                                          nullptr};

#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

void PartitionAllocHook(void* address, size_t size, const char*) {
  SamplingHeapProfiler::RecordAlloc(address, size);
}

void PartitionFreeHook(void* address) {
  SamplingHeapProfiler::RecordFree(address);
}

#endif  // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

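// Returns the TLS slot that holds the per-thread sampling countdown used by
// RecordAlloc(). The value is stored directly as a pointer-sized integer, so
// no TLS destructor is required.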
ThreadLocalStorage::Slot& AccumulatedBytesTLS() {
  static base::NoDestructor<base::ThreadLocalStorage::Slot>
      accumulated_bytes_tls;
  return *accumulated_bytes_tls;
}

}  // namespace

SamplingHeapProfiler::Sample::Sample(size_t size,
                                     size_t total,
                                     uint32_t ordinal)
    : size(size), total(total), ordinal(ordinal) {}

SamplingHeapProfiler::Sample::Sample(const Sample&) = default;

SamplingHeapProfiler::Sample::~Sample() = default;

SamplingHeapProfiler* SamplingHeapProfiler::instance_;

SamplingHeapProfiler::SamplingHeapProfiler() {
  instance_ = this;
  auto sampled_addresses = std::make_unique<LockFreeAddressHashSet>(64);
  base::subtle::NoBarrier_Store(
      &g_sampled_addresses_set,
      reinterpret_cast<AtomicWord>(sampled_addresses.get()));
  sampled_addresses_stack_.push(std::move(sampled_addresses));
}

// static
void SamplingHeapProfiler::InitTLSSlot() {
  // Preallocate the TLS slot early, so it can't cause reentrancy issues
  // when sampling is started.
  ignore_result(AccumulatedBytesTLS().Get());
}

// static
void SamplingHeapProfiler::InstallAllocatorHooksOnce() {
  static bool hook_installed = InstallAllocatorHooks();
  ignore_result(hook_installed);
}

// static
bool SamplingHeapProfiler::InstallAllocatorHooks() {
#if BUILDFLAG(USE_ALLOCATOR_SHIM)
  base::allocator::InsertAllocatorDispatch(&g_allocator_dispatch);
#else
  ignore_result(g_allocator_dispatch);
  DLOG(WARNING)
      << "base::allocator shims are not available for memory sampling.";
#endif  // BUILDFLAG(USE_ALLOCATOR_SHIM)

#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
  base::PartitionAllocHooks::SetAllocationHook(&PartitionAllocHook);
  base::PartitionAllocHooks::SetFreeHook(&PartitionFreeHook);
#endif  // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)

  int32_t hooks_install_callback_has_been_set =
      base::subtle::Acquire_CompareAndSwap(&g_hooks_installed, 0, 1);
  if (hooks_install_callback_has_been_set)
    g_hooks_install_callback();

  return true;
}

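// Both InstallAllocatorHooks() and SetHooksInstallCallback() CompareAndSwap
// g_hooks_installed from 0 to 1. Whichever of them runs second observes the
// already-set flag and invokes the callback, so the callback fires exactly
// once regardless of which call happens first.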
// static
void SamplingHeapProfiler::SetHooksInstallCallback(
    void (*hooks_install_callback)()) {
  CHECK(!g_hooks_install_callback && hooks_install_callback);
  g_hooks_install_callback = hooks_install_callback;

  int32_t profiler_has_already_been_initialized =
      base::subtle::Release_CompareAndSwap(&g_hooks_installed, 0, 1);
  if (profiler_has_already_been_initialized)
    g_hooks_install_callback();
}

uint32_t SamplingHeapProfiler::Start() {
#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
    defined(OFFICIAL_BUILD)
  if (!base::trace_event::CFIBacktraceAndroid::GetInitializedInstance()
           ->can_unwind_stack_frames()) {
    LOG(WARNING) << "Sampling heap profiler: Stack unwinding is not available.";
    return 0;
  }
#endif
  InstallAllocatorHooksOnce();
  base::subtle::Barrier_AtomicIncrement(&g_running, 1);
  return last_sample_ordinal_;
}

void SamplingHeapProfiler::Stop() {
  AtomicWord count = base::subtle::Barrier_AtomicIncrement(&g_running, -1);
  CHECK_GE(count, 0);
}
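
// Example (hypothetical call site, shown for illustration only):
//   SamplingHeapProfiler* profiler = SamplingHeapProfiler::GetInstance();
//   profiler->SetSamplingInterval(256 * 1024);  // interval value is arbitrary
//   uint32_t profile_id = profiler->Start();
//   ...
//   std::vector<SamplingHeapProfiler::Sample> samples =
//       profiler->GetSamples(profile_id);
//   profiler->Stop();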

void SamplingHeapProfiler::SetSamplingInterval(size_t sampling_interval) {
  // TODO(alph): Reset the sample being collected if running.
  base::subtle::Release_Store(&g_sampling_interval,
                              static_cast<AtomicWord>(sampling_interval));
}

// static
size_t SamplingHeapProfiler::GetNextSampleInterval(size_t interval) {
  if (UNLIKELY(g_deterministic))
    return interval;

  // We sample with a Poisson process, with a constant average sampling
  // interval. This follows the exponential probability distribution with
  // parameter λ = 1/interval, where |interval| is the average number of bytes
  // between samples.
  // Let u be a uniformly distributed random number between 0 and 1; then
  // next_sample = -ln(u) / λ.
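  // For example (illustrative numbers only): with the default 128 KiB mean
  // interval and u = 0.5, next_sample = -ln(0.5) * 131072 ≈ 90852 bytes,
  // while u = 0.1 gives ≈ 301805 bytes.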
  double uniform = base::RandDouble();
  double value = -log(uniform) * interval;
  size_t min_value = sizeof(intptr_t);
  // We limit the upper bound of a sample interval to make sure we don't have
  // huge gaps in the sampling stream. The probability of hitting the upper
  // bound is exp(-20) ~ 2e-9, so it should not skew the distribution.
  size_t max_value = interval * 20;
  if (UNLIKELY(value < min_value))
    return min_value;
  if (UNLIKELY(value > max_value))
    return max_value;
  return static_cast<size_t>(value);
}

// static
void SamplingHeapProfiler::RecordAlloc(void* address,
                                       size_t size,
                                       uint32_t skip_frames) {
  if (UNLIKELY(!base::subtle::NoBarrier_Load(&g_running)))
    return;
  if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
    return;

  // TODO(alph): On MacOS it may call the hook several times for a single
  // allocation. Handle the case.

  intptr_t accumulated_bytes =
      reinterpret_cast<intptr_t>(AccumulatedBytesTLS().Get());
  accumulated_bytes += size;
  if (LIKELY(accumulated_bytes < 0)) {
    AccumulatedBytesTLS().Set(reinterpret_cast<void*>(accumulated_bytes));
    return;
  }

  size_t mean_interval = base::subtle::NoBarrier_Load(&g_sampling_interval);
  size_t samples = accumulated_bytes / mean_interval;
  accumulated_bytes %= mean_interval;

  do {
    accumulated_bytes -= GetNextSampleInterval(mean_interval);
    ++samples;
  } while (accumulated_bytes >= 0);
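  // Illustrative walk-through (hypothetical numbers): with the default 128 KiB
  // mean interval, suppose accumulated_bytes reaches 140000 above. Then
  // samples = 140000 / 131072 = 1 with remainder 8928, and the loop subtracts
  // one randomized interval (say ~131072), leaving about -122144 and
  // samples = 2. The negative remainder stored below sends roughly the next
  // 122 KiB of allocations through the fast path at the top of this function,
  // and this allocation is accounted for as samples * mean_interval bytes.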

  AccumulatedBytesTLS().Set(reinterpret_cast<void*>(accumulated_bytes));

  instance_->DoRecordAlloc(samples * mean_interval, size, address, skip_frames);
}

void SamplingHeapProfiler::RecordStackTrace(Sample* sample,
                                            uint32_t skip_frames) {
#if !defined(OS_NACL)
  constexpr uint32_t kMaxStackEntries = 256;
  constexpr uint32_t kSkipProfilerOwnFrames = 2;
  skip_frames += kSkipProfilerOwnFrames;
#if defined(OS_ANDROID) && BUILDFLAG(CAN_UNWIND_WITH_CFI_TABLE) && \
    defined(OFFICIAL_BUILD)
  const void* frames[kMaxStackEntries];
  size_t frame_count =
      base::trace_event::CFIBacktraceAndroid::GetInitializedInstance()->Unwind(
          frames, kMaxStackEntries);
#elif BUILDFLAG(CAN_UNWIND_WITH_FRAME_POINTERS)
  const void* frames[kMaxStackEntries];
  size_t frame_count = base::debug::TraceStackFramePointers(
      frames, kMaxStackEntries, skip_frames);
  skip_frames = 0;
#else
  // Fall back to capturing the stack with base::debug::StackTrace,
  // which is likely slower, but more reliable.
  base::debug::StackTrace stack_trace(kMaxStackEntries);
  size_t frame_count = 0;
  const void* const* frames = stack_trace.Addresses(&frame_count);
#endif

  sample->stack.insert(
      sample->stack.end(), const_cast<void**>(&frames[skip_frames]),
      const_cast<void**>(&frames[std::max<size_t>(frame_count, skip_frames)]));
#endif
}

void SamplingHeapProfiler::DoRecordAlloc(size_t total_allocated,
                                         size_t size,
                                         void* address,
                                         uint32_t skip_frames) {
  if (entered_.Get())
    return;
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    Sample sample(size, total_allocated, ++last_sample_ordinal_);
    RecordStackTrace(&sample, skip_frames);
    for (auto* observer : observers_)
      observer->SampleAdded(sample.ordinal, size, total_allocated);
    samples_.emplace(address, std::move(sample));
    // TODO(alph): Sometimes RecordAlloc is called twice in a row without
    // a RecordFree in between. Investigate it.
    if (!sampled_addresses_set().Contains(address))
      sampled_addresses_set().Insert(address);
    BalanceAddressesHashSet();
  }
  entered_.Set(false);
}

// static
void SamplingHeapProfiler::RecordFree(void* address) {
  if (UNLIKELY(address == nullptr))
    return;
  if (UNLIKELY(sampled_addresses_set().Contains(address)))
    instance_->DoRecordFree(address);
}

void SamplingHeapProfiler::DoRecordFree(void* address) {
  if (UNLIKELY(base::ThreadLocalStorage::HasBeenDestroyed()))
    return;
  if (entered_.Get())
    return;
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    auto it = samples_.find(address);
    CHECK(it != samples_.end());
    for (auto* observer : observers_)
      observer->SampleRemoved(it->second.ordinal);
    samples_.erase(it);
    sampled_addresses_set().Remove(address);
  }
  entered_.Set(false);
}

void SamplingHeapProfiler::BalanceAddressesHashSet() {
  // If the load factor of the current addresses hash set exceeds 1, allocate
  // a new set twice as large, copy all the data into it, and switch to it.
  // No writes are made to either set during the copy, as it happens under the
  // lock. Readers continue to use the old set until the atomic switch below
  // takes place.
  LockFreeAddressHashSet& current_set = sampled_addresses_set();
  if (current_set.load_factor() < 1)
    return;
  auto new_set =
      std::make_unique<LockFreeAddressHashSet>(current_set.buckets_count() * 2);
  new_set->Copy(current_set);
  // Atomically switch all the new readers to the new set.
  base::subtle::Release_Store(&g_sampled_addresses_set,
                              reinterpret_cast<AtomicWord>(new_set.get()));
  // We still have to keep all the old sets alive to resolve the theoretical
  // race with readers in |RecordFree| that have already obtained the set
  // pointer, but haven't yet managed to access it.
  sampled_addresses_stack_.push(std::move(new_set));
}

// static
LockFreeAddressHashSet& SamplingHeapProfiler::sampled_addresses_set() {
  return *reinterpret_cast<LockFreeAddressHashSet*>(
      base::subtle::NoBarrier_Load(&g_sampled_addresses_set));
}

// static
SamplingHeapProfiler* SamplingHeapProfiler::GetInstance() {
  static base::NoDestructor<SamplingHeapProfiler> instance;
  return instance.get();
}

// static
void SamplingHeapProfiler::SuppressRandomnessForTest(bool suppress) {
  g_deterministic = suppress;
}

void SamplingHeapProfiler::AddSamplesObserver(SamplesObserver* observer) {
  CHECK(!entered_.Get());
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    observers_.push_back(observer);
  }
  entered_.Set(false);
}

void SamplingHeapProfiler::RemoveSamplesObserver(SamplesObserver* observer) {
  CHECK(!entered_.Get());
  entered_.Set(true);
  {
    base::AutoLock lock(mutex_);
    auto it = std::find(observers_.begin(), observers_.end(), observer);
    CHECK(it != observers_.end());
    observers_.erase(it);
  }
  entered_.Set(false);
}

std::vector<SamplingHeapProfiler::Sample> SamplingHeapProfiler::GetSamples(
    uint32_t profile_id) {
  CHECK(!entered_.Get());
  entered_.Set(true);
  std::vector<Sample> samples;
  {
    base::AutoLock lock(mutex_);
    for (auto& it : samples_) {
      Sample& sample = it.second;
      if (sample.ordinal > profile_id)
        samples.push_back(sample);
    }
  }
  entered_.Set(false);
  return samples;
}
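
// Sketch of a client-side observer (hypothetical class, assuming
// SamplesObserver declares SampleAdded()/SampleRemoved() matching the calls in
// DoRecordAlloc()/DoRecordFree() above):
//   class MyObserver : public SamplingHeapProfiler::SamplesObserver {
//    public:
//     void SampleAdded(uint32_t ordinal, size_t size, size_t total) override {
//       // A new sample was recorded.
//     }
//     void SampleRemoved(uint32_t ordinal) override {
//       // The sampled allocation was freed.
//     }
//   };
// It would be registered with GetInstance()->AddSamplesObserver(&observer)
// and unregistered with RemoveSamplesObserver() when no longer needed.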

}  // namespace base