// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_SAMPLING_HEAP_PROFILER_POISSON_ALLOCATION_SAMPLER_H_
#define BASE_SAMPLING_HEAP_PROFILER_POISSON_ALLOCATION_SAMPLER_H_

#include <atomic>
#include <vector>

#include "base/allocator/buildflags.h"
#include "base/allocator/dispatcher/reentry_guard.h"
#include "base/allocator/dispatcher/subsystem.h"
#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "base/gtest_prod_util.h"
#include "base/no_destructor.h"
#include "base/sampling_heap_profiler/lock_free_address_hash_set.h"
#include "base/synchronization/lock.h"
#include "base/thread_annotations.h"

namespace heap_profiling {
class HeapProfilerControllerTest;
}

namespace base {

class SamplingHeapProfilerTest;

// This singleton class implements Poisson sampling of the incoming allocation
// stream. It hooks onto base::allocator and base::PartitionAlloc.
// The only control parameter is the sampling interval, which sets the average
// number of bytes between samples. The actual intervals between samples are
// randomized using a Poisson process to mitigate patterns in the allocation
// stream.
// Once the accumulated allocation size fills up the current sample interval,
// a sample is generated and sent to the observers via the |SampleAdded| call.
// When the memory that triggered the sample is freed, observers are notified
// via the |SampleRemoved| call.
//
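// Example usage (an illustrative sketch only; `MyObserver`, `my_observer` and
// the 128 KiB interval are hypothetical and not part of this header; observers
// may be invoked on any thread, so they must be thread-safe):
//
//   class MyObserver : public PoissonAllocationSampler::SamplesObserver {
//    public:
//     void SampleAdded(void* address,
//                      size_t size,
//                      size_t total,
//                      base::allocator::dispatcher::AllocationSubsystem type,
//                      const char* context) override {
//       // Record the sampled allocation in a thread-safe structure.
//     }
//     void SampleRemoved(void* address) override {
//       // Drop the record for |address|, if any.
//     }
//   };
//
//   // Early during process startup:
//   PoissonAllocationSampler::Init();
//
//   // When profiling should begin:
//   auto* sampler = PoissonAllocationSampler::Get();
//   sampler->SetSamplingInterval(128 * 1024);  // Mean bytes between samples.
//   sampler->AddSamplesObserver(my_observer);
//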
class BASE_EXPORT PoissonAllocationSampler {
 public:
  class SamplesObserver {
   public:
    virtual ~SamplesObserver() = default;
    virtual void SampleAdded(
        void* address,
        size_t size,
        size_t total,
        base::allocator::dispatcher::AllocationSubsystem type,
        const char* context) = 0;
    virtual void SampleRemoved(void* address) = 0;
  };

  // An instance of this class makes the sampler not report samples generated
  // within the object's scope for the current thread.
  // It allows observers to allocate/deallocate memory while holding a lock
  // without risking reentrancy problems.
  // The current implementation doesn't support nesting of
  // ScopedMuteThreadSamples.
  class BASE_EXPORT ScopedMuteThreadSamples {
   public:
    ScopedMuteThreadSamples();
    ~ScopedMuteThreadSamples();

    ScopedMuteThreadSamples(const ScopedMuteThreadSamples&) = delete;
    ScopedMuteThreadSamples& operator=(const ScopedMuteThreadSamples&) = delete;

    static bool IsMuted();
  };
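
  // A minimal sketch of how an observer might use ScopedMuteThreadSamples so
  // that its own bookkeeping allocations are not fed back into the sampler
  // (illustrative only; `MyObserver`, `lock_` and `samples_` are hypothetical):
  //
  //   void MyObserver::SampleAdded(void* address, size_t size, ...) {
  //     PoissonAllocationSampler::ScopedMuteThreadSamples mute;
  //     base::AutoLock scoped_lock(lock_);
  //     samples_.emplace(address, size);  // May allocate while holding lock_.
  //   }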

  // An instance of this class makes the sampler behave deterministically to
  // ensure test results are repeatable. Does not support nesting.
  class BASE_EXPORT ScopedSuppressRandomnessForTesting {
   public:
    ScopedSuppressRandomnessForTesting();
    ~ScopedSuppressRandomnessForTesting();

    ScopedSuppressRandomnessForTesting(
        const ScopedSuppressRandomnessForTesting&) = delete;
    ScopedSuppressRandomnessForTesting& operator=(
        const ScopedSuppressRandomnessForTesting&) = delete;

    static bool IsSuppressed();
  };

  // Must be called early during process initialization. It creates and
  // reserves a TLS slot.
  static void Init();

  void AddSamplesObserver(SamplesObserver*);

  // Note: After an observer is removed, it is still possible for that observer
  // to receive a notification. This is not a problem currently, as the only
  // client of this interface is base::SamplingHeapProfiler, which is a
  // singleton.
  // If stricter guarantees are needed in the future, one might want to put the
  // observer notification loop under a reader-writer lock.
  void RemoveSamplesObserver(SamplesObserver*);

  // Sets the mean number of bytes that will be allocated before taking a
  // sample.
  void SetSamplingInterval(size_t sampling_interval_bytes);

  // Returns the current mean sampling interval, in bytes.
  size_t SamplingInterval() const;

#if !BUILDFLAG(USE_ALLOCATION_EVENT_DISPATCHER)
  ALWAYS_INLINE static void RecordAlloc(
      void* address,
      size_t,
      base::allocator::dispatcher::AllocationSubsystem,
      const char* context);
  ALWAYS_INLINE static void RecordFree(void* address);
#endif

  ALWAYS_INLINE void OnAllocation(
      void* address,
      size_t,
      base::allocator::dispatcher::AllocationSubsystem,
      const char* context);
  ALWAYS_INLINE void OnFree(void* address);

  static PoissonAllocationSampler* Get();

  PoissonAllocationSampler(const PoissonAllocationSampler&) = delete;
  PoissonAllocationSampler& operator=(const PoissonAllocationSampler&) = delete;

  // Returns true if a ScopedMuteHookedSamplesForTesting exists. Only friends
  // can create a ScopedMuteHookedSamplesForTesting but anyone can check the
  // status of this. This can be read from any thread.
  static bool AreHookedSamplesMuted() {
    return profiling_state_.load(std::memory_order_relaxed) &
           ProfilingStateFlag::kHookedSamplesMutedForTesting;
  }

 private:
  // Flags recording the state of the profiler. This does not use enum class so
  // flags can be used in a bitmask.
  enum ProfilingStateFlag {
    // Set if profiling has ever been started in this session of Chrome. Once
    // this is set, it is never reset. This is used to optimize the common case
    // where profiling is never used.
    kWasStarted = 1 << 0,
    // Set if profiling is currently running. This flag is toggled on and off
    // as sample observers are added and removed.
    kIsRunning = 1 << 1,
    // Set if a ScopedMuteHookedSamplesForTesting object exists.
    kHookedSamplesMutedForTesting = 1 << 2,
  };
  using ProfilingStateFlagMask = int;

  // An instance of this class makes the sampler only report samples with
  // AllocatorType kManualForTesting, not those from hooked allocators. This
  // allows unit tests to set test expectations based on only explicit calls to
  // RecordAlloc and RecordFree.
  //
  // The accumulated bytes on the thread that creates a
  // ScopedMuteHookedSamplesForTesting will also be reset to 0, and restored
  // when the object leaves scope. This gives tests a known state to start
  // recording samples on one thread: a full interval must pass to record a
  // sample. Other threads will still have a random number of accumulated bytes.
  //
  // Only one instance may exist at a time.
  class BASE_EXPORT ScopedMuteHookedSamplesForTesting {
   public:
    ScopedMuteHookedSamplesForTesting();
    ~ScopedMuteHookedSamplesForTesting();

    ScopedMuteHookedSamplesForTesting(
        const ScopedMuteHookedSamplesForTesting&) = delete;
    ScopedMuteHookedSamplesForTesting& operator=(
        const ScopedMuteHookedSamplesForTesting&) = delete;

   private:
    intptr_t accumulated_bytes_snapshot_;
  };

  PoissonAllocationSampler();
  ~PoissonAllocationSampler() = delete;

  static size_t GetNextSampleInterval(size_t base_interval);
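  // A minimal sketch of the exponential draw behind Poisson sampling
  // (illustrative only; the actual implementation in the .cc file may differ,
  // e.g. in clamping, determinism handling and the random source):
  //
  //   // With a mean of `base_interval` bytes between samples, each gap is
  //   // drawn from an exponential distribution with rate 1 / base_interval.
  //   double u = base::RandDouble();                 // Uniform in [0, 1).
  //   double gap = -std::log(1.0 - u) * base_interval;
  //   return static_cast<size_t>(gap) + 1;           // Never return 0.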

  // Return the set of sampled addresses. This is only valid to call after
  // Init().
  static LockFreeAddressHashSet& sampled_addresses_set();

  // Atomically adds `flag` to `profiling_state_`. DCHECK's if it was already
  // set. If `flag` is kIsRunning, also sets kWasStarted. Uses
  // std::memory_order_relaxed semantics and therefore doesn't synchronize the
  // state of any other memory with future readers. (See the comment in
  // RecordFree() for why this is safe.)
  static void SetProfilingStateFlag(ProfilingStateFlag flag);

  // Atomically removes `flag` from `profiling_state_`. DCHECK's if it was not
  // already set. Uses std::memory_order_relaxed semantics and therefore doesn't
  // synchronize the state of any other memory with future readers. (See the
  // comment in RecordFree() for why this is safe.)
  static void ResetProfilingStateFlag(ProfilingStateFlag flag);

  void DoRecordAllocation(const ProfilingStateFlagMask state,
                          void* address,
                          size_t size,
                          base::allocator::dispatcher::AllocationSubsystem type,
                          const char* context);
  void DoRecordFree(void* address);

  void BalanceAddressesHashSet();

  Lock mutex_;

  // The |observers_| list is guarded by |mutex_|; however, a copy of it is made
  // before invoking the observers, to avoid performing expensive operations
  // under the lock. As such, the SamplesObservers themselves need to be
  // thread-safe and must support being invoked racily after
  // RemoveSamplesObserver(), as sketched below.
  std::vector<SamplesObserver*> observers_ GUARDED_BY(mutex_);
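
  // A sketch of the copy-before-notify pattern described above (illustrative
  // only; the actual notification code lives in the .cc file and may differ):
  //
  //   std::vector<SamplesObserver*> observers_copy;
  //   {
  //     AutoLock scoped_lock(mutex_);
  //     observers_copy = observers_;
  //   }
  //   for (SamplesObserver* observer : observers_copy) {
  //     observer->SampleAdded(address, size, total, type, context);
  //   }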

#if !BUILDFLAG(USE_ALLOCATION_EVENT_DISPATCHER)
  static PoissonAllocationSampler* instance_;
#endif

  // Fast, thread-safe access to the current profiling state.
  static std::atomic<ProfilingStateFlagMask> profiling_state_;

  friend class heap_profiling::HeapProfilerControllerTest;
  friend class NoDestructor<PoissonAllocationSampler>;
  friend class PoissonAllocationSamplerStateTest;
  friend class SamplingHeapProfilerTest;
  FRIEND_TEST_ALL_PREFIXES(PoissonAllocationSamplerTest, MuteHooksWithoutInit);
  FRIEND_TEST_ALL_PREFIXES(SamplingHeapProfilerTest, HookedAllocatorMuted);
};

#if !BUILDFLAG(USE_ALLOCATION_EVENT_DISPATCHER)
// static
ALWAYS_INLINE void PoissonAllocationSampler::RecordAlloc(
    void* address,
    size_t size,
    base::allocator::dispatcher::AllocationSubsystem type,
    const char* context) {
  instance_->OnAllocation(address, size, type, context);
}

// static
ALWAYS_INLINE void PoissonAllocationSampler::RecordFree(void* address) {
  instance_->OnFree(address);
}
#endif

ALWAYS_INLINE void PoissonAllocationSampler::OnAllocation(
    void* address,
    size_t size,
    base::allocator::dispatcher::AllocationSubsystem type,
    const char* context) {
  // The allocation hooks may be installed before the sampler is started. Check
  // whether it has ever been started first to avoid extra work on the fast
  // path, because that is the most common case.
  const ProfilingStateFlagMask state =
      profiling_state_.load(std::memory_order_relaxed);
  if (LIKELY(!(state & ProfilingStateFlag::kWasStarted))) {
    return;
  }

  // When sampling is muted for testing, only handle manual calls to
  // RecordAlloc. (This doesn't need to be checked in RecordFree because muted
  // allocations won't be added to sampled_addresses_set(), so RecordFree
  // already skips them.)
  if (UNLIKELY((state & ProfilingStateFlag::kHookedSamplesMutedForTesting) &&
               type != base::allocator::dispatcher::AllocationSubsystem::
                           kManualForTesting)) {
    return;
  }

  // Note: ReentryGuard prevents recursion introduced by malloc and the
  // initialization of thread local storage, which happen in the allocation
  // path only (see the ReentryGuard docs for full details).
  allocator::dispatcher::ReentryGuard reentry_guard;

  if (UNLIKELY(!reentry_guard)) {
    return;
  }

  DoRecordAllocation(state, address, size, type, context);
}

ALWAYS_INLINE void PoissonAllocationSampler::OnFree(void* address) {
  // The allocation hooks may be installed before the sampler is started. Check
  // whether it has ever been started first to avoid extra work on the fast
  // path, because that is the most common case. Note that DoRecordFree still
  // needs to be called if the sampler was started but is now stopped, to track
  // allocations that were recorded while the sampler was still running.
  //
  // Relaxed ordering is safe here because there's only one case where
  // RecordAlloc and RecordFree MUST see the same value of `profiling_state_`.
  // Assume thread A updates `profiling_state_` from 0 to kWasStarted |
  // kIsRunning, thread B calls RecordAlloc, and thread C calls RecordFree.
  // (Something else could update `profiling_state_` to remove kIsRunning before
  // RecordAlloc or RecordFree.)
  //
  // 1. If RecordAlloc(p) sees !kWasStarted or !kIsRunning it will return
  //    immediately, so p won't be in sampled_addresses_set(). So no matter what
  //    RecordFree(p) sees it will also return immediately.
  //
  // 2. If RecordFree() is called with a pointer that was never passed to
  //    RecordAlloc(), again it will return immediately no matter what it sees.
  //
  // 3. If RecordAlloc(p) sees kIsRunning it will put p in
  //    sampled_addresses_set(). In this case RecordFree(p) MUST see kWasStarted
  //    or it will return without removing p:
  //
  //    3a. If the program got p as the return value from malloc() and passed it
  //        to free(), then RecordFree() happens-after RecordAlloc() and
  //        therefore will see the same value of `profiling_state_` as
  //        RecordAlloc() for all memory orders. (Proof: using the definitions
  //        of sequenced-after, happens-after and inter-thread happens-after
  //        from https://en.cppreference.com/w/cpp/atomic/memory_order, malloc()
  //        calls RecordAlloc() so its return is sequenced-after RecordAlloc();
  //        free() inter-thread happens-after malloc's return because it
  //        consumes the result; RecordFree() is sequenced-after its caller,
  //        free(); therefore RecordFree() inter-thread happens-after
  //        RecordAlloc().)
  //    3b. If the program is freeing a random pointer which coincidentally was
  //        also returned from malloc(), such that free(p) does not happen-after
  //        malloc(), then there is already an unavoidable race condition. If
  //        the profiler sees malloc() before free(p), then it will add p to
  //        sampled_addresses_set() and then remove it; otherwise it will do
  //        nothing in RecordFree() and add p to sampled_addresses_set() in
  //        RecordAlloc(), recording a potential leak. Reading
  //        `profiling_state_` with relaxed ordering adds another possibility:
  //        if the profiler sees malloc() with kWasStarted and then free()
  //        without kWasStarted, it will add p to sampled_addresses_set() in
  //        RecordAlloc() and then do nothing in RecordFree(). This has the same
  //        outcome as the existing race.
  const ProfilingStateFlagMask state =
      profiling_state_.load(std::memory_order_relaxed);
  if (LIKELY(!(state & ProfilingStateFlag::kWasStarted))) {
    return;
  }
  if (UNLIKELY(address == nullptr)) {
    return;
  }
  if (LIKELY(!sampled_addresses_set().Contains(address))) {
    return;
  }
  if (UNLIKELY(ScopedMuteThreadSamples::IsMuted())) {
    return;
  }

  // Note: ReentryGuard prevents recursion introduced by malloc and the
  // initialization of thread local storage, which happen in the allocation
  // path only (see the ReentryGuard docs for full details). Therefore,
  // DoRecordFree() doesn't need to be guarded here.

  DoRecordFree(address);
}

}  // namespace base

#endif  // BASE_SAMPLING_HEAP_PROFILER_POISSON_ALLOCATION_SAMPLER_H_