// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/profiler/stack_sampler.h"

#include <iterator>
#include <utility>

#include "base/check.h"
#include "base/compiler_specific.h"
#include "base/memory/ptr_util.h"
#include "base/memory/stack_allocated.h"
#include "base/metrics/histogram_functions.h"
#include "base/numerics/safe_conversions.h"
#include "base/profiler/metadata_recorder.h"
#include "base/profiler/profile_builder.h"
#include "base/profiler/sample_metadata.h"
#include "base/profiler/stack_buffer.h"
#include "base/profiler/stack_copier.h"
#include "base/profiler/suspendable_thread_delegate.h"
#include "base/profiler/unwinder.h"
#include "base/ranges/algorithm.h"
#include "base/task/thread_pool.h"

#if PA_BUILDFLAG(USE_PARTITION_ALLOC)
#include "partition_alloc/tagging.h"  // nogncheck
#endif

// IMPORTANT NOTE: Some functions within this implementation are invoked while
// the target thread is suspended, so they must not perform any heap
// allocation, including indirectly via use of DCHECK/CHECK or other logging
// statements. Otherwise this code can deadlock on heap locks acquired by the
// target thread before it was suspended. These functions are commented with
// "NO HEAP ALLOCATIONS".
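//
// For illustration only (a hypothetical sketch, not code in this file): even
// an innocuous-looking statement such as
//   DCHECK(copy_succeeded) << "stack copy failed";
// can heap-allocate while building the log-message stream, which is exactly
// why the "NO HEAP ALLOCATIONS" functions avoid logging entirely.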

namespace base {

namespace {

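// Ref-counted wrapper around a ScopedClosureRunner. The wrapped closure runs
// when the last reference is dropped (or when RunAndReset() is called
// explicitly), regardless of which thread drops it.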
using CallbackRunner = base::RefCountedData<ScopedClosureRunner>;

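// An UnwinderCapture is a tuple of an Unwinder and its associated
// UnwinderStateCapture; these helpers name the two elements.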
Unwinder* GetUnwinder(const UnwinderCapture& state) {
  return std::get<0>(state);
}

UnwinderStateCapture* GetStateCapture(const UnwinderCapture& state) {
  return std::get<1>(state).get();
}

// Notifies the unwinders about the stack capture, and records metadata, while
// the thread is suspended.
class StackCopierDelegate : public StackCopier::Delegate {
  STACK_ALLOCATED();

 public:
  StackCopierDelegate(const std::vector<UnwinderCapture>* unwinders,
                      ProfileBuilder* profile_builder,
                      MetadataRecorder::MetadataProvider* metadata_provider)
      : unwinders_(unwinders),
        profile_builder_(profile_builder),
        metadata_provider_(metadata_provider) {}

  StackCopierDelegate(const StackCopierDelegate&) = delete;
  StackCopierDelegate& operator=(const StackCopierDelegate&) = delete;

  // StackCopier::Delegate:
  // IMPORTANT NOTE: to avoid deadlock this function must not invoke any
  // non-reentrant code that is also invoked by the target thread. In
  // particular, it may not perform any heap allocation or deallocation,
  // including indirectly via use of DCHECK/CHECK or other logging statements.
  void OnStackCopy() override {
    for (const auto& unwinder : *unwinders_) {
      GetUnwinder(unwinder)->OnStackCapture(GetStateCapture(unwinder));
    }

    profile_builder_->RecordMetadata(*metadata_provider_);
  }

 private:
  const std::vector<UnwinderCapture>* unwinders_;

  ProfileBuilder* const profile_builder_;
  const MetadataRecorder::MetadataProvider* const metadata_provider_;
};

}  // namespace

StackSampler::~StackSampler() = default;

std::unique_ptr<StackBuffer> StackSampler::CreateStackBuffer() {
  size_t size = GetStackBufferSize();
  if (size == 0) {
    return nullptr;
  }
  return std::make_unique<StackBuffer>(size);
}

void StackSampler::Initialize() {
  was_initialized_ = true;
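  // `unwinders_factory_` is a OnceCallback, so it is consumed by the Run()
  // below; Initialize() may therefore run at most once.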
  unwind_data_->Initialize(std::move(unwinders_factory_).Run());
  thread_pool_runner_ = base::ThreadPool::CreateSequencedTaskRunner({});

  // The thread pool might not start right away (or it may never start), so we
  // schedule a job and wait until it is actually running before we schedule
  // other work.
  thread_pool_runner_->PostTaskAndReply(
      FROM_HERE, base::DoNothing(),
      base::BindOnce(&StackSampler::ThreadPoolRunning,
                     weak_ptr_factory_.GetWeakPtr()));
}

void StackSampler::ThreadPoolRunning() {
  thread_pool_ready_ = true;
  unwind_data_->OnThreadPoolRunning();
}

void StackSampler::Stop(OnceClosure done_callback) {
  if (thread_pool_ready_) {
    // Post a task to the sequenced task runner to ensure any remaining work
    // has completed. Wrap `done_callback` in a CallbackRunner so that it is
    // invoked even if PostTaskAndReply returns false.
    auto callback_runner = base::MakeRefCounted<CallbackRunner>(
        ScopedClosureRunner(std::move(done_callback)));
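    // The ScopedClosureRunner runs `done_callback` when the last reference to
    // `callback_runner` is dropped, normally when the no-op reply below is
    // destroyed after running on this sequence.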
    bool res = thread_pool_runner_->PostTaskAndReply(
        FROM_HERE, base::DoNothing(),
        base::BindOnce([](scoped_refptr<CallbackRunner> runner) {},
                       callback_runner));
    if (!res) {
      callback_runner->data.RunAndReset();
    }
  } else {
    std::move(done_callback).Run();
  }
}

void StackSampler::AddAuxUnwinder(std::unique_ptr<Unwinder> unwinder) {
  if (thread_pool_ready_) {
    // If the thread pool is initialized, the unwinder's Initialize() must be
    // called on the thread pool since it manipulates the ModuleCache, but
    // AddAuxUnwinder() needs to happen on the SamplingThread.
    thread_pool_runner_->PostTaskAndReplyWithResult(
        FROM_HERE,
        base::BindOnce(
            [](StackUnwindData* unwind_data,
               std::unique_ptr<Unwinder> unwinder) {
              unwinder->Initialize(unwind_data->module_cache());
              return unwinder;
            },
            base::Unretained(unwind_data_.get()), std::move(unwinder)),
        base::BindOnce(&StackSampler::AddAuxUnwinderWithoutInit,
                       weak_ptr_factory_.GetWeakPtr()));
  } else {
    // StackSampler::Initialize() invokes Initialize() on the unwinders that
    // are present at the time. If it hasn't run yet, let it initialize this
    // unwinder along with the initial modules; otherwise initialize here.
    if (was_initialized_) {
      unwinder->Initialize(unwind_data_->module_cache());
    }
    unwind_data_->AddAuxUnwinder(std::move(unwinder));
  }
}

void StackSampler::AddAuxUnwinderWithoutInit(
    std::unique_ptr<Unwinder> unwinder) {
  unwind_data_->AddAuxUnwinder(std::move(unwinder));
}

void StackSampler::RecordStackFrames(StackBuffer* stack_buffer,
                                     PlatformThreadId thread_id,
                                     base::OnceClosure done_callback) {
  DCHECK(stack_buffer);

#if PA_BUILDFLAG(USE_PARTITION_ALLOC)
  // Disable MTE during this function because it indiscriminately reads stack
  // frames, some of which belong to system libraries rather than Chrome
  // itself. With stack tagging, some bytes on the stack have MTE tags
  // different from the stack pointer tag.
  partition_alloc::SuspendTagCheckingScope suspend_tag_checking_scope;
#endif

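  // Test-only hook: CreateForTesting() may supply a callback that is invoked
  // at the start of each sample.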
  if (record_sample_callback_) {
    record_sample_callback_.Run();
  }

  RegisterContext thread_context;
  uintptr_t stack_top;
  TimeTicks timestamp;

  std::vector<UnwinderCapture> unwinders = unwind_data_->GetUnwinderSnapshot();
  ProfileBuilder* profile_builder = unwind_data_->profile_builder();

  bool copy_stack_succeeded;
  {
    // Make this scope as small as possible because |metadata_provider| is
    // holding a lock.
    MetadataRecorder::MetadataProvider metadata_provider(
        GetSampleMetadataRecorder(), thread_id);
    StackCopierDelegate delegate(&unwinders, profile_builder,
                                 &metadata_provider);
    copy_stack_succeeded = stack_copier_->CopyStack(
        stack_buffer, &stack_top, &timestamp, &thread_context, &delegate);
  }
  if (!copy_stack_succeeded) {
    profile_builder->OnSampleCompleted(
        {}, timestamp.is_null() ? TimeTicks::Now() : timestamp);
    std::move(done_callback).Run();
    return;
  }

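  // The target thread is running again at this point, so it is safe for the
  // unwinders to update their module state from what was captured during
  // suspension.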
  for (const auto& unwinder : unwinders) {
    GetUnwinder(unwinder)->UpdateModules(GetStateCapture(unwinder));
  }

  if (test_delegate_) {
    test_delegate_->OnPreStackWalk();
  }

#if BUILDFLAG(IS_CHROMEOS)
  ptrdiff_t stack_size = reinterpret_cast<uint8_t*>(stack_top) -
                         reinterpret_cast<uint8_t*>(stack_buffer->buffer());
  constexpr int kBytesPerKilobyte = 1024;

  if ((++stack_size_histogram_sampling_counter_ %
       kUMAHistogramDownsampleAmount) == 0) {
    // Record the size of the stack to tune kLargeStackSize.
    // UmaHistogramMemoryKB has a min of 1000, which isn't useful for our
    // purposes, so call UmaHistogramCustomCounts directly.
    // Min is 4KB, since that's the normal page size and setting
    // kLargeStackSize smaller than that would be pointless. Max is 8MB since
    // that's the current ChromeOS stack size; we shouldn't be able to get a
    // number larger than that.
    UmaHistogramCustomCounts(
        "Memory.StackSamplingProfiler.StackSampleSize2",
        saturated_cast<int>(stack_size / kBytesPerKilobyte), 4, 8 * 1024, 50);
  }

  // We expect to very rarely see stacks larger than kLargeStackSize. If we do
  // see one, we tell the kernel to discard the contents of the buffer (using
  // madvise(MADV_DONTNEED)) after the first kLargeStackSize bytes, to avoid
  // permanently allocating memory that we won't use again. We don't want
  // kLargeStackSize to be too small, however: if we are constantly calling
  // madvise(MADV_DONTNEED) and then writing to the same parts of the buffer,
  // we're not saving memory and we'll cause extra page faults.
  constexpr ptrdiff_t kLargeStackSize = 32 * kBytesPerKilobyte;
  if (stack_size > kLargeStackSize) {
    stack_buffer->MarkUpperBufferContentsAsUnneeded(kLargeStackSize);
  }
#endif  // BUILDFLAG(IS_CHROMEOS)

  if (thread_pool_ready_) {
    // Since `stack_buffer` must be preallocated at the maximum stack size, it
    // tends to be much larger than the actual stack. So before handing the
    // stack over to the worker, copy it into a buffer sized to fit; allocating
    // a full-size `StackBuffer` for every sample would be wasteful.
    std::unique_ptr<StackBuffer> cloned_stack =
        stack_copier_->CloneStack(*stack_buffer, &stack_top, &thread_context);
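    // CloneStack also updates `stack_top` and `thread_context` so that they
    // refer into the cloned buffer rather than the original.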
    thread_pool_runner_->PostTaskAndReplyWithResult(
        FROM_HERE,
        base::BindOnce(
            [](StackUnwindData* unwind_data,
               std::vector<UnwinderCapture> unwinders,
               RegisterContext thread_context,
               std::unique_ptr<StackBuffer> stack, uintptr_t stack_top) {
              return WalkStack(unwind_data->module_cache(), &thread_context,
                               stack_top, std::move(unwinders));
            },
            base::Unretained(unwind_data_.get()), std::move(unwinders),
            OwnedRef(thread_context), std::move(cloned_stack), stack_top),
        base::BindOnce(&StackSampler::UnwindComplete,
                       weak_ptr_factory_.GetWeakPtr(), timestamp,
                       std::move(done_callback)));
  } else {
    auto frames = WalkStack(unwind_data_->module_cache(), &thread_context,
                            stack_top, std::move(unwinders));
    UnwindComplete(timestamp, std::move(done_callback), std::move(frames));
  }
}

void StackSampler::UnwindComplete(TimeTicks timestamp,
                                  OnceClosure done_callback,
                                  std::vector<Frame> frames) {
  unwind_data_->profile_builder()->OnSampleCompleted(std::move(frames),
                                                     timestamp);
  std::move(done_callback).Run();
}

StackUnwindData* StackSampler::GetStackUnwindData() {
  return unwind_data_.get();
}

// static
std::vector<Frame> StackSampler::WalkStackForTesting(
    ModuleCache* module_cache,
    RegisterContext* thread_context,
    uintptr_t stack_top,
    std::vector<UnwinderCapture> unwinders) {
  return WalkStack(module_cache, thread_context, stack_top,
                   std::move(unwinders));
}

// static
std::unique_ptr<StackSampler> StackSampler::CreateForTesting(
    std::unique_ptr<StackCopier> stack_copier,
    std::unique_ptr<StackUnwindData> stack_unwind_data,
    UnwindersFactory core_unwinders_factory,
    RepeatingClosure record_sample_callback,
    StackSamplerTestDelegate* test_delegate) {
  return base::WrapUnique(
      new StackSampler(std::move(stack_copier), std::move(stack_unwind_data),
                       std::move(core_unwinders_factory),
                       record_sample_callback, test_delegate));
}

StackSampler::StackSampler(std::unique_ptr<StackCopier> stack_copier,
                           std::unique_ptr<StackUnwindData> stack_unwind_data,
                           UnwindersFactory core_unwinders_factory,
                           RepeatingClosure record_sample_callback,
                           StackSamplerTestDelegate* test_delegate)
    : stack_copier_(std::move(stack_copier)),
      unwinders_factory_(std::move(core_unwinders_factory)),
      record_sample_callback_(std::move(record_sample_callback)),
      test_delegate_(test_delegate),
      unwind_data_(std::move(stack_unwind_data)) {
  CHECK(unwinders_factory_);
}

// static
std::vector<Frame> StackSampler::WalkStack(
    ModuleCache* module_cache,
    RegisterContext* thread_context,
    uintptr_t stack_top,
    std::vector<UnwinderCapture> unwinders) {
  std::vector<Frame> stack;
  // Reserve enough memory for most stacks, to avoid repeated
  // allocations. Approximately 99.9% of recorded stacks are 128 frames or
  // fewer.
  stack.reserve(128);

  // Record the first frame from the context values.
  stack.emplace_back(RegisterContextInstructionPointer(thread_context),
                     module_cache->GetModuleForAddress(
                         RegisterContextInstructionPointer(thread_context)));

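  // Track the stack size before each unwind attempt so the loop below can
  // detect when an unwinder fails to make progress.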
  size_t prior_stack_size;
  UnwindResult result;
  do {
    // Choose an authoritative unwinder for the current module. Use the first
    // unwinder that thinks it can unwind from the current frame.
    auto unwinder =
        ranges::find_if(unwinders, [&stack](const UnwinderCapture& unwinder) {
          return GetUnwinder(unwinder)->CanUnwindFrom(stack.back());
        });
    if (unwinder == unwinders.end()) {
      return stack;
    }

    prior_stack_size = stack.size();
    result = GetUnwinder(*unwinder)->TryUnwind(
        GetStateCapture(*unwinder), thread_context, stack_top, &stack);

    // The unwinder with the lowest priority should be the only one that
    // returns COMPLETED since the stack starts in native code.
    DCHECK(result != UnwindResult::kCompleted ||
           *unwinder == unwinders.back());
  } while (result != UnwindResult::kAborted &&
           result != UnwindResult::kCompleted &&
           // Give up if the authoritative unwinder for the module was unable
           // to unwind.
           stack.size() > prior_stack_size);

  return stack;
}

StackSamplerTestDelegate::~StackSamplerTestDelegate() = default;

StackSamplerTestDelegate::StackSamplerTestDelegate() = default;

}  // namespace base