// Copyright 2015 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/profiler/stack_sampler.h"

#include <iterator>
#include <utility>

#include "base/check.h"
#include "base/compiler_specific.h"
#include "base/memory/ptr_util.h"
#include "base/memory/stack_allocated.h"
#include "base/metrics/histogram_functions.h"
#include "base/numerics/safe_conversions.h"
#include "base/profiler/metadata_recorder.h"
#include "base/profiler/profile_builder.h"
#include "base/profiler/sample_metadata.h"
#include "base/profiler/stack_buffer.h"
#include "base/profiler/stack_copier.h"
#include "base/profiler/suspendable_thread_delegate.h"
#include "base/profiler/unwinder.h"
#include "base/ranges/algorithm.h"
#include "base/task/thread_pool.h"

#if PA_BUILDFLAG(USE_PARTITION_ALLOC)
#include "partition_alloc/tagging.h"  // nogncheck
#endif

// IMPORTANT NOTE: Some functions within this implementation are invoked while
// the target thread is suspended, so they must not do any allocation from the
// heap, including indirectly via use of DCHECK/CHECK or other logging
// statements. Otherwise this code can deadlock on heap locks acquired by the
// target thread before it was suspended. These functions are commented with
// "NO HEAP ALLOCATIONS".

namespace base {

namespace {

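// Shares Stop()'s completion closure between a posted task and its reply.
// ScopedClosureRunner runs its closure when it is destroyed, so the closure
// still runs once the last reference to the wrapper is dropped, even if the
// posted task never executes. See Stop() below.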
using CallbackRunner = base::RefCountedData<ScopedClosureRunner>;

Unwinder* GetUnwinder(const UnwinderCapture& state) {
  return std::get<0>(state);
}

UnwinderStateCapture* GetStateCapture(const UnwinderCapture& state) {
  return std::get<1>(state).get();
}

// Notifies the unwinders about the stack capture, and records metadata, while
// the thread is suspended.
class StackCopierDelegate : public StackCopier::Delegate {
  STACK_ALLOCATED();

 public:
  StackCopierDelegate(const std::vector<UnwinderCapture>* unwinders,
                      ProfileBuilder* profile_builder,
                      MetadataRecorder::MetadataProvider* metadata_provider)
      : unwinders_(unwinders),
        profile_builder_(profile_builder),
        metadata_provider_(metadata_provider) {}

  StackCopierDelegate(const StackCopierDelegate&) = delete;
  StackCopierDelegate& operator=(const StackCopierDelegate&) = delete;

  // StackCopier::Delegate:
  // IMPORTANT NOTE: to avoid deadlock this function must not invoke any
  // non-reentrant code that is also invoked by the target thread. In
  // particular, it may not perform any heap allocation or deallocation,
  // including indirectly via use of DCHECK/CHECK or other logging statements.
  void OnStackCopy() override {
    for (const auto& unwinder : *unwinders_) {
      GetUnwinder(unwinder)->OnStackCapture(GetStateCapture(unwinder));
    }

    profile_builder_->RecordMetadata(*metadata_provider_);
  }

 private:
  const std::vector<UnwinderCapture>* unwinders_;

  ProfileBuilder* const profile_builder_;
  const MetadataRecorder::MetadataProvider* const metadata_provider_;
};

}  // namespace

StackSampler::~StackSampler() = default;

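// Returns a buffer sized to hold the largest stack this platform expects to
// copy, or null if GetStackBufferSize() reports a size of zero.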
std::unique_ptr<StackBuffer> StackSampler::CreateStackBuffer() {
  size_t size = GetStackBufferSize();
  if (size == 0) {
    return nullptr;
  }
  return std::make_unique<StackBuffer>(size);
}

void StackSampler::Initialize() {
  was_initialized_ = true;
  unwind_data_->Initialize(std::move(unwinders_factory_).Run());
  thread_pool_runner_ = base::ThreadPool::CreateSequencedTaskRunner({});

  // The thread pool might not start right away (or it may never start), so we
  // schedule a job and wait for it to become running before we schedule other
  // work.
  thread_pool_runner_->PostTaskAndReply(
      FROM_HERE, base::DoNothing(),
      base::BindOnce(&StackSampler::ThreadPoolRunning,
                     weak_ptr_factory_.GetWeakPtr()));
}

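// Reply to the no-op task posted from Initialize(). Once it runs, the thread
// pool is known to be executing tasks, so stack walks can be offloaded to it.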
void StackSampler::ThreadPoolRunning() {
  thread_pool_ready_ = true;
  unwind_data_->OnThreadPoolRunning();
}

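// Flushes any work still queued on the thread-pool sequence before running
// `done_callback` on the calling thread.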
void StackSampler::Stop(OnceClosure done_callback) {
  if (thread_pool_ready_) {
    // Post a task to the sequenced task runner to ensure any remaining work
    // has completed. A CallbackRunner is used so that `done_callback` is
    // still run even if PostTaskAndReply() returns false.
    auto callback_runner = base::MakeRefCounted<CallbackRunner>(
        ScopedClosureRunner(std::move(done_callback)));
    bool res = thread_pool_runner_->PostTaskAndReply(
        FROM_HERE, base::DoNothing(),
        base::BindOnce([](scoped_refptr<CallbackRunner> runner) {},
                       callback_runner));
    if (!res) {
      callback_runner->data.RunAndReset();
    }
  } else {
    std::move(done_callback).Run();
  }
}

void StackSampler::AddAuxUnwinder(std::unique_ptr<Unwinder> unwinder) {
  if (thread_pool_ready_) {
    // If we have initialized a thread pool, the unwinder's Initialize() needs
    // to be called on the thread pool since it will manipulate the
    // ModuleCache, but AddAuxUnwinder() itself needs to happen on the
    // SamplingThread.
    thread_pool_runner_->PostTaskAndReplyWithResult(
        FROM_HERE,
        base::BindOnce(
            [](StackUnwindData* unwind_data,
               std::unique_ptr<Unwinder> unwinder) {
              unwinder->Initialize(unwind_data->module_cache());
              return unwinder;
            },
            base::Unretained(unwind_data_.get()), std::move(unwinder)),
        base::BindOnce(&StackSampler::AddAuxUnwinderWithoutInit,
                       weak_ptr_factory_.GetWeakPtr()));
  } else {
    // StackSampler::Initialize() invokes Initialize() on whichever unwinders
    // are present at the time. If it hasn't run yet, we let it initialize this
    // unwinder and add the initial modules; otherwise we do so here.
    if (was_initialized_) {
      unwinder->Initialize(unwind_data_->module_cache());
    }
    unwind_data_->AddAuxUnwinder(std::move(unwinder));
  }
}

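// Reply step of AddAuxUnwinder() when the thread pool is in use: the unwinder
// has already been initialized on the thread-pool sequence, so it only needs
// to be registered here.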
void StackSampler::AddAuxUnwinderWithoutInit(
    std::unique_ptr<Unwinder> unwinder) {
  unwind_data_->AddAuxUnwinder(std::move(unwinder));
}

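// Copies the stack of `thread_id` into `stack_buffer`, walks it (on the
// thread pool when available), reports the resulting sample to the profile
// builder, and finally runs `done_callback`.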
void StackSampler::RecordStackFrames(StackBuffer* stack_buffer,
                                     PlatformThreadId thread_id,
                                     base::OnceClosure done_callback) {
  DCHECK(stack_buffer);

#if PA_BUILDFLAG(USE_PARTITION_ALLOC)
  // Disable MTE during this function because this function indiscriminately
  // reads stack frames, some of which belong to system libraries, not Chrome
  // itself. With stack tagging, some bytes on the stack have MTE tags different
  // from the stack pointer tag.
  partition_alloc::SuspendTagCheckingScope suspend_tag_checking_scope;
#endif

  if (record_sample_callback_) {
    record_sample_callback_.Run();
  }

  RegisterContext thread_context;
  uintptr_t stack_top;
  TimeTicks timestamp;

  std::vector<UnwinderCapture> unwinders = unwind_data_->GetUnwinderSnapshot();
  ProfileBuilder* profile_builder = unwind_data_->profile_builder();

  bool copy_stack_succeeded;
  {
    // Make this scope as small as possible because |metadata_provider| is
    // holding a lock.
    MetadataRecorder::MetadataProvider metadata_provider(
        GetSampleMetadataRecorder(), thread_id);
    StackCopierDelegate delegate(&unwinders, profile_builder,
                                 &metadata_provider);
    copy_stack_succeeded = stack_copier_->CopyStack(
        stack_buffer, &stack_top, &timestamp, &thread_context, &delegate);
  }
  if (!copy_stack_succeeded) {
    profile_builder->OnSampleCompleted(
        {}, timestamp.is_null() ? TimeTicks::Now() : timestamp);
    std::move(done_callback).Run();
    return;
  }

  for (const auto& unwinder : unwinders) {
    GetUnwinder(unwinder)->UpdateModules(GetStateCapture(unwinder));
  }

  if (test_delegate_) {
    test_delegate_->OnPreStackWalk();
  }

#if BUILDFLAG(IS_CHROMEOS)
  ptrdiff_t stack_size = reinterpret_cast<uint8_t*>(stack_top) -
                         reinterpret_cast<uint8_t*>(stack_buffer->buffer());
  constexpr int kBytesPerKilobyte = 1024;

  if ((++stack_size_histogram_sampling_counter_ %
       kUMAHistogramDownsampleAmount) == 0) {
    // Record the size of the stack to tune kLargeStackSize.
    // UmaHistogramMemoryKB has a min of 1000, which isn't useful for our
    // purposes, so call UmaHistogramCustomCounts directly.
    // Min is 4KB, since that's the normal pagesize and setting kLargeStackSize
    // smaller than that would be pointless. Max is 8MB since that's the
    // current ChromeOS stack size; we shouldn't be able to get a number
    // larger than that.
    UmaHistogramCustomCounts(
        "Memory.StackSamplingProfiler.StackSampleSize2",
        saturated_cast<int>(stack_size / kBytesPerKilobyte), 4, 8 * 1024, 50);
  }

  // We expect to very rarely see stacks larger than kLargeStackSize. If we see
  // a stack larger than kLargeStackSize, we tell the kernel to discard the
  // contents of the buffer (using madvise(MADV_DONTNEED)) after the first
  // kLargeStackSize bytes to avoid permanently allocating memory that we won't
  // use again. We don't want kLargeStackSize to be too small, however: if we
  // are constantly calling madvise(MADV_DONTNEED) and then writing to the same
  // parts of the buffer, we're not saving memory and we'll cause extra page
  // faults.
  constexpr ptrdiff_t kLargeStackSize = 32 * kBytesPerKilobyte;
  if (stack_size > kLargeStackSize) {
    stack_buffer->MarkUpperBufferContentsAsUnneeded(kLargeStackSize);
  }
#endif  // #if BUILDFLAG(IS_CHROMEOS)

  if (thread_pool_ready_) {
    // Since `stack_buffer` must be preallocated at the maximum stack size, it
    // tends to be much larger than the actual captured stack. So we make a
    // smaller copy of the stack here before passing it over to the worker;
    // allocating a full `StackBuffer` for every sample would not be good.
    std::unique_ptr<StackBuffer> cloned_stack =
        stack_copier_->CloneStack(*stack_buffer, &stack_top, &thread_context);
    thread_pool_runner_->PostTaskAndReplyWithResult(
        FROM_HERE,
        base::BindOnce(
            [](StackUnwindData* unwind_data,
               std::vector<UnwinderCapture> unwinders,
               RegisterContext thread_context,
               std::unique_ptr<StackBuffer> stack, uintptr_t stack_top) {
              return WalkStack(unwind_data->module_cache(), &thread_context,
                               stack_top, std::move(unwinders));
            },
            base::Unretained(unwind_data_.get()), std::move(unwinders),
            OwnedRef(thread_context), std::move(cloned_stack), stack_top),
        base::BindOnce(&StackSampler::UnwindComplete,
                       weak_ptr_factory_.GetWeakPtr(), timestamp,
                       std::move(done_callback)));
  } else {
    auto frames = WalkStack(unwind_data_->module_cache(), &thread_context,
                            stack_top, std::move(unwinders));
    UnwindComplete(timestamp, std::move(done_callback), std::move(frames));
  }
}

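// Receives the unwound frames, either synchronously from RecordStackFrames()
// or as the reply from the thread-pool unwind task, and forwards them to the
// profile builder along with the capture timestamp.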
void StackSampler::UnwindComplete(TimeTicks timestamp,
                                  OnceClosure done_callback,
                                  std::vector<Frame> frames) {
  unwind_data_->profile_builder()->OnSampleCompleted(std::move(frames),
                                                     timestamp);
  std::move(done_callback).Run();
}

StackUnwindData* StackSampler::GetStackUnwindData() {
  return unwind_data_.get();
}

// static
std::vector<Frame> StackSampler::WalkStackForTesting(
    ModuleCache* module_cache,
    RegisterContext* thread_context,
    uintptr_t stack_top,
    std::vector<UnwinderCapture> unwinders) {
  return WalkStack(module_cache, thread_context, stack_top,
                   std::move(unwinders));
}

// static
std::unique_ptr<StackSampler> StackSampler::CreateForTesting(
    std::unique_ptr<StackCopier> stack_copier,
    std::unique_ptr<StackUnwindData> stack_unwind_data,
    UnwindersFactory core_unwinders_factory,
    RepeatingClosure record_sample_callback,
    StackSamplerTestDelegate* test_delegate) {
  return base::WrapUnique(
      new StackSampler(std::move(stack_copier), std::move(stack_unwind_data),
                       std::move(core_unwinders_factory),
                       record_sample_callback, test_delegate));
}

StackSampler::StackSampler(std::unique_ptr<StackCopier> stack_copier,
                           std::unique_ptr<StackUnwindData> stack_unwind_data,
                           UnwindersFactory core_unwinders_factory,
                           RepeatingClosure record_sample_callback,
                           StackSamplerTestDelegate* test_delegate)
    : stack_copier_(std::move(stack_copier)),
      unwinders_factory_(std::move(core_unwinders_factory)),
      record_sample_callback_(std::move(record_sample_callback)),
      test_delegate_(test_delegate),
      unwind_data_(std::move(stack_unwind_data)) {
  CHECK(unwinders_factory_);
}

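// Walks the copied stack starting from the captured register context,
// delegating each frame to the first unwinder that reports it can unwind from
// it, and stopping once an unwinder aborts, completes, or fails to make
// progress.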
// static
std::vector<Frame> StackSampler::WalkStack(
    ModuleCache* module_cache,
    RegisterContext* thread_context,
    uintptr_t stack_top,
    std::vector<UnwinderCapture> unwinders) {
  std::vector<Frame> stack;
  // Reserve enough memory for most stacks, to avoid repeated
  // allocations. Approximately 99.9% of recorded stacks are 128 frames or
  // fewer.
  stack.reserve(128);

  // Record the first frame from the context values.
  stack.emplace_back(RegisterContextInstructionPointer(thread_context),
                     module_cache->GetModuleForAddress(
                         RegisterContextInstructionPointer(thread_context)));

  size_t prior_stack_size;
  UnwindResult result;
  do {
    // Choose an authoritative unwinder for the current module. Use the first
    // unwinder that thinks it can unwind from the current frame.
    auto unwinder =
        ranges::find_if(unwinders, [&stack](const UnwinderCapture& unwinder) {
          return GetUnwinder(unwinder)->CanUnwindFrom(stack.back());
        });
    if (unwinder == unwinders.end()) {
      return stack;
    }

    prior_stack_size = stack.size();
    result = GetUnwinder(*unwinder)->TryUnwind(
        GetStateCapture(*unwinder), thread_context, stack_top, &stack);

    // The unwinder with the lowest priority should be the only one that
    // returns COMPLETED since the stack starts in native code.
    DCHECK(result != UnwindResult::kCompleted || *unwinder == unwinders.back());
  } while (result != UnwindResult::kAborted &&
           result != UnwindResult::kCompleted &&
           // Give up if the authoritative unwinder for the module was unable
           // to unwind.
           stack.size() > prior_stack_size);

  return stack;
}

StackSamplerTestDelegate::~StackSamplerTestDelegate() = default;

StackSamplerTestDelegate::StackSamplerTestDelegate() = default;

}  // namespace base