1 /**
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include <sys/syscall.h>
17
18 #include "libpandabase/macros.h"
19 #include "os/thread.h"
20 #include "runtime/tooling/sampler/sampling_profiler.h"
21 #include "runtime/include/managed_thread.h"
22 #include "runtime/thread_manager.h"
23 #include "runtime/tooling/sampler/stack_walker_base.h"
24 #include "runtime/tooling/pt_thread_info.h"
25 #include "runtime/signal_handler.h"
26 #include "runtime/coroutines/coroutine.h"
27
28 namespace ark::tooling::sampler {
29
// Counts the active sampler thread plus the SIGPROF handlers that are currently running.
// The signal handler returns immediately while this counter is zero.
static std::atomic<int> g_sCurrentHandlersCounter {0};
31
32 /* static */
33 Sampler *Sampler::instance_ = nullptr;
34
// Sampling statistics. They are updated from the signal handler path and printed by Sampler::Destroy().
// Samples dropped because of a bypass frame, an invalid method, an unknown panda file or SIGSEGV
static std::atomic<size_t> g_sLostSamples {0};
// Samples dropped because SIGSEGV occurred while the stack was being walked
static std::atomic<size_t> g_sLostSegvSamples {0};
// Samples dropped because a method pointer was null or invalid
static std::atomic<size_t> g_sLostInvalidSamples {0};
// Samples dropped because the panda file of a method was not found among the registered ones
static std::atomic<size_t> g_sLostNotFindSamples {0};
// Handler invocations that found a current frame on the interrupted thread
static std::atomic<size_t> g_sTotalSamples {0};
40
41 class ScopedThreadSampling {
42 public:
ScopedThreadSampling(ThreadSamplingInfo * samplingInfo)43 explicit ScopedThreadSampling(ThreadSamplingInfo *samplingInfo) : samplingInfo_(samplingInfo)
44 {
45 ASSERT(samplingInfo_ != nullptr);
46 ASSERT(samplingInfo_->IsThreadSampling() == false);
47 samplingInfo_->SetThreadSampling(true);
48 }
49
~ScopedThreadSampling()50 ~ScopedThreadSampling()
51 {
52 ASSERT(samplingInfo_->IsThreadSampling() == true);
53 samplingInfo_->SetThreadSampling(false);
54 }
55
56 private:
57 ThreadSamplingInfo *samplingInfo_;
58
59 NO_COPY_SEMANTIC(ScopedThreadSampling);
60 NO_MOVE_SEMANTIC(ScopedThreadSampling);
61 };
62
63 class ScopedHandlersCounting {
64 public:
ScopedHandlersCounting()65 explicit ScopedHandlersCounting()
66 {
67 ++g_sCurrentHandlersCounter;
68 }
69
~ScopedHandlersCounting()70 ~ScopedHandlersCounting()
71 {
72 --g_sCurrentHandlersCounter;
73 }
74
75 NO_COPY_SEMANTIC(ScopedHandlersCounting);
76 NO_MOVE_SEMANTIC(ScopedHandlersCounting);
77 };
78
79 /* static */
Create()80 Sampler *Sampler::Create()
81 {
82 /*
83 * Sampler can be created only once and managed by one thread
84 * Runtime::Tools owns it ptr after it's created
85 */
86 ASSERT(instance_ == nullptr);
87 instance_ = new Sampler;
88
89 /**
90 * As soon as the sampler is created, we subscribe to the events
91 * This is done so that start and stop do not depend on the runtime
92 * Internal issue #13780
93 */
94 ASSERT(Runtime::GetCurrent() != nullptr);
95
96 Runtime::GetCurrent()->GetNotificationManager()->AddListener(instance_,
97 RuntimeNotificationManager::Event::THREAD_EVENTS);
98 Runtime::GetCurrent()->GetNotificationManager()->AddListener(instance_,
99 RuntimeNotificationManager::Event::LOAD_MODULE);
100 /**
101 * Collect threads and modules which were created before sampler start
102 * If we collect them before add listeners then new thread can be created (or new module can be loaded)
103 * so we will lose this thread (or module)
104 */
105 instance_->CollectThreads();
106 instance_->CollectModules();
107
108 return Sampler::instance_;
109 }
110
111 /* static */
Destroy(Sampler * sampler)112 void Sampler::Destroy(Sampler *sampler)
113 {
114 ASSERT(instance_ != nullptr);
115 ASSERT(instance_ == sampler);
116 ASSERT(!sampler->isActive_);
117
118 LOG(INFO, PROFILER) << "Total samples: " << g_sTotalSamples << "\nLost samples: " << g_sLostSamples;
119 LOG(INFO, PROFILER) << "Lost samples(Invalid method ptr): " << g_sLostInvalidSamples
120 << "\nLost samples(Invalid pf ptr): " << g_sLostNotFindSamples;
121 LOG(INFO, PROFILER) << "Lost samples(SIGSEGV occured): " << g_sLostSegvSamples;
122
123 Runtime::GetCurrent()->GetNotificationManager()->RemoveListener(instance_,
124 RuntimeNotificationManager::Event::THREAD_EVENTS);
125 Runtime::GetCurrent()->GetNotificationManager()->RemoveListener(instance_,
126 RuntimeNotificationManager::Event::LOAD_MODULE);
127
128 instance_->ClearManagedThreadSet();
129 instance_->ClearLoadedPfs();
130
131 delete sampler;
132 instance_ = nullptr;
133 }
134
Sampler()135 Sampler::Sampler() : runtime_(Runtime::GetCurrent()), sampleInterval_(DEFAULT_SAMPLE_INTERVAL_US)
136 {
137 ASSERT_NATIVE_CODE();
138 }
139
AddThreadHandle(ManagedThread * thread)140 void Sampler::AddThreadHandle(ManagedThread *thread)
141 {
142 os::memory::LockHolder holder(managedThreadsLock_);
143 managedThreads_.insert(thread->GetId());
144 }
145
EraseThreadHandle(ManagedThread * thread)146 void Sampler::EraseThreadHandle(ManagedThread *thread)
147 {
148 os::memory::LockHolder holder(managedThreadsLock_);
149 managedThreads_.erase(thread->GetId());
150 }
151
ThreadStart(ManagedThread * managedThread)152 void Sampler::ThreadStart(ManagedThread *managedThread)
153 {
154 AddThreadHandle(managedThread);
155 }
156
ThreadEnd(ManagedThread * managedThread)157 void Sampler::ThreadEnd(ManagedThread *managedThread)
158 {
159 EraseThreadHandle(managedThread);
160 }
161
LoadModule(std::string_view name)162 void Sampler::LoadModule(std::string_view name)
163 {
164 auto callback = [this, name](const panda_file::File &pf) {
165 if (pf.GetFilename() == name) {
166 auto ptrId = reinterpret_cast<uintptr_t>(&pf);
167 FileInfo pfModule;
168 pfModule.ptr = ptrId;
169 pfModule.pathname = pf.GetFullFileName();
170 pfModule.checksum = pf.GetHeader()->checksum;
171 if (!loadedPfsQueue_.FindValue(ptrId)) {
172 loadedPfsQueue_.Push(pfModule);
173 }
174 os::memory::LockHolder holder(loadedPfsLock_);
175 this->loadedPfs_.push_back(pfModule);
176 return false;
177 }
178 return true;
179 };
180 runtime_->GetClassLinker()->EnumeratePandaFiles(callback, false);
181 }
182
Start(const char * filename)183 bool Sampler::Start(const char *filename)
184 {
185 if (isActive_) {
186 LOG(ERROR, PROFILER) << "Attemp to start sampling profiler while it's already started";
187 return false;
188 }
189
190 if (UNLIKELY(!communicator_.Init())) {
191 LOG(ERROR, PROFILER) << "Failed to create pipes for sampling listener. Profiler cannot be started";
192 return false;
193 }
194
195 isActive_ = true;
196 // Creating std::string instead of sending pointer to avoid UB stack-use-after-scope
197 listenerThread_ = std::make_unique<std::thread>(&Sampler::ListenerThreadEntry, this, std::string(filename));
198 listenerTid_ = listenerThread_->native_handle();
199
200 // All prepairing actions should be done before this thread is started
201 samplerThread_ = std::make_unique<std::thread>(&Sampler::SamplerThreadEntry, this);
202 samplerTid_ = samplerThread_->native_handle();
203
204 return true;
205 }
206
Stop()207 void Sampler::Stop()
208 {
209 if (!isActive_) {
210 LOG(ERROR, PROFILER) << "Attemp to stop sampling profiler, but it was not started";
211 return;
212 }
213 if (!samplerThread_->joinable()) {
214 LOG(FATAL, PROFILER) << "Sampling profiler thread unexpectedly disappeared";
215 UNREACHABLE();
216 }
217 if (!listenerThread_->joinable()) {
218 LOG(FATAL, PROFILER) << "Listener profiler thread unexpectedly disappeared";
219 UNREACHABLE();
220 }
221
222 isActive_ = false;
223 samplerThread_->join();
224 listenerThread_->join();
225
226 // After threads are stopped we can clear all sampler info
227 samplerThread_.reset();
228 listenerThread_.reset();
229 samplerTid_ = 0;
230 listenerTid_ = 0;
231 }
232
WriteLoadedPandaFiles(StreamWriter * writerPtr)233 void Sampler::WriteLoadedPandaFiles(StreamWriter *writerPtr)
234 {
235 os::memory::LockHolder holder(loadedPfsLock_);
236 if (LIKELY(loadedPfs_.empty())) {
237 return;
238 }
239 for (const auto &module : loadedPfs_) {
240 if (!writerPtr->IsModuleWritten(module)) {
241 writerPtr->WriteModule(module);
242 }
243 }
244 loadedPfs_.clear();
245 }
246
CollectThreads()247 void Sampler::CollectThreads()
248 {
249 auto tManager = runtime_->GetPandaVM()->GetThreadManager();
250 if (UNLIKELY(tManager == nullptr)) {
251 // NOTE(m.strizhak): make it for languages without thread_manager
252 LOG(FATAL, PROFILER) << "Thread manager is nullptr";
253 UNREACHABLE();
254 }
255
256 tManager->EnumerateThreads(
257 [this](ManagedThread *thread) {
258 AddThreadHandle(thread);
259 return true;
260 },
261 static_cast<unsigned int>(EnumerationFlag::ALL), static_cast<unsigned int>(EnumerationFlag::VM_THREAD));
262 }
263
CollectModules()264 void Sampler::CollectModules()
265 {
266 auto callback = [this](const panda_file::File &pf) {
267 auto ptrId = reinterpret_cast<uintptr_t>(&pf);
268 FileInfo pfModule;
269
270 pfModule.ptr = ptrId;
271 pfModule.pathname = pf.GetFullFileName();
272 pfModule.checksum = pf.GetHeader()->checksum;
273
274 if (!loadedPfsQueue_.FindValue(ptrId)) {
275 loadedPfsQueue_.Push(pfModule);
276 }
277
278 os::memory::LockHolder holder(loadedPfsLock_);
279 this->loadedPfs_.push_back(pfModule);
280
281 return true;
282 };
283 runtime_->GetClassLinker()->EnumeratePandaFiles(callback, false);
284 }
285
GetThreadStatus(ManagedThread * mthread)286 static SampleInfo::ThreadStatus GetThreadStatus(ManagedThread *mthread)
287 {
288 ASSERT(mthread != nullptr);
289
290 auto threadStatus = mthread->GetStatus();
291 if (threadStatus == ThreadStatus::RUNNING) {
292 return SampleInfo::ThreadStatus::RUNNING;
293 }
294
295 bool isCoroutineRunning = false;
296 if (Coroutine::ThreadIsCoroutine(mthread)) {
297 isCoroutineRunning = Coroutine::CastFromThread(mthread)->GetCoroutineStatus() == Coroutine::Status::RUNNING;
298 }
299 if (threadStatus == ThreadStatus::NATIVE && isCoroutineRunning) {
300 return SampleInfo::ThreadStatus::RUNNING;
301 }
302
303 return SampleInfo::ThreadStatus::SUSPENDED;
304 }
305
306 struct SamplerFrameInfo {
307 Frame *frame;
308 bool isCompiled;
309 };
310
311 /**
312 * @brief Collects samples from boundary frames.
313 * @returns true if bypass frame was found, false otherwise.
314 */
CollectBoundaryFrames(SamplerFrameInfo & frameInfo,SampleInfo & sample,size_t & stackCounter)315 static bool CollectBoundaryFrames(SamplerFrameInfo &frameInfo, SampleInfo &sample, size_t &stackCounter)
316 {
317 ASSERT(frameInfo.frame != nullptr);
318
319 bool isFrameBoundary = true;
320 while (isFrameBoundary) {
321 auto *prevFrame = frameInfo.frame->GetPrevFrame();
322 const auto *method = frameInfo.frame->GetMethod();
323 if (StackWalkerBase::IsMethodInI2CFrame(method)) {
324 sample.stackInfo.managedStack[stackCounter].pandaFilePtr = helpers::ToUnderlying(FrameKind::BRIDGE);
325 sample.stackInfo.managedStack[stackCounter].fileId = helpers::ToUnderlying(FrameKind::BRIDGE);
326 ++stackCounter;
327
328 frameInfo.frame = prevFrame;
329 frameInfo.isCompiled = false;
330 } else if (StackWalkerBase::IsMethodInC2IFrame(method)) {
331 sample.stackInfo.managedStack[stackCounter].pandaFilePtr = helpers::ToUnderlying(FrameKind::BRIDGE);
332 sample.stackInfo.managedStack[stackCounter].fileId = helpers::ToUnderlying(FrameKind::BRIDGE);
333 ++stackCounter;
334
335 frameInfo.frame = prevFrame;
336 frameInfo.isCompiled = true;
337 } else if (StackWalkerBase::IsMethodInBPFrame(method)) {
338 g_sLostSamples++;
339 return true;
340 } else {
341 isFrameBoundary = false;
342 }
343 }
344 return false;
345 }
346
ProcessCompiledTopFrame(SamplerFrameInfo & frameInfo,SampleInfo & sample,size_t & stackCounter,void * signalContextPtr)347 static void ProcessCompiledTopFrame(SamplerFrameInfo &frameInfo, SampleInfo &sample, size_t &stackCounter,
348 void *signalContextPtr)
349 {
350 CFrame cframe(frameInfo.frame);
351 if (cframe.IsNative()) {
352 return;
353 }
354
355 auto signalContext = SignalContext(signalContextPtr);
356 auto fp = signalContext.GetFP();
357 if (fp == nullptr) {
358 sample.stackInfo.managedStack[stackCounter].pandaFilePtr = helpers::ToUnderlying(FrameKind::BRIDGE);
359 sample.stackInfo.managedStack[stackCounter].fileId = helpers::ToUnderlying(FrameKind::BRIDGE);
360 ++stackCounter;
361
362 // fp is not set yet, so cframe not finished, currently in bridge, previous frame iframe
363 frameInfo.isCompiled = false;
364 return;
365 }
366
367 auto pc = signalContext.GetPC();
368 bool pcInCompiledCode = InAllocatedCodeRange(pc);
369 if (pcInCompiledCode) {
370 // Currently in compiled method so get it from fp
371 frameInfo.frame = reinterpret_cast<Frame *>(fp);
372 } else {
373 const LockFreeQueue &pfsQueue = Sampler::GetSampleQueuePF();
374 auto pfId = reinterpret_cast<uintptr_t>(frameInfo.frame->GetMethod()->GetPandaFile());
375 if (pfsQueue.FindValue(pfId)) {
376 sample.stackInfo.managedStack[stackCounter].pandaFilePtr = helpers::ToUnderlying(FrameKind::BRIDGE);
377 sample.stackInfo.managedStack[stackCounter].fileId = helpers::ToUnderlying(FrameKind::BRIDGE);
378 ++stackCounter;
379
380 // pc not in jitted code, so fp is not up-to-date, currently not in cfame
381 frameInfo.isCompiled = false;
382 }
383 }
384 }
385
386 /**
387 * @brief Walk stack frames and collect samples.
388 * @returns true if invalid frame was encountered, false otherwise.
389 */
CollectFrames(SamplerFrameInfo & frameInfo,SampleInfo & sample,size_t & stackCounter)390 static bool CollectFrames(SamplerFrameInfo &frameInfo, SampleInfo &sample, size_t &stackCounter)
391 {
392 const LockFreeQueue &pfsQueue = Sampler::GetSampleQueuePF();
393 auto stackWalker = StackWalkerBase(frameInfo.frame, frameInfo.isCompiled);
394 while (stackWalker.HasFrame()) {
395 auto *method = stackWalker.GetMethod();
396 if (method == nullptr || IsInvalidPointer(reinterpret_cast<uintptr_t>(method))) {
397 g_sLostSamples++;
398 g_sLostInvalidSamples++;
399 return true;
400 }
401
402 auto *pf = method->GetPandaFile();
403 auto pfId = reinterpret_cast<uintptr_t>(pf);
404 if (!pfsQueue.FindValue(pfId)) {
405 g_sLostSamples++;
406 g_sLostNotFindSamples++;
407 return true;
408 }
409
410 sample.stackInfo.managedStack[stackCounter].pandaFilePtr = pfId;
411 sample.stackInfo.managedStack[stackCounter].fileId = method->GetFileId().GetOffset();
412
413 ++stackCounter;
414 stackWalker.NextFrame();
415
416 if (stackCounter == SampleInfo::StackInfo::MAX_STACK_DEPTH) {
417 // According to the limitations we should drop all frames that is higher than MAX_STACK_DEPTH
418 break;
419 }
420 }
421
422 return false;
423 }
424
SigProfSamplingProfilerHandler(int signum,siginfo_t * siginfo,void * ptr)425 void SigProfSamplingProfilerHandler([[maybe_unused]] int signum, [[maybe_unused]] siginfo_t *siginfo,
426 [[maybe_unused]] void *ptr)
427 {
428 if (g_sCurrentHandlersCounter == 0) {
429 // Sampling ended if S_CURRENT_HANDLERS_COUNTER is 0. Thread started executing handler for signal
430 // that was sent before end, so thread is late now and we should return from handler
431 return;
432 }
433 auto scopedHandlersCounting = ScopedHandlersCounting();
434
435 ManagedThread *mthread = ManagedThread::GetCurrent();
436 ASSERT(mthread != nullptr);
437
438 // Checking that code is being executed
439 auto *framePtr = reinterpret_cast<CFrame::SlotType *>(mthread->GetCurrentFrame());
440 if (framePtr == nullptr) {
441 return;
442 }
443
444 g_sTotalSamples++;
445
446 // Note that optimized variables may end up with incorrect value as a consequence of a longjmp() operation
447 // - see "local variable clobbering and setjmp".
448 // Variables below are not volatile because they are not used after longjmp() is done.
449 SamplerFrameInfo frameInfo {mthread->GetCurrentFrame(), mthread->IsCurrentFrameCompiled()};
450
451 SampleInfo sample {};
452 // `mthread` is passed as non-const argument into `GetThreadStatus`, so call it before `setjmp`
453 // in order to bypass "variable might be clobbered by ‘longjmp’" compiler warning.
454 sample.threadInfo.threadStatus = GetThreadStatus(mthread);
455 size_t stackCounter = 0;
456
457 ScopedThreadSampling scopedThreadSampling(mthread->GetPtThreadInfo()->GetSamplingInfo());
458
459 auto &sigSegvJmpBuf = mthread->GetPtThreadInfo()->GetSamplingInfo()->GetSigSegvJmpEnv();
460 // NOLINTNEXTLINE(cert-err52-cpp)
461 if (setjmp(sigSegvJmpBuf) != 0) {
462 // This code executed after longjmp()
463 // In case of SIGSEGV we lose the sample
464 g_sLostSamples++;
465 g_sLostSegvSamples++;
466 return;
467 }
468
469 if (StackWalkerBase::IsMethodInBoundaryFrame(frameInfo.frame->GetMethod())) {
470 auto foundBypassFrame = CollectBoundaryFrames(frameInfo, sample, stackCounter);
471 if (foundBypassFrame) {
472 return;
473 }
474 } else if (frameInfo.isCompiled) {
475 ProcessCompiledTopFrame(frameInfo, sample, stackCounter, ptr);
476 }
477
478 auto lostSample = CollectFrames(frameInfo, sample, stackCounter);
479 if (lostSample) {
480 return;
481 }
482
483 if (stackCounter == 0) {
484 return;
485 }
486 sample.stackInfo.managedStackSize = stackCounter;
487 sample.threadInfo.threadId = os::thread::GetCurrentThreadId();
488
489 const ThreadCommunicator &communicator = Sampler::GetSampleCommunicator();
490 communicator.SendSample(sample);
491 }
492
SamplerThreadEntry()493 void Sampler::SamplerThreadEntry()
494 {
495 struct sigaction action {};
496 action.sa_sigaction = &SigProfSamplingProfilerHandler;
497 action.sa_flags = SA_SIGINFO | SA_ONSTACK;
498 // Clear signal set
499 sigemptyset(&action.sa_mask);
500 // Ignore incoming sigprof if handler isn't completed
501 sigaddset(&action.sa_mask, SIGPROF);
502
503 struct sigaction oldAction {};
504
505 if (sigaction(SIGPROF, &action, &oldAction) == -1) {
506 LOG(FATAL, PROFILER) << "Sigaction failed, can't start profiling";
507 UNREACHABLE();
508 }
509
510 // We keep handler assigned to SigProfSamplingProfilerHandler after sampling end because
511 // otherwice deadlock can happen if signal will be slow and reach thread after handler resignation
512 if (oldAction.sa_sigaction != nullptr && oldAction.sa_sigaction != SigProfSamplingProfilerHandler) {
513 LOG(FATAL, PROFILER) << "SIGPROF signal handler was overriden in sampling profiler";
514 UNREACHABLE();
515 }
516 ++g_sCurrentHandlersCounter;
517
518 auto pid = getpid();
519 // Atomic with relaxed order reason: data race with isActive_
520 while (isActive_.load(std::memory_order_relaxed)) {
521 {
522 os::memory::LockHolder holder(managedThreadsLock_);
523 for (const auto &threadId : managedThreads_) {
524 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg)
525 if (syscall(SYS_tgkill, pid, threadId, SIGPROF) != 0) {
526 LOG(ERROR, PROFILER) << "Can't send signal to thread";
527 }
528 }
529 }
530 os::thread::NativeSleepUS(sampleInterval_);
531 }
532
533 // Sending last sample on finish to avoid of deadlock in listener
534 SampleInfo lastSample;
535 lastSample.stackInfo.managedStackSize = 0;
536 communicator_.SendSample(lastSample);
537
538 --g_sCurrentHandlersCounter;
539
540 const unsigned int timeToSleepMs = 100;
541 do {
542 os::thread::NativeSleep(timeToSleepMs);
543 } while (g_sCurrentHandlersCounter != 0);
544 }
545
546 // Passing std:string copy instead of reference, 'cause another thread owns this object
547 // NOLINTNEXTLINE(performance-unnecessary-value-param)
ListenerThreadEntry(std::string outputFile)548 void Sampler::ListenerThreadEntry(std::string outputFile)
549 {
550 auto writerPtr = std::make_unique<StreamWriter>(outputFile.c_str());
551 // Writing panda files that were loaded before sampler was created
552 WriteLoadedPandaFiles(writerPtr.get());
553
554 SampleInfo bufferSample;
555 // Atomic with relaxed order reason: data race with isActive_
556 while (isActive_.load(std::memory_order_relaxed)) {
557 WriteLoadedPandaFiles(writerPtr.get());
558 communicator_.ReadSample(&bufferSample);
559 if (LIKELY(bufferSample.stackInfo.managedStackSize != 0)) {
560 writerPtr->WriteSample(bufferSample);
561 }
562 }
563 // Writing all remaining samples
564 while (!communicator_.IsPipeEmpty()) {
565 WriteLoadedPandaFiles(writerPtr.get());
566 communicator_.ReadSample(&bufferSample);
567 if (LIKELY(bufferSample.stackInfo.managedStackSize != 0)) {
568 writerPtr->WriteSample(bufferSample);
569 }
570 }
571 }
572
573 } // namespace ark::tooling::sampler
574