#include #include #include #if defined(C10_SUPPORTS_SIGNAL_HANDLER) // Normal signal handler implementation. #include #include #include #include #include #include #include #include #include #include #include #include #ifdef C10_ANDROID #ifndef SYS_gettid #define SYS_gettid __NR_gettid #endif #ifndef SYS_tgkill #define SYS_tgkill __NR_tgkill #endif #endif namespace { struct sigaction previousSighup; struct sigaction previousSigint; std::atomic sigintCount(0); std::atomic sighupCount(0); std::atomic hookedUpCount(0); void handleSignal(int signal) { switch (signal) { // TODO: what if the previous handler uses sa_sigaction? case SIGHUP: sighupCount += 1; if (previousSighup.sa_handler) { previousSighup.sa_handler(signal); } break; case SIGINT: sigintCount += 1; if (previousSigint.sa_handler) { previousSigint.sa_handler(signal); } break; } } void hookupHandler() { if (hookedUpCount++) { return; } struct sigaction sa {}; // Setup the handler sa.sa_handler = &handleSignal; // Restart the system call, if at all possible sa.sa_flags = SA_RESTART; // Block every signal during the handler sigfillset(&sa.sa_mask); // Intercept SIGHUP and SIGINT if (sigaction(SIGHUP, &sa, &previousSighup) == -1) { LOG(FATAL) << "Cannot install SIGHUP handler."; } if (sigaction(SIGINT, &sa, &previousSigint) == -1) { LOG(FATAL) << "Cannot install SIGINT handler."; } } // Set the signal handlers to the default. void unhookHandler() { if (--hookedUpCount > 0) { return; } struct sigaction sa {}; // Setup the sighub handler sa.sa_handler = SIG_DFL; // Restart the system call, if at all possible sa.sa_flags = SA_RESTART; // Block every signal during the handler sigfillset(&sa.sa_mask); // Intercept SIGHUP and SIGINT if (sigaction(SIGHUP, &previousSighup, nullptr) == -1) { LOG(FATAL) << "Cannot uninstall SIGHUP handler."; } if (sigaction(SIGINT, &previousSigint, nullptr) == -1) { LOG(FATAL) << "Cannot uninstall SIGINT handler."; } } } // namespace namespace c10 { #if defined(C10_SUPPORTS_FATAL_SIGNAL_HANDLERS) FatalSignalHandler& FatalSignalHandler::getInstance() { // Leaky singleton to avoid module destructor race. static FatalSignalHandler* handler = new FatalSignalHandler(); return *handler; } FatalSignalHandler::~FatalSignalHandler() = default; FatalSignalHandler::FatalSignalHandler() : fatalSignalHandlersInstalled(false), fatalSignalReceived(false), fatalSignalName(""), writingCond(), writingMutex(), signalReceived(false) {} // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays) FatalSignalHandler::signal_handler FatalSignalHandler::kSignalHandlers[] = { {"SIGABRT", SIGABRT, {}}, {"SIGINT", SIGINT, {}}, {"SIGILL", SIGILL, {}}, {"SIGFPE", SIGFPE, {}}, {"SIGBUS", SIGBUS, {}}, {"SIGSEGV", SIGSEGV, {}}, {nullptr, 0, {}}}; struct sigaction* FatalSignalHandler::getPreviousSigaction(int signum) { for (auto handler = kSignalHandlers; handler->name != nullptr; handler++) { if (handler->signum == signum) { return &handler->previous; } } return nullptr; } const char* FatalSignalHandler::getSignalName(int signum) { for (auto handler = kSignalHandlers; handler->name != nullptr; handler++) { if (handler->signum == signum) { return handler->name; } } return nullptr; } void FatalSignalHandler::callPreviousSignalHandler( struct sigaction* action, int signum, siginfo_t* info, void* ctx) { if (!action->sa_handler) { return; } if ((action->sa_flags & SA_SIGINFO) == SA_SIGINFO) { action->sa_sigaction(signum, info, ctx); } else { action->sa_handler(signum); } } // needsLock signals whether we need to lock our writing mutex. void FatalSignalHandler::stacktraceSignalHandler(bool needsLock) { std::unique_lock ul(writingMutex, std::defer_lock); if (needsLock) { ul.lock(); signalReceived = true; } pid_t tid = static_cast(syscall(SYS_gettid)); std::string backtrace = fmt::format( "{}({}), PID: {}, Thread {}: \n {}", fatalSignalName, fatalSignum, ::getpid(), tid, c10::get_backtrace()); std::cerr << backtrace << std::endl; if (needsLock) { ul.unlock(); writingCond.notify_all(); } } void FatalSignalHandler::fatalSignalHandlerPostProcess() {} void FatalSignalHandler::fatalSignalHandlerStatic(int signum) { getInstance().fatalSignalHandler(signum); } // Our fatal signal entry point void FatalSignalHandler::fatalSignalHandler(int signum) { // Check if this is a proper signal that we declared above. const char* name = getSignalName(signum); if (!name) { return; } if (fatalSignalReceived) { return; } // Set the flag so that our SIGUSR2 handler knows that we're aborting and // that it should intercept any SIGUSR2 signal. fatalSignalReceived = true; // Set state for other threads. fatalSignum = signum; fatalSignalName = name; // Linux doesn't have a nice userland API for enumerating threads so we // need to use the proc pseudo-filesystem. DIR* procDir = opendir("/proc/self/task"); if (procDir) { pid_t pid = getpid(); pid_t currentTid = static_cast(syscall(SYS_gettid)); struct dirent* entry = nullptr; std::unique_lock ul(writingMutex); while ((entry = readdir(procDir)) != nullptr) { if (entry->d_name[0] == '.') { continue; } pid_t tid = atoi(entry->d_name); // If we've found the current thread then we'll jump into the SIGUSR2 // handler instead of signaling to avoid deadlocking. if (tid != currentTid) { signalReceived = false; syscall(SYS_tgkill, pid, tid, SIGUSR2); auto now = std::chrono::system_clock::now(); using namespace std::chrono_literals; // we use wait_until instead of wait because on ROCm there was // a single thread that wouldn't receive the SIGUSR2 if (std::cv_status::timeout == writingCond.wait_until(ul, now + 2s)) { if (!signalReceived) { std::cerr << "signal lost waiting for stacktrace " << pid << ":" << tid << std::endl; break; } } } else { stacktraceSignalHandler(false); } } } else { perror("Failed to open /proc/self/task"); } fatalSignalHandlerPostProcess(); sigaction(signum, getPreviousSigaction(signum), nullptr); raise(signum); } // Our SIGUSR2 entry point void FatalSignalHandler::stacktraceSignalHandlerStatic( int signum, siginfo_t* info, void* ctx) { getInstance().stacktraceSignalHandler(signum, info, ctx); } void FatalSignalHandler::stacktraceSignalHandler( int signum, siginfo_t* info, void* ctx) { if (fatalSignalReceived) { stacktraceSignalHandler(true); } else { // We don't want to actually change the signal handler as we want to // remain the signal handler so that we may get the usr2 signal later. callPreviousSignalHandler(&previousSigusr2, signum, info, ctx); } } // Installs SIGABRT signal handler so that we get stack traces // from every thread on SIGABRT caused exit. Also installs SIGUSR2 handler // so that threads can communicate with each other (be sure if you use SIGUSR2) // to install your handler before initing caffe2 (we properly fall back to // the previous handler if we didn't initiate the SIGUSR2). void FatalSignalHandler::installFatalSignalHandlers() { std::lock_guard locker(fatalSignalHandlersInstallationMutex); if (fatalSignalHandlersInstalled) { return; } fatalSignalHandlersInstalled = true; struct sigaction sa {}; sigemptyset(&sa.sa_mask); // Since we'll be in an exiting situation it's possible there's memory // corruption, so make our own stack just in case. sa.sa_flags = SA_ONSTACK | SA_SIGINFO; sa.sa_handler = FatalSignalHandler::fatalSignalHandlerStatic; for (auto* handler = kSignalHandlers; handler->name != nullptr; handler++) { if (sigaction(handler->signum, &sa, &handler->previous)) { std::string str("Failed to add "); str += handler->name; str += " handler!"; perror(str.c_str()); } } sa.sa_sigaction = FatalSignalHandler::stacktraceSignalHandlerStatic; if (sigaction(SIGUSR2, &sa, &previousSigusr2)) { perror("Failed to add SIGUSR2 handler!"); } } void FatalSignalHandler::uninstallFatalSignalHandlers() { std::lock_guard locker(fatalSignalHandlersInstallationMutex); if (!fatalSignalHandlersInstalled) { return; } fatalSignalHandlersInstalled = false; for (auto* handler = kSignalHandlers; handler->name != nullptr; handler++) { if (sigaction(handler->signum, &handler->previous, nullptr)) { std::string str("Failed to remove "); str += handler->name; str += " handler!"; perror(str.c_str()); } else { handler->previous = {}; } } if (sigaction(SIGUSR2, &previousSigusr2, nullptr)) { perror("Failed to add SIGUSR2 handler!"); } else { previousSigusr2 = {}; } } #endif // defined(C10_SUPPORTS_FATAL_SIGNAL_HANDLERS) SignalHandler::SignalHandler( SignalHandler::Action SIGINT_action, SignalHandler::Action SIGHUP_action) : SIGINT_action_(SIGINT_action), SIGHUP_action_(SIGHUP_action), my_sigint_count_(sigintCount), my_sighup_count_(sighupCount) { hookupHandler(); } SignalHandler::~SignalHandler() { unhookHandler(); } // Return true iff a SIGINT has been received since the last time this // function was called. bool SignalHandler::GotSIGINT() { uint64_t count = sigintCount; uint64_t localCount = my_sigint_count_.exchange(count); return (localCount != count); } // Return true iff a SIGHUP has been received since the last time this // function was called. bool SignalHandler::GotSIGHUP() { uint64_t count = sighupCount; uint64_t localCount = my_sighup_count_.exchange(count); return (localCount != count); } SignalHandler::Action SignalHandler::CheckForSignals() { if (GotSIGHUP()) { return SIGHUP_action_; } if (GotSIGINT()) { return SIGINT_action_; } return SignalHandler::Action::NONE; } #if defined(C10_SUPPORTS_FATAL_SIGNAL_HANDLERS) void FatalSignalHandler::setPrintStackTracesOnFatalSignal(bool print) { if (print) { installFatalSignalHandlers(); } else { uninstallFatalSignalHandlers(); } } bool FatalSignalHandler::printStackTracesOnFatalSignal() { std::lock_guard locker(fatalSignalHandlersInstallationMutex); return fatalSignalHandlersInstalled; } #endif // defined(C10_SUPPORTS_FATAL_SIGNAL_HANDLERS) } // namespace c10 #else // defined(C10_SUPPORTS_SIGNAL_HANDLER) // TODO: Currently we do not support signal handling in non-Linux yet - below is // a minimal implementation that makes things compile. namespace c10 { SignalHandler::SignalHandler( SignalHandler::Action SIGINT_action, SignalHandler::Action SIGHUP_action) { SIGINT_action_ = SIGINT_action; SIGHUP_action_ = SIGHUP_action; my_sigint_count_ = 0; my_sighup_count_ = 0; } SignalHandler::~SignalHandler() {} bool SignalHandler::GotSIGINT() { return false; } bool SignalHandler::GotSIGHUP() { return false; } SignalHandler::Action SignalHandler::CheckForSignals() { return SignalHandler::Action::NONE; } } // namespace c10 #endif // defined(C10_SUPPORTS_SIGNAL_HANDLER)