• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "perfetto/ext/base/watchdog.h"
18 
19 #if PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)
20 
21 #include <fcntl.h>
22 #include <poll.h>
23 #include <signal.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <sys/syscall.h>
27 #include <sys/timerfd.h>
28 #include <unistd.h>
29 
30 #include <algorithm>
31 #include <cinttypes>
32 #include <fstream>
33 #include <thread>
34 
35 #include "perfetto/base/build_config.h"
36 #include "perfetto/base/logging.h"
37 #include "perfetto/base/thread_utils.h"
38 #include "perfetto/base/time.h"
39 #include "perfetto/ext/base/crash_keys.h"
40 #include "perfetto/ext/base/file_utils.h"
41 #include "perfetto/ext/base/scoped_file.h"
42 #include "perfetto/ext/base/utils.h"
43 
44 namespace perfetto {
45 namespace base {
46 
47 namespace {
48 
49 constexpr uint32_t kDefaultPollingInterval = 30 * 1000;
50 
51 base::CrashKey g_crash_key_reason("wdog_reason");
52 
// Returns true when |number| is a non-zero-quotient multiple of |divisor|,
// i.e. |number| is at least |divisor| and divides evenly by it.
// |divisor| must be non-zero whenever |number| >= |divisor|.
bool IsMultipleOf(uint32_t number, uint32_t divisor) {
  if (number < divisor)
    return false;
  const uint32_t remainder = number % divisor;
  return remainder == 0;
}
56 
// Returns the arithmetic mean of the first |size| entries of |array|.
//
// The division is carried out in floating point so the fractional part of the
// mean is preserved (an integer division would truncate it before the
// conversion to double). An empty array yields 0 rather than dividing by zero.
double MeanForArray(const uint64_t array[], size_t size) {
  if (size == 0)
    return 0.0;
  uint64_t total = 0;
  for (size_t i = 0; i < size; i++) {
    total += array[i];
  }
  return static_cast<double>(total) / static_cast<double>(size);
}
64 
65 }  //  namespace
66 
ReadProcStat(int fd,ProcStat * out)67 bool ReadProcStat(int fd, ProcStat* out) {
68   char c[512];
69   size_t c_pos = 0;
70   while (c_pos < sizeof(c) - 1) {
71     ssize_t rd = PERFETTO_EINTR(read(fd, c + c_pos, sizeof(c) - c_pos));
72     if (rd < 0) {
73       PERFETTO_ELOG("Failed to read stat file to enforce resource limits.");
74       return false;
75     }
76     if (rd == 0)
77       break;
78     c_pos += static_cast<size_t>(rd);
79   }
80   PERFETTO_CHECK(c_pos < sizeof(c));
81   c[c_pos] = '\0';
82 
83   if (sscanf(c,
84              "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu "
85              "%lu %*d %*d %*d %*d %*d %*d %*u %*u %ld",
86              &out->utime, &out->stime, &out->rss_pages) != 3) {
87     PERFETTO_ELOG("Invalid stat format: %s", c);
88     return false;
89   }
90   return true;
91 }
92 
Watchdog(uint32_t polling_interval_ms)93 Watchdog::Watchdog(uint32_t polling_interval_ms)
94     : polling_interval_ms_(polling_interval_ms) {}
95 
~Watchdog()96 Watchdog::~Watchdog() {
97   if (!thread_.joinable()) {
98     PERFETTO_DCHECK(!enabled_);
99     return;
100   }
101   PERFETTO_DCHECK(enabled_);
102   enabled_ = false;
103 
104   // Rearm the timer to 1ns from now. This will cause the watchdog thread to
105   // wakeup from the poll() and see |enabled_| == false.
106   // This code path is used only in tests. In production code the watchdog is
107   // a singleton and is never destroyed.
108   struct itimerspec ts {};
109   ts.it_value.tv_sec = 0;
110   ts.it_value.tv_nsec = 1;
111   timerfd_settime(*timer_fd_, /*flags=*/0, &ts, nullptr);
112 
113   thread_.join();
114 }
115 
GetInstance()116 Watchdog* Watchdog::GetInstance() {
117   static Watchdog* watchdog = new Watchdog(kDefaultPollingInterval);
118   return watchdog;
119 }
120 
121 // Can be called from any thread.
CreateFatalTimer(uint32_t ms,WatchdogCrashReason crash_reason)122 Watchdog::Timer Watchdog::CreateFatalTimer(uint32_t ms,
123                                            WatchdogCrashReason crash_reason) {
124   if (!enabled_.load(std::memory_order_relaxed))
125     return Watchdog::Timer(this, 0, crash_reason);
126 
127   return Watchdog::Timer(this, ms, crash_reason);
128 }
129 
130 // Can be called from any thread.
AddFatalTimer(TimerData timer)131 void Watchdog::AddFatalTimer(TimerData timer) {
132   std::lock_guard<std::mutex> guard(mutex_);
133   timers_.emplace_back(std::move(timer));
134   RearmTimerFd_Locked();
135 }
136 
137 // Can be called from any thread.
RemoveFatalTimer(TimerData timer)138 void Watchdog::RemoveFatalTimer(TimerData timer) {
139   std::lock_guard<std::mutex> guard(mutex_);
140   for (auto it = timers_.begin(); it != timers_.end(); it++) {
141     if (*it == timer) {
142       timers_.erase(it);
143       break;  // Remove only one. Doesn't matter which one.
144     }
145   }
146   RearmTimerFd_Locked();
147 }
148 
RearmTimerFd_Locked()149 void Watchdog::RearmTimerFd_Locked() {
150   if (!enabled_)
151     return;
152   auto it = std::min_element(timers_.begin(), timers_.end());
153 
154   // We use one timerfd to handle all the oustanding |timers_|. Keep it armed
155   // to the task expiring soonest.
156   struct itimerspec ts {};
157   if (it != timers_.end()) {
158     ts.it_value = ToPosixTimespec(it->deadline);
159   }
160   // If |timers_| is empty (it == end()) |ts.it_value| will remain
161   // zero-initialized and that will disarm the timer in the call below.
162   int res = timerfd_settime(*timer_fd_, TFD_TIMER_ABSTIME, &ts, nullptr);
163   PERFETTO_DCHECK(res == 0);
164 }
165 
Start()166 void Watchdog::Start() {
167   std::lock_guard<std::mutex> guard(mutex_);
168   if (thread_.joinable()) {
169     PERFETTO_DCHECK(enabled_);
170   } else {
171     PERFETTO_DCHECK(!enabled_);
172 
173 #if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \
174     PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
175     // Kick the thread to start running but only on Android or Linux.
176     timer_fd_.reset(
177         timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK));
178     if (!timer_fd_) {
179       PERFETTO_PLOG(
180           "timerfd_create failed, the Perfetto watchdog is not available");
181       return;
182     }
183     enabled_ = true;
184     RearmTimerFd_Locked();  // Deal with timers created before Start().
185     thread_ = std::thread(&Watchdog::ThreadMain, this);
186 #endif
187   }
188 }
189 
SetMemoryLimit(uint64_t bytes,uint32_t window_ms)190 void Watchdog::SetMemoryLimit(uint64_t bytes, uint32_t window_ms) {
191   // Update the fields under the lock.
192   std::lock_guard<std::mutex> guard(mutex_);
193 
194   PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) || bytes == 0);
195 
196   size_t size = bytes == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
197   memory_window_bytes_.Reset(size);
198   memory_limit_bytes_ = bytes;
199 }
200 
SetCpuLimit(uint32_t percentage,uint32_t window_ms)201 void Watchdog::SetCpuLimit(uint32_t percentage, uint32_t window_ms) {
202   std::lock_guard<std::mutex> guard(mutex_);
203 
204   PERFETTO_CHECK(percentage <= 100);
205   PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) ||
206                  percentage == 0);
207 
208   size_t size = percentage == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
209   cpu_window_time_ticks_.Reset(size);
210   cpu_limit_percentage_ = percentage;
211 }
212 
ThreadMain()213 void Watchdog::ThreadMain() {
214   // Register crash keys explicitly to avoid running out of slots at crash time.
215   g_crash_key_reason.Register();
216 
217   base::ScopedFile stat_fd(base::OpenFile("/proc/self/stat", O_RDONLY));
218   if (!stat_fd) {
219     PERFETTO_ELOG("Failed to open stat file to enforce resource limits.");
220     return;
221   }
222 
223   PERFETTO_DCHECK(timer_fd_);
224 
225   constexpr uint8_t kFdCount = 1;
226   struct pollfd fds[kFdCount]{};
227   fds[0].fd = *timer_fd_;
228   fds[0].events = POLLIN;
229 
230   for (;;) {
231     // We use the poll() timeout to drive the periodic ticks for the cpu/memory
232     // checks. The only other case when the poll() unblocks is when we crash
233     // (or have to quit via enabled_ == false, but that happens only in tests).
234     auto ret = poll(fds, kFdCount, static_cast<int>(polling_interval_ms_));
235     if (!enabled_)
236       return;
237     if (ret < 0) {
238       if (errno == ENOMEM || errno == EINTR) {
239         // Should happen extremely rarely.
240         std::this_thread::sleep_for(std::chrono::milliseconds(100));
241         continue;
242       }
243       PERFETTO_FATAL("watchdog poll() failed");
244     }
245 
246     // If we get here either:
247     // 1. poll() timed out, in which case we should process cpu/mem guardrails.
248     // 2. A timer expired, in which case we shall crash.
249 
250     uint64_t expired = 0;  // Must be exactly 8 bytes.
251     auto res = PERFETTO_EINTR(read(*timer_fd_, &expired, sizeof(expired)));
252     PERFETTO_DCHECK((res < 0 && (errno == EAGAIN)) ||
253                     (res == sizeof(expired) && expired > 0));
254     const auto now = GetWallTimeMs();
255 
256     // Check if any of the timers expired.
257     int tid_to_kill = 0;
258     WatchdogCrashReason crash_reason{};
259     std::unique_lock<std::mutex> guard(mutex_);
260     for (const auto& timer : timers_) {
261       if (now >= timer.deadline) {
262         tid_to_kill = timer.thread_id;
263         crash_reason = timer.crash_reason;
264         break;
265       }
266     }
267     guard.unlock();
268 
269     if (tid_to_kill)
270       SerializeLogsAndKillThread(tid_to_kill, crash_reason);
271 
272     // Check CPU and memory guardrails (if enabled).
273     lseek(stat_fd.get(), 0, SEEK_SET);
274     ProcStat stat;
275     if (!ReadProcStat(stat_fd.get(), &stat))
276       continue;
277     uint64_t cpu_time = stat.utime + stat.stime;
278     uint64_t rss_bytes =
279         static_cast<uint64_t>(stat.rss_pages) * base::GetSysPageSize();
280 
281     bool threshold_exceeded = false;
282     guard.lock();
283     if (CheckMemory_Locked(rss_bytes)) {
284       threshold_exceeded = true;
285       crash_reason = WatchdogCrashReason::kMemGuardrail;
286     } else if (CheckCpu_Locked(cpu_time)) {
287       threshold_exceeded = true;
288       crash_reason = WatchdogCrashReason::kCpuGuardrail;
289     }
290     guard.unlock();
291 
292     if (threshold_exceeded)
293       SerializeLogsAndKillThread(getpid(), crash_reason);
294   }
295 }
296 
SerializeLogsAndKillThread(int tid,WatchdogCrashReason crash_reason)297 void Watchdog::SerializeLogsAndKillThread(int tid,
298                                           WatchdogCrashReason crash_reason) {
299   g_crash_key_reason.Set(static_cast<int>(crash_reason));
300 
301   // We are about to die. Serialize the logs into the crash buffer so the
302   // debuggerd crash handler picks them up and attaches to the bugreport.
303   // In the case of a PERFETTO_CHECK/PERFETTO_FATAL this is done in logging.h.
304   // But in the watchdog case, we don't hit that codepath and must do ourselves.
305   MaybeSerializeLastLogsForCrashReporting();
306 
307   // Send a SIGABRT to the thread that armed the timer. This is to see the
308   // callstack of the thread that is stuck in a long task rather than the
309   // watchdog thread.
310   if (syscall(__NR_tgkill, getpid(), tid, SIGABRT) < 0) {
311     // At this point the process must die. If for any reason the tgkill doesn't
312     // work (e.g. the thread has disappeared), force a crash from here.
313     abort();
314   }
315 
316   if (disable_kill_failsafe_for_testing_)
317     return;
318 
319   // The tgkill() above will take some milliseconds to cause a crash, as it
320   // involves the kernel to queue the SIGABRT on the target thread (often the
321   // main thread, which is != watchdog thread) and do a scheduling round.
322   // If something goes wrong though (the target thread has signals masked or
323   // is stuck in an uninterruptible+wakekill syscall) force quit from this
324   // thread.
325   std::this_thread::sleep_for(std::chrono::seconds(10));
326   abort();
327 }
328 
CheckMemory_Locked(uint64_t rss_bytes)329 bool Watchdog::CheckMemory_Locked(uint64_t rss_bytes) {
330   if (memory_limit_bytes_ == 0)
331     return false;
332 
333   // Add the current stat value to the ring buffer and check that the mean
334   // remains under our threshold.
335   if (memory_window_bytes_.Push(rss_bytes)) {
336     if (memory_window_bytes_.Mean() >
337         static_cast<double>(memory_limit_bytes_)) {
338       PERFETTO_ELOG(
339           "Memory watchdog trigger. Memory window of %f bytes is above the "
340           "%" PRIu64 " bytes limit.",
341           memory_window_bytes_.Mean(), memory_limit_bytes_);
342       return true;
343     }
344   }
345   return false;
346 }
347 
CheckCpu_Locked(uint64_t cpu_time)348 bool Watchdog::CheckCpu_Locked(uint64_t cpu_time) {
349   if (cpu_limit_percentage_ == 0)
350     return false;
351 
352   // Add the cpu time to the ring buffer.
353   if (cpu_window_time_ticks_.Push(cpu_time)) {
354     // Compute the percentage over the whole window and check that it remains
355     // under the threshold.
356     uint64_t difference_ticks = cpu_window_time_ticks_.NewestWhenFull() -
357                                 cpu_window_time_ticks_.OldestWhenFull();
358     double window_interval_ticks =
359         (static_cast<double>(WindowTimeForRingBuffer(cpu_window_time_ticks_)) /
360          1000.0) *
361         static_cast<double>(sysconf(_SC_CLK_TCK));
362     double percentage = static_cast<double>(difference_ticks) /
363                         static_cast<double>(window_interval_ticks) * 100;
364     if (percentage > cpu_limit_percentage_) {
365       PERFETTO_ELOG("CPU watchdog trigger. %f%% CPU use is above the %" PRIu32
366                     "%% CPU limit.",
367                     percentage, cpu_limit_percentage_);
368       return true;
369     }
370   }
371   return false;
372 }
373 
WindowTimeForRingBuffer(const WindowedInterval & window)374 uint32_t Watchdog::WindowTimeForRingBuffer(const WindowedInterval& window) {
375   return static_cast<uint32_t>(window.size() - 1) * polling_interval_ms_;
376 }
377 
Push(uint64_t sample)378 bool Watchdog::WindowedInterval::Push(uint64_t sample) {
379   // Add the sample to the current position in the ring buffer.
380   buffer_[position_] = sample;
381 
382   // Update the position with next one circularily.
383   position_ = (position_ + 1) % size_;
384 
385   // Set the filled flag the first time we wrap.
386   filled_ = filled_ || position_ == 0;
387   return filled_;
388 }
389 
Mean() const390 double Watchdog::WindowedInterval::Mean() const {
391   return MeanForArray(buffer_.get(), size_);
392 }
393 
Clear()394 void Watchdog::WindowedInterval::Clear() {
395   position_ = 0;
396   buffer_.reset(new uint64_t[size_]());
397 }
398 
Reset(size_t new_size)399 void Watchdog::WindowedInterval::Reset(size_t new_size) {
400   position_ = 0;
401   size_ = new_size;
402   buffer_.reset(new_size == 0 ? nullptr : new uint64_t[new_size]());
403 }
404 
Timer(Watchdog * watchdog,uint32_t ms,WatchdogCrashReason crash_reason)405 Watchdog::Timer::Timer(Watchdog* watchdog,
406                        uint32_t ms,
407                        WatchdogCrashReason crash_reason)
408     : watchdog_(watchdog) {
409   if (!ms)
410     return;  // No-op timer created when the watchdog is disabled.
411   timer_data_.deadline = GetWallTimeMs() + std::chrono::milliseconds(ms);
412   timer_data_.thread_id = GetThreadId();
413   timer_data_.crash_reason = crash_reason;
414   PERFETTO_DCHECK(watchdog_);
415   watchdog_->AddFatalTimer(timer_data_);
416 }
417 
~Timer()418 Watchdog::Timer::~Timer() {
419   if (timer_data_.deadline.count())
420     watchdog_->RemoveFatalTimer(timer_data_);
421 }
422 
Timer(Timer && other)423 Watchdog::Timer::Timer(Timer&& other) noexcept {
424   watchdog_ = std::move(other.watchdog_);
425   other.watchdog_ = nullptr;
426   timer_data_ = std::move(other.timer_data_);
427   other.timer_data_ = TimerData();
428 }
429 
430 }  // namespace base
431 }  // namespace perfetto
432 
433 #endif  // PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)
434