1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "perfetto/ext/base/platform.h"
18 #include "perfetto/ext/base/watchdog.h"
19 
20 #if PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)
21 
22 #include <fcntl.h>
23 #include <poll.h>
24 #include <signal.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <sys/syscall.h>
28 #include <sys/timerfd.h>
29 #include <unistd.h>
30 
31 #include <algorithm>
32 #include <cinttypes>
33 #include <fstream>
34 #include <thread>
35 
36 #include "perfetto/base/build_config.h"
37 #include "perfetto/base/logging.h"
38 #include "perfetto/base/thread_utils.h"
39 #include "perfetto/base/time.h"
40 #include "perfetto/ext/base/crash_keys.h"
41 #include "perfetto/ext/base/file_utils.h"
42 #include "perfetto/ext/base/scoped_file.h"
43 #include "perfetto/ext/base/utils.h"
44 
45 namespace perfetto {
46 namespace base {
47 
48 namespace {
49 
50 constexpr uint32_t kDefaultPollingInterval = 30 * 1000;
51 
52 base::CrashKey g_crash_key_reason("wdog_reason");
53 
// Returns true iff |number| is a non-zero multiple of |divisor|, i.e.
// |number| == k * |divisor| for some k >= 1.
//
// A zero |divisor| has no multiples: it yields false rather than evaluating
// |number| % 0, which is undefined behaviour.
bool IsMultipleOf(uint32_t number, uint32_t divisor) {
  if (divisor == 0)
    return false;
  return number >= divisor && number % divisor == 0;
}
57 
// Returns the arithmetic mean of the first |size| elements of |array|.
//
// The division is carried out in floating point so that the fractional part
// of the mean is preserved (dividing the integer sum first would truncate it).
// An empty input (|size| == 0) yields 0 instead of dividing by zero.
double MeanForArray(const uint64_t array[], size_t size) {
  if (size == 0)
    return 0.0;
  uint64_t total = 0;
  for (size_t i = 0; i < size; i++) {
    total += array[i];
  }
  return static_cast<double>(total) / static_cast<double>(size);
}
65 
66 }  //  namespace
67 
ReadProcStat(int fd,ProcStat * out)68 bool ReadProcStat(int fd, ProcStat* out) {
69   char c[512];
70   size_t c_pos = 0;
71   while (c_pos < sizeof(c) - 1) {
72     ssize_t rd = PERFETTO_EINTR(read(fd, c + c_pos, sizeof(c) - c_pos));
73     if (rd < 0) {
74       PERFETTO_ELOG("Failed to read stat file to enforce resource limits.");
75       return false;
76     }
77     if (rd == 0)
78       break;
79     c_pos += static_cast<size_t>(rd);
80   }
81   PERFETTO_CHECK(c_pos < sizeof(c));
82   c[c_pos] = '\0';
83 
84   if (sscanf(c,
85              "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu "
86              "%lu %*d %*d %*d %*d %*d %*d %*u %*u %ld",
87              &out->utime, &out->stime, &out->rss_pages) != 3) {
88     PERFETTO_ELOG("Invalid stat format: %s", c);
89     return false;
90   }
91   return true;
92 }
93 
Watchdog(uint32_t polling_interval_ms)94 Watchdog::Watchdog(uint32_t polling_interval_ms)
95     : polling_interval_ms_(polling_interval_ms) {}
96 
~Watchdog()97 Watchdog::~Watchdog() {
98   if (!thread_.joinable()) {
99     PERFETTO_DCHECK(!enabled_);
100     return;
101   }
102   PERFETTO_DCHECK(enabled_);
103   enabled_ = false;
104 
105   // Rearm the timer to 1ns from now. This will cause the watchdog thread to
106   // wakeup from the poll() and see |enabled_| == false.
107   // This code path is used only in tests. In production code the watchdog is
108   // a singleton and is never destroyed.
109   struct itimerspec ts {};
110   ts.it_value.tv_sec = 0;
111   ts.it_value.tv_nsec = 1;
112   timerfd_settime(*timer_fd_, /*flags=*/0, &ts, nullptr);
113 
114   thread_.join();
115 }
116 
GetInstance()117 Watchdog* Watchdog::GetInstance() {
118   static Watchdog* watchdog = new Watchdog(kDefaultPollingInterval);
119   return watchdog;
120 }
121 
122 // Can be called from any thread.
CreateFatalTimer(uint32_t ms,WatchdogCrashReason crash_reason)123 Watchdog::Timer Watchdog::CreateFatalTimer(uint32_t ms,
124                                            WatchdogCrashReason crash_reason) {
125   if (!enabled_.load(std::memory_order_relaxed))
126     return Watchdog::Timer(this, 0, crash_reason);
127 
128   return Watchdog::Timer(this, ms, crash_reason);
129 }
130 
131 // Can be called from any thread.
AddFatalTimer(TimerData timer)132 void Watchdog::AddFatalTimer(TimerData timer) {
133   std::lock_guard<std::mutex> guard(mutex_);
134   timers_.emplace_back(std::move(timer));
135   RearmTimerFd_Locked();
136 }
137 
138 // Can be called from any thread.
RemoveFatalTimer(TimerData timer)139 void Watchdog::RemoveFatalTimer(TimerData timer) {
140   std::lock_guard<std::mutex> guard(mutex_);
141   for (auto it = timers_.begin(); it != timers_.end(); it++) {
142     if (*it == timer) {
143       timers_.erase(it);
144       break;  // Remove only one. Doesn't matter which one.
145     }
146   }
147   RearmTimerFd_Locked();
148 }
149 
RearmTimerFd_Locked()150 void Watchdog::RearmTimerFd_Locked() {
151   if (!enabled_)
152     return;
153   auto it = std::min_element(timers_.begin(), timers_.end());
154 
155   // We use one timerfd to handle all the oustanding |timers_|. Keep it armed
156   // to the task expiring soonest.
157   struct itimerspec ts {};
158   if (it != timers_.end()) {
159     ts.it_value = ToPosixTimespec(it->deadline);
160   }
161   // If |timers_| is empty (it == end()) |ts.it_value| will remain
162   // zero-initialized and that will disarm the timer in the call below.
163   int res = timerfd_settime(*timer_fd_, TFD_TIMER_ABSTIME, &ts, nullptr);
164   PERFETTO_DCHECK(res == 0);
165 }
166 
Start()167 void Watchdog::Start() {
168   std::lock_guard<std::mutex> guard(mutex_);
169   if (thread_.joinable()) {
170     PERFETTO_DCHECK(enabled_);
171   } else {
172     PERFETTO_DCHECK(!enabled_);
173 
174 #if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \
175     PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
176     // Kick the thread to start running but only on Android or Linux.
177     timer_fd_.reset(
178         timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK));
179     if (!timer_fd_) {
180       PERFETTO_PLOG(
181           "timerfd_create failed, the Perfetto watchdog is not available");
182       return;
183     }
184     enabled_ = true;
185     RearmTimerFd_Locked();  // Deal with timers created before Start().
186     thread_ = std::thread(&Watchdog::ThreadMain, this);
187 #endif
188   }
189 }
190 
SetMemoryLimit(uint64_t bytes,uint32_t window_ms)191 void Watchdog::SetMemoryLimit(uint64_t bytes, uint32_t window_ms) {
192   // Update the fields under the lock.
193   std::lock_guard<std::mutex> guard(mutex_);
194 
195   PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) || bytes == 0);
196 
197   size_t size = bytes == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
198   memory_window_bytes_.Reset(size);
199   memory_limit_bytes_ = bytes;
200 }
201 
SetCpuLimit(uint32_t percentage,uint32_t window_ms)202 void Watchdog::SetCpuLimit(uint32_t percentage, uint32_t window_ms) {
203   std::lock_guard<std::mutex> guard(mutex_);
204 
205   PERFETTO_CHECK(percentage <= 100);
206   PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) ||
207                  percentage == 0);
208 
209   size_t size = percentage == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
210   cpu_window_time_ticks_.Reset(size);
211   cpu_limit_percentage_ = percentage;
212 }
213 
ThreadMain()214 void Watchdog::ThreadMain() {
215   // Register crash keys explicitly to avoid running out of slots at crash time.
216   g_crash_key_reason.Register();
217 
218   base::ScopedFile stat_fd(base::OpenFile("/proc/self/stat", O_RDONLY));
219   if (!stat_fd) {
220     PERFETTO_ELOG("Failed to open stat file to enforce resource limits.");
221     return;
222   }
223 
224   PERFETTO_DCHECK(timer_fd_);
225 
226   constexpr uint8_t kFdCount = 1;
227   struct pollfd fds[kFdCount]{};
228   fds[0].fd = *timer_fd_;
229   fds[0].events = POLLIN;
230 
231   for (;;) {
232     // We use the poll() timeout to drive the periodic ticks for the cpu/memory
233     // checks. The only other case when the poll() unblocks is when we crash
234     // (or have to quit via enabled_ == false, but that happens only in tests).
235     platform::BeforeMaybeBlockingSyscall();
236     auto ret = poll(fds, kFdCount, static_cast<int>(polling_interval_ms_));
237     platform::AfterMaybeBlockingSyscall();
238     if (!enabled_)
239       return;
240     if (ret < 0) {
241       if (errno == ENOMEM || errno == EINTR) {
242         // Should happen extremely rarely.
243         std::this_thread::sleep_for(std::chrono::milliseconds(100));
244         continue;
245       }
246       PERFETTO_FATAL("watchdog poll() failed");
247     }
248 
249     // If we get here either:
250     // 1. poll() timed out, in which case we should process cpu/mem guardrails.
251     // 2. A timer expired, in which case we shall crash.
252 
253     uint64_t expired = 0;  // Must be exactly 8 bytes.
254     auto res = PERFETTO_EINTR(read(*timer_fd_, &expired, sizeof(expired)));
255     PERFETTO_DCHECK((res < 0 && (errno == EAGAIN)) ||
256                     (res == sizeof(expired) && expired > 0));
257     const auto now = GetWallTimeMs();
258 
259     // Check if any of the timers expired.
260     int tid_to_kill = 0;
261     WatchdogCrashReason crash_reason{};
262     std::unique_lock<std::mutex> guard(mutex_);
263     for (const auto& timer : timers_) {
264       if (now >= timer.deadline) {
265         tid_to_kill = timer.thread_id;
266         crash_reason = timer.crash_reason;
267         break;
268       }
269     }
270     guard.unlock();
271 
272     if (tid_to_kill)
273       SerializeLogsAndKillThread(tid_to_kill, crash_reason);
274 
275     // Check CPU and memory guardrails (if enabled).
276     lseek(stat_fd.get(), 0, SEEK_SET);
277     ProcStat stat;
278     if (!ReadProcStat(stat_fd.get(), &stat))
279       continue;
280     uint64_t cpu_time = stat.utime + stat.stime;
281     uint64_t rss_bytes =
282         static_cast<uint64_t>(stat.rss_pages) * base::GetSysPageSize();
283 
284     bool threshold_exceeded = false;
285     guard.lock();
286     if (CheckMemory_Locked(rss_bytes)) {
287       threshold_exceeded = true;
288       crash_reason = WatchdogCrashReason::kMemGuardrail;
289     } else if (CheckCpu_Locked(cpu_time)) {
290       threshold_exceeded = true;
291       crash_reason = WatchdogCrashReason::kCpuGuardrail;
292     }
293     guard.unlock();
294 
295     if (threshold_exceeded)
296       SerializeLogsAndKillThread(getpid(), crash_reason);
297   }
298 }
299 
SerializeLogsAndKillThread(int tid,WatchdogCrashReason crash_reason)300 void Watchdog::SerializeLogsAndKillThread(int tid,
301                                           WatchdogCrashReason crash_reason) {
302   g_crash_key_reason.Set(static_cast<int>(crash_reason));
303 
304   // We are about to die. Serialize the logs into the crash buffer so the
305   // debuggerd crash handler picks them up and attaches to the bugreport.
306   // In the case of a PERFETTO_CHECK/PERFETTO_FATAL this is done in logging.h.
307   // But in the watchdog case, we don't hit that codepath and must do ourselves.
308   MaybeSerializeLastLogsForCrashReporting();
309 
310   // Send a SIGABRT to the thread that armed the timer. This is to see the
311   // callstack of the thread that is stuck in a long task rather than the
312   // watchdog thread.
313   if (syscall(__NR_tgkill, getpid(), tid, SIGABRT) < 0) {
314     // At this point the process must die. If for any reason the tgkill doesn't
315     // work (e.g. the thread has disappeared), force a crash from here.
316     abort();
317   }
318 
319   if (disable_kill_failsafe_for_testing_)
320     return;
321 
322   // The tgkill() above will take some milliseconds to cause a crash, as it
323   // involves the kernel to queue the SIGABRT on the target thread (often the
324   // main thread, which is != watchdog thread) and do a scheduling round.
325   // If something goes wrong though (the target thread has signals masked or
326   // is stuck in an uninterruptible+wakekill syscall) force quit from this
327   // thread.
328   std::this_thread::sleep_for(std::chrono::seconds(10));
329   abort();
330 }
331 
CheckMemory_Locked(uint64_t rss_bytes)332 bool Watchdog::CheckMemory_Locked(uint64_t rss_bytes) {
333   if (memory_limit_bytes_ == 0)
334     return false;
335 
336   // Add the current stat value to the ring buffer and check that the mean
337   // remains under our threshold.
338   if (memory_window_bytes_.Push(rss_bytes)) {
339     if (memory_window_bytes_.Mean() >
340         static_cast<double>(memory_limit_bytes_)) {
341       PERFETTO_ELOG(
342           "Memory watchdog trigger. Memory window of %f bytes is above the "
343           "%" PRIu64 " bytes limit.",
344           memory_window_bytes_.Mean(), memory_limit_bytes_);
345       return true;
346     }
347   }
348   return false;
349 }
350 
CheckCpu_Locked(uint64_t cpu_time)351 bool Watchdog::CheckCpu_Locked(uint64_t cpu_time) {
352   if (cpu_limit_percentage_ == 0)
353     return false;
354 
355   // Add the cpu time to the ring buffer.
356   if (cpu_window_time_ticks_.Push(cpu_time)) {
357     // Compute the percentage over the whole window and check that it remains
358     // under the threshold.
359     uint64_t difference_ticks = cpu_window_time_ticks_.NewestWhenFull() -
360                                 cpu_window_time_ticks_.OldestWhenFull();
361     double window_interval_ticks =
362         (static_cast<double>(WindowTimeForRingBuffer(cpu_window_time_ticks_)) /
363          1000.0) *
364         static_cast<double>(sysconf(_SC_CLK_TCK));
365     double percentage = static_cast<double>(difference_ticks) /
366                         static_cast<double>(window_interval_ticks) * 100;
367     if (percentage > cpu_limit_percentage_) {
368       PERFETTO_ELOG("CPU watchdog trigger. %f%% CPU use is above the %" PRIu32
369                     "%% CPU limit.",
370                     percentage, cpu_limit_percentage_);
371       return true;
372     }
373   }
374   return false;
375 }
376 
WindowTimeForRingBuffer(const WindowedInterval & window)377 uint32_t Watchdog::WindowTimeForRingBuffer(const WindowedInterval& window) {
378   return static_cast<uint32_t>(window.size() - 1) * polling_interval_ms_;
379 }
380 
Push(uint64_t sample)381 bool Watchdog::WindowedInterval::Push(uint64_t sample) {
382   // Add the sample to the current position in the ring buffer.
383   buffer_[position_] = sample;
384 
385   // Update the position with next one circularily.
386   position_ = (position_ + 1) % size_;
387 
388   // Set the filled flag the first time we wrap.
389   filled_ = filled_ || position_ == 0;
390   return filled_;
391 }
392 
Mean() const393 double Watchdog::WindowedInterval::Mean() const {
394   return MeanForArray(buffer_.get(), size_);
395 }
396 
Clear()397 void Watchdog::WindowedInterval::Clear() {
398   position_ = 0;
399   buffer_.reset(new uint64_t[size_]());
400 }
401 
Reset(size_t new_size)402 void Watchdog::WindowedInterval::Reset(size_t new_size) {
403   position_ = 0;
404   size_ = new_size;
405   buffer_.reset(new_size == 0 ? nullptr : new uint64_t[new_size]());
406 }
407 
Timer(Watchdog * watchdog,uint32_t ms,WatchdogCrashReason crash_reason)408 Watchdog::Timer::Timer(Watchdog* watchdog,
409                        uint32_t ms,
410                        WatchdogCrashReason crash_reason)
411     : watchdog_(watchdog) {
412   if (!ms)
413     return;  // No-op timer created when the watchdog is disabled.
414   timer_data_.deadline = GetWallTimeMs() + std::chrono::milliseconds(ms);
415   timer_data_.thread_id = GetThreadId();
416   timer_data_.crash_reason = crash_reason;
417   PERFETTO_DCHECK(watchdog_);
418   watchdog_->AddFatalTimer(timer_data_);
419 }
420 
~Timer()421 Watchdog::Timer::~Timer() {
422   if (timer_data_.deadline.count())
423     watchdog_->RemoveFatalTimer(timer_data_);
424 }
425 
Timer(Timer && other)426 Watchdog::Timer::Timer(Timer&& other) noexcept {
427   watchdog_ = std::move(other.watchdog_);
428   other.watchdog_ = nullptr;
429   timer_data_ = std::move(other.timer_data_);
430   other.timer_data_ = TimerData();
431 }
432 
433 }  // namespace base
434 }  // namespace perfetto
435 
436 #endif  // PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)
437