1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "perfetto/ext/base/watchdog.h"
18
19 #if PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)
20
21 #include <fcntl.h>
22 #include <poll.h>
23 #include <signal.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <sys/syscall.h>
27 #include <sys/timerfd.h>
28 #include <unistd.h>
29
30 #include <algorithm>
31 #include <cinttypes>
32 #include <fstream>
33 #include <thread>
34
35 #include "perfetto/base/build_config.h"
36 #include "perfetto/base/logging.h"
37 #include "perfetto/base/thread_utils.h"
38 #include "perfetto/base/time.h"
39 #include "perfetto/ext/base/crash_keys.h"
40 #include "perfetto/ext/base/file_utils.h"
41 #include "perfetto/ext/base/scoped_file.h"
42 #include "perfetto/ext/base/utils.h"
43
44 namespace perfetto {
45 namespace base {
46
47 namespace {
48
// Polling period, in milliseconds, passed to the Watchdog constructor by
// Watchdog::GetInstance() (i.e. 30 seconds).
constexpr uint32_t kDefaultPollingInterval = 30 * 1000;

// Crash key that SerializeLogsAndKillThread() sets to the numeric value of the
// WatchdogCrashReason right before killing the process. It is registered by the
// watchdog thread at the beginning of ThreadMain().
base::CrashKey g_crash_key_reason("wdog_reason");
52
// Returns true iff |number| is a non-zero multiple of |divisor|
// (i.e. |number| == k * |divisor| for some k >= 1).
//
// A zero |divisor| has no multiples, so false is returned in that case. The
// explicit check matters: without it |number >= 0| is always true and the
// function would go on to evaluate |number % 0|, which is undefined behavior.
bool IsMultipleOf(uint32_t number, uint32_t divisor) {
  if (divisor == 0)
    return false;
  return number >= divisor && number % divisor == 0;
}
56
// Returns the arithmetic mean of the first |size| elements of |array|.
//
// The division is carried out in floating point so that the fractional part of
// the mean is preserved (dividing the integer sum by |size| first would
// silently truncate it). An empty input yields 0.0 rather than dividing by
// zero; |array| is not dereferenced in that case.
double MeanForArray(const uint64_t array[], size_t size) {
  if (size == 0)
    return 0.0;

  uint64_t total = 0;
  for (size_t i = 0; i < size; i++) {
    total += array[i];
  }
  return static_cast<double>(total) / static_cast<double>(size);
}
64
65 } // namespace
66
ReadProcStat(int fd,ProcStat * out)67 bool ReadProcStat(int fd, ProcStat* out) {
68 char c[512];
69 size_t c_pos = 0;
70 while (c_pos < sizeof(c) - 1) {
71 ssize_t rd = PERFETTO_EINTR(read(fd, c + c_pos, sizeof(c) - c_pos));
72 if (rd < 0) {
73 PERFETTO_ELOG("Failed to read stat file to enforce resource limits.");
74 return false;
75 }
76 if (rd == 0)
77 break;
78 c_pos += static_cast<size_t>(rd);
79 }
80 PERFETTO_CHECK(c_pos < sizeof(c));
81 c[c_pos] = '\0';
82
83 if (sscanf(c,
84 "%*d %*s %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu "
85 "%lu %*d %*d %*d %*d %*d %*d %*u %*u %ld",
86 &out->utime, &out->stime, &out->rss_pages) != 3) {
87 PERFETTO_ELOG("Invalid stat format: %s", c);
88 return false;
89 }
90 return true;
91 }
92
// Creates a watchdog whose thread, once started, wakes up every
// |polling_interval_ms| milliseconds (that value is used as the poll() timeout
// in ThreadMain()). The constructor only stores the interval: the timerfd and
// the thread are created later by Start().
Watchdog::Watchdog(uint32_t polling_interval_ms)
    : polling_interval_ms_(polling_interval_ms) {}
95
~Watchdog()96 Watchdog::~Watchdog() {
97 if (!thread_.joinable()) {
98 PERFETTO_DCHECK(!enabled_);
99 return;
100 }
101 PERFETTO_DCHECK(enabled_);
102 enabled_ = false;
103
104 // Rearm the timer to 1ns from now. This will cause the watchdog thread to
105 // wakeup from the poll() and see |enabled_| == false.
106 // This code path is used only in tests. In production code the watchdog is
107 // a singleton and is never destroyed.
108 struct itimerspec ts {};
109 ts.it_value.tv_sec = 0;
110 ts.it_value.tv_nsec = 1;
111 timerfd_settime(*timer_fd_, /*flags=*/0, &ts, nullptr);
112
113 thread_.join();
114 }
115
GetInstance()116 Watchdog* Watchdog::GetInstance() {
117 static Watchdog* watchdog = new Watchdog(kDefaultPollingInterval);
118 return watchdog;
119 }
120
121 // Can be called from any thread.
CreateFatalTimer(uint32_t ms,WatchdogCrashReason crash_reason)122 Watchdog::Timer Watchdog::CreateFatalTimer(uint32_t ms,
123 WatchdogCrashReason crash_reason) {
124 if (!enabled_.load(std::memory_order_relaxed))
125 return Watchdog::Timer(this, 0, crash_reason);
126
127 return Watchdog::Timer(this, ms, crash_reason);
128 }
129
130 // Can be called from any thread.
AddFatalTimer(TimerData timer)131 void Watchdog::AddFatalTimer(TimerData timer) {
132 std::lock_guard<std::mutex> guard(mutex_);
133 timers_.emplace_back(std::move(timer));
134 RearmTimerFd_Locked();
135 }
136
137 // Can be called from any thread.
RemoveFatalTimer(TimerData timer)138 void Watchdog::RemoveFatalTimer(TimerData timer) {
139 std::lock_guard<std::mutex> guard(mutex_);
140 for (auto it = timers_.begin(); it != timers_.end(); it++) {
141 if (*it == timer) {
142 timers_.erase(it);
143 break; // Remove only one. Doesn't matter which one.
144 }
145 }
146 RearmTimerFd_Locked();
147 }
148
RearmTimerFd_Locked()149 void Watchdog::RearmTimerFd_Locked() {
150 if (!enabled_)
151 return;
152 auto it = std::min_element(timers_.begin(), timers_.end());
153
154 // We use one timerfd to handle all the oustanding |timers_|. Keep it armed
155 // to the task expiring soonest.
156 struct itimerspec ts {};
157 if (it != timers_.end()) {
158 ts.it_value = ToPosixTimespec(it->deadline);
159 }
160 // If |timers_| is empty (it == end()) |ts.it_value| will remain
161 // zero-initialized and that will disarm the timer in the call below.
162 int res = timerfd_settime(*timer_fd_, TFD_TIMER_ABSTIME, &ts, nullptr);
163 PERFETTO_DCHECK(res == 0);
164 }
165
Start()166 void Watchdog::Start() {
167 std::lock_guard<std::mutex> guard(mutex_);
168 if (thread_.joinable()) {
169 PERFETTO_DCHECK(enabled_);
170 } else {
171 PERFETTO_DCHECK(!enabled_);
172
173 #if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \
174 PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
175 // Kick the thread to start running but only on Android or Linux.
176 timer_fd_.reset(
177 timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK));
178 if (!timer_fd_) {
179 PERFETTO_PLOG(
180 "timerfd_create failed, the Perfetto watchdog is not available");
181 return;
182 }
183 enabled_ = true;
184 RearmTimerFd_Locked(); // Deal with timers created before Start().
185 thread_ = std::thread(&Watchdog::ThreadMain, this);
186 #endif
187 }
188 }
189
SetMemoryLimit(uint64_t bytes,uint32_t window_ms)190 void Watchdog::SetMemoryLimit(uint64_t bytes, uint32_t window_ms) {
191 // Update the fields under the lock.
192 std::lock_guard<std::mutex> guard(mutex_);
193
194 PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) || bytes == 0);
195
196 size_t size = bytes == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
197 memory_window_bytes_.Reset(size);
198 memory_limit_bytes_ = bytes;
199 }
200
SetCpuLimit(uint32_t percentage,uint32_t window_ms)201 void Watchdog::SetCpuLimit(uint32_t percentage, uint32_t window_ms) {
202 std::lock_guard<std::mutex> guard(mutex_);
203
204 PERFETTO_CHECK(percentage <= 100);
205 PERFETTO_CHECK(IsMultipleOf(window_ms, polling_interval_ms_) ||
206 percentage == 0);
207
208 size_t size = percentage == 0 ? 0 : window_ms / polling_interval_ms_ + 1;
209 cpu_window_time_ticks_.Reset(size);
210 cpu_limit_percentage_ = percentage;
211 }
212
// Body of the watchdog thread spawned by Start().
//
// Loops forever on a poll() of the timerfd, using the polling interval as
// timeout. At each wakeup it:
//   1. Looks for an expired fatal timer and, if there is one, kills the thread
//      that armed it.
//   2. Samples /proc/self/stat and enforces the memory and CPU guardrails.
// The function returns only if /proc/self/stat cannot be opened or once
// |enabled_| becomes false.
// NOTE(review): in the former case the loop never runs, so it looks like fatal
// timers are not serviced either - confirm this is intended.
void Watchdog::ThreadMain() {
  // Register crash keys explicitly to avoid running out of slots at crash time.
  g_crash_key_reason.Register();

  base::ScopedFile stat_fd(base::OpenFile("/proc/self/stat", O_RDONLY));
  if (!stat_fd) {
    PERFETTO_ELOG("Failed to open stat file to enforce resource limits.");
    return;
  }

  PERFETTO_DCHECK(timer_fd_);

  constexpr uint8_t kFdCount = 1;
  struct pollfd fds[kFdCount]{};
  fds[0].fd = *timer_fd_;
  fds[0].events = POLLIN;

  for (;;) {
    // We use the poll() timeout to drive the periodic ticks for the cpu/memory
    // checks. The only other case when the poll() unblocks is when we crash
    // (or have to quit via enabled_ == false, but that happens only in tests).
    auto ret = poll(fds, kFdCount, static_cast<int>(polling_interval_ms_));
    if (!enabled_)
      return;
    if (ret < 0) {
      if (errno == ENOMEM || errno == EINTR) {
        // Should happen extremely rarely.
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
        continue;
      }
      PERFETTO_FATAL("watchdog poll() failed");
    }

    // If we get here either:
    // 1. poll() timed out, in which case we should process cpu/mem guardrails.
    // 2. A timer expired, in which case we shall crash.

    // Drain the timerfd. The fd is created with TFD_NONBLOCK in Start(), so in
    // case 1 the read() is expected to fail with EAGAIN rather than block.
    uint64_t expired = 0;  // Must be exactly 8 bytes.
    auto res = PERFETTO_EINTR(read(*timer_fd_, &expired, sizeof(expired)));
    PERFETTO_DCHECK((res < 0 && (errno == EAGAIN)) ||
                    (res == sizeof(expired) && expired > 0));
    const auto now = GetWallTimeMs();

    // Check if any of the timers expired.
    // Only the first expired timer found is acted upon. The lock is released
    // before killing, as SerializeLogsAndKillThread() can sleep for a while.
    int tid_to_kill = 0;
    WatchdogCrashReason crash_reason{};
    std::unique_lock<std::mutex> guard(mutex_);
    for (const auto& timer : timers_) {
      if (now >= timer.deadline) {
        tid_to_kill = timer.thread_id;
        crash_reason = timer.crash_reason;
        break;
      }
    }
    guard.unlock();

    if (tid_to_kill)
      SerializeLogsAndKillThread(tid_to_kill, crash_reason);

    // Check CPU and memory guardrails (if enabled).
    // The stat file is kept open across iterations: rewind it before reading.
    lseek(stat_fd.get(), 0, SEEK_SET);
    ProcStat stat;
    if (!ReadProcStat(stat_fd.get(), &stat))
      continue;
    uint64_t cpu_time = stat.utime + stat.stime;
    uint64_t rss_bytes =
        static_cast<uint64_t>(stat.rss_pages) * base::GetSysPageSize();

    // The memory check takes precedence: the CPU check is evaluated only if
    // the memory one did not trigger.
    bool threshold_exceeded = false;
    guard.lock();
    if (CheckMemory_Locked(rss_bytes)) {
      threshold_exceeded = true;
      crash_reason = WatchdogCrashReason::kMemGuardrail;
    } else if (CheckCpu_Locked(cpu_time)) {
      threshold_exceeded = true;
      crash_reason = WatchdogCrashReason::kCpuGuardrail;
    }
    guard.unlock();

    // For the guardrails the signal is sent to the thread whose id equals the
    // process id.
    if (threshold_exceeded)
      SerializeLogsAndKillThread(getpid(), crash_reason);
  }
}
296
SerializeLogsAndKillThread(int tid,WatchdogCrashReason crash_reason)297 void Watchdog::SerializeLogsAndKillThread(int tid,
298 WatchdogCrashReason crash_reason) {
299 g_crash_key_reason.Set(static_cast<int>(crash_reason));
300
301 // We are about to die. Serialize the logs into the crash buffer so the
302 // debuggerd crash handler picks them up and attaches to the bugreport.
303 // In the case of a PERFETTO_CHECK/PERFETTO_FATAL this is done in logging.h.
304 // But in the watchdog case, we don't hit that codepath and must do ourselves.
305 MaybeSerializeLastLogsForCrashReporting();
306
307 // Send a SIGABRT to the thread that armed the timer. This is to see the
308 // callstack of the thread that is stuck in a long task rather than the
309 // watchdog thread.
310 if (syscall(__NR_tgkill, getpid(), tid, SIGABRT) < 0) {
311 // At this point the process must die. If for any reason the tgkill doesn't
312 // work (e.g. the thread has disappeared), force a crash from here.
313 abort();
314 }
315
316 if (disable_kill_failsafe_for_testing_)
317 return;
318
319 // The tgkill() above will take some milliseconds to cause a crash, as it
320 // involves the kernel to queue the SIGABRT on the target thread (often the
321 // main thread, which is != watchdog thread) and do a scheduling round.
322 // If something goes wrong though (the target thread has signals masked or
323 // is stuck in an uninterruptible+wakekill syscall) force quit from this
324 // thread.
325 std::this_thread::sleep_for(std::chrono::seconds(10));
326 abort();
327 }
328
CheckMemory_Locked(uint64_t rss_bytes)329 bool Watchdog::CheckMemory_Locked(uint64_t rss_bytes) {
330 if (memory_limit_bytes_ == 0)
331 return false;
332
333 // Add the current stat value to the ring buffer and check that the mean
334 // remains under our threshold.
335 if (memory_window_bytes_.Push(rss_bytes)) {
336 if (memory_window_bytes_.Mean() >
337 static_cast<double>(memory_limit_bytes_)) {
338 PERFETTO_ELOG(
339 "Memory watchdog trigger. Memory window of %f bytes is above the "
340 "%" PRIu64 " bytes limit.",
341 memory_window_bytes_.Mean(), memory_limit_bytes_);
342 return true;
343 }
344 }
345 return false;
346 }
347
CheckCpu_Locked(uint64_t cpu_time)348 bool Watchdog::CheckCpu_Locked(uint64_t cpu_time) {
349 if (cpu_limit_percentage_ == 0)
350 return false;
351
352 // Add the cpu time to the ring buffer.
353 if (cpu_window_time_ticks_.Push(cpu_time)) {
354 // Compute the percentage over the whole window and check that it remains
355 // under the threshold.
356 uint64_t difference_ticks = cpu_window_time_ticks_.NewestWhenFull() -
357 cpu_window_time_ticks_.OldestWhenFull();
358 double window_interval_ticks =
359 (static_cast<double>(WindowTimeForRingBuffer(cpu_window_time_ticks_)) /
360 1000.0) *
361 static_cast<double>(sysconf(_SC_CLK_TCK));
362 double percentage = static_cast<double>(difference_ticks) /
363 static_cast<double>(window_interval_ticks) * 100;
364 if (percentage > cpu_limit_percentage_) {
365 PERFETTO_ELOG("CPU watchdog trigger. %f%% CPU use is above the %" PRIu32
366 "%% CPU limit.",
367 percentage, cpu_limit_percentage_);
368 return true;
369 }
370 }
371 return false;
372 }
373
WindowTimeForRingBuffer(const WindowedInterval & window)374 uint32_t Watchdog::WindowTimeForRingBuffer(const WindowedInterval& window) {
375 return static_cast<uint32_t>(window.size() - 1) * polling_interval_ms_;
376 }
377
Push(uint64_t sample)378 bool Watchdog::WindowedInterval::Push(uint64_t sample) {
379 // Add the sample to the current position in the ring buffer.
380 buffer_[position_] = sample;
381
382 // Update the position with next one circularily.
383 position_ = (position_ + 1) % size_;
384
385 // Set the filled flag the first time we wrap.
386 filled_ = filled_ || position_ == 0;
387 return filled_;
388 }
389
// Returns the mean of all the |size_| slots of the ring buffer, regardless of
// how many of them have been written so far. In this file it is called only
// after Push() has returned true.
double Watchdog::WindowedInterval::Mean() const {
  return MeanForArray(buffer_.get(), size_);
}
393
Clear()394 void Watchdog::WindowedInterval::Clear() {
395 position_ = 0;
396 buffer_.reset(new uint64_t[size_]());
397 }
398
Reset(size_t new_size)399 void Watchdog::WindowedInterval::Reset(size_t new_size) {
400 position_ = 0;
401 size_ = new_size;
402 buffer_.reset(new_size == 0 ? nullptr : new uint64_t[new_size]());
403 }
404
Timer(Watchdog * watchdog,uint32_t ms,WatchdogCrashReason crash_reason)405 Watchdog::Timer::Timer(Watchdog* watchdog,
406 uint32_t ms,
407 WatchdogCrashReason crash_reason)
408 : watchdog_(watchdog) {
409 if (!ms)
410 return; // No-op timer created when the watchdog is disabled.
411 timer_data_.deadline = GetWallTimeMs() + std::chrono::milliseconds(ms);
412 timer_data_.thread_id = GetThreadId();
413 timer_data_.crash_reason = crash_reason;
414 PERFETTO_DCHECK(watchdog_);
415 watchdog_->AddFatalTimer(timer_data_);
416 }
417
~Timer()418 Watchdog::Timer::~Timer() {
419 if (timer_data_.deadline.count())
420 watchdog_->RemoveFatalTimer(timer_data_);
421 }
422
Timer(Timer && other)423 Watchdog::Timer::Timer(Timer&& other) noexcept {
424 watchdog_ = std::move(other.watchdog_);
425 other.watchdog_ = nullptr;
426 timer_data_ = std::move(other.timer_data_);
427 other.timer_data_ = TimerData();
428 }
429
430 } // namespace base
431 } // namespace perfetto
432
433 #endif // PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)
434