1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_ 18 #define INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_ 19 20 #include "perfetto/base/time.h" 21 #include "perfetto/ext/base/scoped_file.h" 22 23 #include <atomic> 24 #include <mutex> 25 #include <thread> 26 #include <vector> 27 28 namespace perfetto { 29 namespace base { 30 31 enum class WatchdogCrashReason; // Defined in watchdog.h. 32 33 struct ProcStat { 34 unsigned long int utime = 0l; 35 unsigned long int stime = 0l; 36 long int rss_pages = -1l; 37 }; 38 39 bool ReadProcStat(int fd, ProcStat* out); 40 41 // Ensures that the calling program does not exceed certain hard limits on 42 // resource usage e.g. time, memory and CPU. If exceeded, the program is 43 // crashed. 44 class Watchdog { 45 public: 46 struct TimerData { 47 TimeMillis deadline{}; // Absolute deadline, CLOCK_MONOTONIC. 48 int thread_id = 0; // The tid we'll send a SIGABRT to on expiry. 49 WatchdogCrashReason crash_reason{}; // Becomes a crash key. 50 51 TimerData() = default; TimerDataTimerData52 TimerData(TimeMillis d, int t) : deadline(d), thread_id(t) {} 53 bool operator<(const TimerData& x) const { 54 return std::tie(deadline, thread_id) < std::tie(x.deadline, x.thread_id); 55 } 56 bool operator==(const TimerData& x) const { 57 return std::tie(deadline, thread_id) == std::tie(x.deadline, x.thread_id); 58 } 59 }; 60 61 // Handle to the timer set to crash the program. If the handle is dropped, 62 // the timer is removed so the program does not crash. 63 class Timer { 64 public: 65 ~Timer(); 66 Timer(Timer&&) noexcept; 67 68 private: 69 friend class Watchdog; 70 71 explicit Timer(Watchdog*, uint32_t ms, WatchdogCrashReason); 72 Timer(const Timer&) = delete; 73 Timer& operator=(const Timer&) = delete; 74 75 // In production this is always Watchdog::GetInstance(), which is long 76 // lived. However unittests use a non-global instance. 77 Watchdog* watchdog_ = nullptr; 78 TimerData timer_data_; 79 }; 80 virtual ~Watchdog(); 81 82 static Watchdog* GetInstance(); 83 84 // Sets a timer which will crash the program in |ms| milliseconds if the 85 // returned handle is not destroyed before this point. 86 // WatchdogCrashReason is used only to set a crash key in the case of a crash, 87 // to disambiguate different timer types. 88 Timer CreateFatalTimer(uint32_t ms, WatchdogCrashReason); 89 90 // Starts the watchdog thread which monitors the memory and CPU usage 91 // of the program. 92 void Start(); 93 94 // Sets a limit on the memory (defined as the RSS) used by the program 95 // averaged over the last |window_ms| milliseconds. If |kb| is 0, any 96 // existing limit is removed. 97 // Note: |window_ms| has to be a multiple of |polling_interval_ms_|. 98 void SetMemoryLimit(uint64_t bytes, uint32_t window_ms); 99 100 // Sets a limit on the CPU usage used by the program averaged over the last 101 // |window_ms| milliseconds. If |percentage| is 0, any existing limit is 102 // removed. 103 // Note: |window_ms| has to be a multiple of |polling_interval_ms_|. 104 void SetCpuLimit(uint32_t percentage, uint32_t window_ms); 105 106 private: 107 // Represents a ring buffer in which integer values can be stored. 108 class WindowedInterval { 109 public: 110 // Pushes a new value into a ring buffer wrapping if necessary and returns 111 // whether the ring buffer is full. 112 bool Push(uint64_t sample); 113 114 // Returns the mean of the values in the buffer. 115 double Mean() const; 116 117 // Clears the ring buffer while keeping the existing size. 118 void Clear(); 119 120 // Resets the size of the buffer as well as clearing it. 121 void Reset(size_t new_size); 122 123 // Gets the oldest value inserted in the buffer. The buffer must be full 124 // (i.e. Push returned true) before this method can be called. OldestWhenFull()125 uint64_t OldestWhenFull() const { 126 PERFETTO_CHECK(filled_); 127 return buffer_[position_]; 128 } 129 130 // Gets the newest value inserted in the buffer. The buffer must be full 131 // (i.e. Push returned true) before this method can be called. NewestWhenFull()132 uint64_t NewestWhenFull() const { 133 PERFETTO_CHECK(filled_); 134 return buffer_[(position_ + size_ - 1) % size_]; 135 } 136 137 // Returns the size of the ring buffer. size()138 size_t size() const { return size_; } 139 140 private: 141 bool filled_ = false; 142 size_t position_ = 0; 143 size_t size_ = 0; 144 std::unique_ptr<uint64_t[]> buffer_; 145 }; 146 147 Watchdog(const Watchdog&) = delete; 148 Watchdog& operator=(const Watchdog&) = delete; 149 Watchdog(Watchdog&&) = delete; 150 Watchdog& operator=(Watchdog&&) = delete; 151 152 // Main method for the watchdog thread. 153 void ThreadMain(); 154 155 // Check each type of resource every |polling_interval_ms_| miillis. 156 // Returns true if the threshold is exceeded and the process should be killed. 157 bool CheckMemory_Locked(uint64_t rss_bytes); 158 bool CheckCpu_Locked(uint64_t cpu_time); 159 160 void AddFatalTimer(TimerData); 161 void RemoveFatalTimer(TimerData); 162 void RearmTimerFd_Locked(); 163 void SerializeLogsAndKillThread(int tid, WatchdogCrashReason); 164 165 // Computes the time interval spanned by a given ring buffer with respect 166 // to |polling_interval_ms_|. 167 uint32_t WindowTimeForRingBuffer(const WindowedInterval& window); 168 169 const uint32_t polling_interval_ms_; 170 std::atomic<bool> enabled_{false}; 171 std::thread thread_; 172 ScopedPlatformHandle timer_fd_; 173 174 // --- Begin lock-protected members --- 175 176 std::mutex mutex_; 177 178 uint64_t memory_limit_bytes_ = 0; 179 WindowedInterval memory_window_bytes_; 180 181 uint32_t cpu_limit_percentage_ = 0; 182 WindowedInterval cpu_window_time_ticks_; 183 184 // Outstanding timers created via CreateFatalTimer() and not yet destroyed. 185 // The vector is not sorted. In most cases there are only 1-2 timers, we can 186 // afford O(N) operations. 187 // All the timers in the list share the same |timer_fd_|, which is keeped 188 // armed on the min(timers_) through RearmTimerFd_Locked(). 189 std::vector<TimerData> timers_; 190 191 // --- End lock-protected members --- 192 193 protected: 194 // Protected for testing. 195 explicit Watchdog(uint32_t polling_interval_ms); 196 197 bool disable_kill_failsafe_for_testing_ = false; 198 }; 199 200 } // namespace base 201 } // namespace perfetto 202 #endif // INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_ 203