1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_ 18 #define INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_ 19 20 #include "perfetto/base/thread_annotations.h" 21 #include "perfetto/base/time.h" 22 #include "perfetto/ext/base/scoped_file.h" 23 24 #include <atomic> 25 #include <mutex> 26 #include <thread> 27 #include <vector> 28 29 namespace perfetto { 30 namespace base { 31 32 enum class WatchdogCrashReason; // Defined in watchdog.h. 33 34 struct ProcStat { 35 unsigned long int utime = 0l; 36 unsigned long int stime = 0l; 37 long int rss_pages = -1l; 38 }; 39 40 bool ReadProcStat(int fd, ProcStat* out); 41 42 // Ensures that the calling program does not exceed certain hard limits on 43 // resource usage e.g. time, memory and CPU. If exceeded, the program is 44 // crashed. 45 class Watchdog { 46 public: 47 struct TimerData { 48 TimeMillis deadline{}; // Absolute deadline, CLOCK_MONOTONIC. 49 int thread_id = 0; // The tid we'll send a SIGABRT to on expiry. 50 WatchdogCrashReason crash_reason{}; // Becomes a crash key. 51 52 TimerData() = default; TimerDataTimerData53 TimerData(TimeMillis d, int t) : deadline(d), thread_id(t) {} 54 bool operator<(const TimerData& x) const { 55 return std::tie(deadline, thread_id) < std::tie(x.deadline, x.thread_id); 56 } 57 bool operator==(const TimerData& x) const { 58 return std::tie(deadline, thread_id) == std::tie(x.deadline, x.thread_id); 59 } 60 }; 61 62 // Handle to the timer set to crash the program. If the handle is dropped, 63 // the timer is removed so the program does not crash. 64 class Timer { 65 public: 66 ~Timer(); 67 Timer(Timer&&) noexcept; 68 69 private: 70 friend class Watchdog; 71 72 explicit Timer(Watchdog*, uint32_t ms, WatchdogCrashReason); 73 Timer(const Timer&) = delete; 74 Timer& operator=(const Timer&) = delete; 75 76 // In production this is always Watchdog::GetInstance(), which is long 77 // lived. However unittests use a non-global instance. 78 Watchdog* watchdog_ = nullptr; 79 TimerData timer_data_; 80 }; 81 virtual ~Watchdog(); 82 83 static Watchdog* GetInstance(); 84 85 // Sets a timer which will crash the program in |ms| milliseconds if the 86 // returned handle is not destroyed before this point. 87 // WatchdogCrashReason is used only to set a crash key in the case of a crash, 88 // to disambiguate different timer types. 89 Timer CreateFatalTimer(uint32_t ms, WatchdogCrashReason); 90 91 // Starts the watchdog thread which monitors the memory and CPU usage 92 // of the program. 93 void Start(); 94 95 // Sets a limit on the memory (defined as the RSS) used by the program 96 // averaged over the last |window_ms| milliseconds. If |kb| is 0, any 97 // existing limit is removed. 98 // Note: |window_ms| has to be a multiple of |polling_interval_ms_|. 99 void SetMemoryLimit(uint64_t bytes, uint32_t window_ms); 100 101 // Sets a limit on the CPU usage used by the program averaged over the last 102 // |window_ms| milliseconds. If |percentage| is 0, any existing limit is 103 // removed. 104 // Note: |window_ms| has to be a multiple of |polling_interval_ms_|. 105 void SetCpuLimit(uint32_t percentage, uint32_t window_ms); 106 107 private: 108 // Represents a ring buffer in which integer values can be stored. 109 class WindowedInterval { 110 public: 111 // Pushes a new value into a ring buffer wrapping if necessary and returns 112 // whether the ring buffer is full. 113 bool Push(uint64_t sample); 114 115 // Returns the mean of the values in the buffer. 116 double Mean() const; 117 118 // Clears the ring buffer while keeping the existing size. 119 void Clear(); 120 121 // Resets the size of the buffer as well as clearing it. 122 void Reset(size_t new_size); 123 124 // Gets the oldest value inserted in the buffer. The buffer must be full 125 // (i.e. Push returned true) before this method can be called. OldestWhenFull()126 uint64_t OldestWhenFull() const { 127 PERFETTO_CHECK(filled_); 128 return buffer_[position_]; 129 } 130 131 // Gets the newest value inserted in the buffer. The buffer must be full 132 // (i.e. Push returned true) before this method can be called. NewestWhenFull()133 uint64_t NewestWhenFull() const { 134 PERFETTO_CHECK(filled_); 135 return buffer_[(position_ + size_ - 1) % size_]; 136 } 137 138 // Returns the size of the ring buffer. size()139 size_t size() const { return size_; } 140 141 private: 142 bool filled_ = false; 143 size_t position_ = 0; 144 size_t size_ = 0; 145 std::unique_ptr<uint64_t[]> buffer_; 146 }; 147 148 Watchdog(const Watchdog&) = delete; 149 Watchdog& operator=(const Watchdog&) = delete; 150 Watchdog(Watchdog&&) = delete; 151 Watchdog& operator=(Watchdog&&) = delete; 152 153 // Main method for the watchdog thread. 154 void ThreadMain(); 155 156 // Check each type of resource every |polling_interval_ms_| miillis. 157 // Returns true if the threshold is exceeded and the process should be killed. 158 bool CheckMemory_Locked(uint64_t rss_bytes) 159 PERFETTO_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 160 bool CheckCpu_Locked(uint64_t cpu_time) 161 PERFETTO_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 162 163 void AddFatalTimer(TimerData); 164 void RemoveFatalTimer(TimerData); 165 void RearmTimerFd_Locked() PERFETTO_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 166 void SerializeLogsAndKillThread(int tid, WatchdogCrashReason); 167 168 // Computes the time interval spanned by a given ring buffer with respect 169 // to |polling_interval_ms_|. 170 uint32_t WindowTimeForRingBuffer(const WindowedInterval& window); 171 172 const uint32_t polling_interval_ms_; 173 std::atomic<bool> enabled_{false}; 174 std::thread thread_; 175 ScopedPlatformHandle timer_fd_; 176 177 std::mutex mutex_; 178 179 uint64_t memory_limit_bytes_ PERFETTO_GUARDED_BY(mutex_) = 0; 180 WindowedInterval memory_window_bytes_ PERFETTO_GUARDED_BY(mutex_); 181 182 uint32_t cpu_limit_percentage_ PERFETTO_GUARDED_BY(mutex_) = 0; 183 WindowedInterval cpu_window_time_ticks_ PERFETTO_GUARDED_BY(mutex_); 184 185 // Outstanding timers created via CreateFatalTimer() and not yet destroyed. 186 // The vector is not sorted. In most cases there are only 1-2 timers, we can 187 // afford O(N) operations. 188 // All the timers in the list share the same |timer_fd_|, which is keeped 189 // armed on the min(timers_) through RearmTimerFd_Locked(). 190 std::vector<TimerData> timers_ PERFETTO_GUARDED_BY(mutex_); 191 192 protected: 193 // Protected for testing. 194 explicit Watchdog(uint32_t polling_interval_ms); 195 196 bool disable_kill_failsafe_for_testing_ = false; 197 }; 198 199 } // namespace base 200 } // namespace perfetto 201 #endif // INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_ 202