• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_
18 #define INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_
19 
20 #include "perfetto/base/thread_annotations.h"
21 #include "perfetto/base/time.h"
22 #include "perfetto/ext/base/scoped_file.h"
23 
24 #include <atomic>
25 #include <mutex>
26 #include <thread>
27 #include <vector>
28 
29 namespace perfetto {
30 namespace base {
31 
32 enum class WatchdogCrashReason;  // Defined in watchdog.h.
33 
// Plain value holder for the per-process counters that ReadProcStat() writes
// through its |out| parameter.
// NOTE(review): the field names match utime/stime/rss of /proc/<pid>/stat
// (see proc(5)); confirm units against the ReadProcStat() implementation.
struct ProcStat {
  unsigned long utime{0};
  unsigned long stime{0};
  // Starts at -1 and keeps that value until something overwrites it.
  long rss_pages{-1};
};

// Reads from the file descriptor |fd| and stores the result in |out|.
// NOTE(review): the definition is not in this header; the bool result
// presumably reports whether reading/parsing succeeded - confirm.
bool ReadProcStat(int fd, ProcStat* out);
41 
42 // Ensures that the calling program does not exceed certain hard limits on
43 // resource usage e.g. time, memory and CPU. If exceeded, the program is
44 // crashed.
45 class Watchdog {
46  public:
47   struct TimerData {
48     TimeMillis deadline{};  // Absolute deadline, CLOCK_MONOTONIC.
49     int thread_id = 0;      // The tid we'll send a SIGABRT to on expiry.
50     WatchdogCrashReason crash_reason{};  // Becomes a crash key.
51 
52     TimerData() = default;
TimerDataTimerData53     TimerData(TimeMillis d, int t) : deadline(d), thread_id(t) {}
54     bool operator<(const TimerData& x) const {
55       return std::tie(deadline, thread_id) < std::tie(x.deadline, x.thread_id);
56     }
57     bool operator==(const TimerData& x) const {
58       return std::tie(deadline, thread_id) == std::tie(x.deadline, x.thread_id);
59     }
60   };
61 
62   // Handle to the timer set to crash the program. If the handle is dropped,
63   // the timer is removed so the program does not crash.
64   class Timer {
65    public:
66     ~Timer();
67     Timer(Timer&&) noexcept;
68 
69    private:
70     friend class Watchdog;
71 
72     explicit Timer(Watchdog*, uint32_t ms, WatchdogCrashReason);
73     Timer(const Timer&) = delete;
74     Timer& operator=(const Timer&) = delete;
75 
76     // In production this is always Watchdog::GetInstance(), which is long
77     // lived. However unittests use a non-global instance.
78     Watchdog* watchdog_ = nullptr;
79     TimerData timer_data_;
80   };
81   virtual ~Watchdog();
82 
83   static Watchdog* GetInstance();
84 
85   // Sets a timer which will crash the program in |ms| milliseconds if the
86   // returned handle is not destroyed before this point.
87   // WatchdogCrashReason is used only to set a crash key in the case of a crash,
88   // to disambiguate different timer types.
89   Timer CreateFatalTimer(uint32_t ms, WatchdogCrashReason);
90 
91   // Starts the watchdog thread which monitors the memory and CPU usage
92   // of the program.
93   void Start();
94 
95   // Sets a limit on the memory (defined as the RSS) used by the program
96   // averaged over the last |window_ms| milliseconds. If |kb| is 0, any
97   // existing limit is removed.
98   // Note: |window_ms| has to be a multiple of |polling_interval_ms_|.
99   void SetMemoryLimit(uint64_t bytes, uint32_t window_ms);
100 
101   // Sets a limit on the CPU usage used by the program averaged over the last
102   // |window_ms| milliseconds. If |percentage| is 0, any existing limit is
103   // removed.
104   // Note: |window_ms| has to be a multiple of |polling_interval_ms_|.
105   void SetCpuLimit(uint32_t percentage, uint32_t window_ms);
106 
107  private:
108   // Represents a ring buffer in which integer values can be stored.
109   class WindowedInterval {
110    public:
111     // Pushes a new value into a ring buffer wrapping if necessary and returns
112     // whether the ring buffer is full.
113     bool Push(uint64_t sample);
114 
115     // Returns the mean of the values in the buffer.
116     double Mean() const;
117 
118     // Clears the ring buffer while keeping the existing size.
119     void Clear();
120 
121     // Resets the size of the buffer as well as clearing it.
122     void Reset(size_t new_size);
123 
124     // Gets the oldest value inserted in the buffer. The buffer must be full
125     // (i.e. Push returned true) before this method can be called.
OldestWhenFull()126     uint64_t OldestWhenFull() const {
127       PERFETTO_CHECK(filled_);
128       return buffer_[position_];
129     }
130 
131     // Gets the newest value inserted in the buffer. The buffer must be full
132     // (i.e. Push returned true) before this method can be called.
NewestWhenFull()133     uint64_t NewestWhenFull() const {
134       PERFETTO_CHECK(filled_);
135       return buffer_[(position_ + size_ - 1) % size_];
136     }
137 
138     // Returns the size of the ring buffer.
size()139     size_t size() const { return size_; }
140 
141    private:
142     bool filled_ = false;
143     size_t position_ = 0;
144     size_t size_ = 0;
145     std::unique_ptr<uint64_t[]> buffer_;
146   };
147 
148   Watchdog(const Watchdog&) = delete;
149   Watchdog& operator=(const Watchdog&) = delete;
150   Watchdog(Watchdog&&) = delete;
151   Watchdog& operator=(Watchdog&&) = delete;
152 
153   // Main method for the watchdog thread.
154   void ThreadMain();
155 
156   // Check each type of resource every |polling_interval_ms_| miillis.
157   // Returns true if the threshold is exceeded and the process should be killed.
158   bool CheckMemory_Locked(uint64_t rss_bytes)
159       PERFETTO_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
160   bool CheckCpu_Locked(uint64_t cpu_time)
161       PERFETTO_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
162 
163   void AddFatalTimer(TimerData);
164   void RemoveFatalTimer(TimerData);
165   void RearmTimerFd_Locked() PERFETTO_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
166   void SerializeLogsAndKillThread(int tid, WatchdogCrashReason);
167 
168   // Computes the time interval spanned by a given ring buffer with respect
169   // to |polling_interval_ms_|.
170   uint32_t WindowTimeForRingBuffer(const WindowedInterval& window);
171 
172   const uint32_t polling_interval_ms_;
173   std::atomic<bool> enabled_{false};
174   std::thread thread_;
175   ScopedPlatformHandle timer_fd_;
176 
177   std::mutex mutex_;
178 
179   uint64_t memory_limit_bytes_ PERFETTO_GUARDED_BY(mutex_) = 0;
180   WindowedInterval memory_window_bytes_ PERFETTO_GUARDED_BY(mutex_);
181 
182   uint32_t cpu_limit_percentage_ PERFETTO_GUARDED_BY(mutex_) = 0;
183   WindowedInterval cpu_window_time_ticks_ PERFETTO_GUARDED_BY(mutex_);
184 
185   // Outstanding timers created via CreateFatalTimer() and not yet destroyed.
186   // The vector is not sorted. In most cases there are only 1-2 timers, we can
187   // afford O(N) operations.
188   // All the timers in the list share the same |timer_fd_|, which is keeped
189   // armed on the min(timers_) through RearmTimerFd_Locked().
190   std::vector<TimerData> timers_ PERFETTO_GUARDED_BY(mutex_);
191 
192  protected:
193   // Protected for testing.
194   explicit Watchdog(uint32_t polling_interval_ms);
195 
196   bool disable_kill_failsafe_for_testing_ = false;
197 };
198 
199 }  // namespace base
200 }  // namespace perfetto
201 #endif  // INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_
202