/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_
18 #define INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_
19 
#include "perfetto/base/time.h"
#include "perfetto/ext/base/scoped_file.h"

#include <stddef.h>
#include <stdint.h>

#include <atomic>
#include <memory>
#include <mutex>
#include <thread>
#include <tuple>
#include <vector>
27 
28 namespace perfetto {
29 namespace base {
30 
// Opaque declaration; the enumerators are defined in watchdog.h. A scoped
// enum declared without an explicit base has a fixed underlying type of int,
// so the type is complete here and can be stored and passed by value.
enum class WatchdogCrashReason;  // Defined in watchdog.h.
32 
// Process statistics handed back through the |out| parameter of
// ReadProcStat().
// NOTE(review): the field names match the utime / stime / rss entries of
// /proc/[pid]/stat described in proc(5); presumably the CPU times are in
// clock ticks and the resident set size is in pages - confirm against the
// ReadProcStat() definition.
struct ProcStat {
  unsigned long int utime = 0;
  unsigned long int stime = 0;
  long int rss_pages = -1;  // Starts at -1, unlike the zeroed counters above.
};
38 
39 bool ReadProcStat(int fd, ProcStat* out);
40 
41 // Ensures that the calling program does not exceed certain hard limits on
42 // resource usage e.g. time, memory and CPU. If exceeded, the program is
43 // crashed.
44 class Watchdog {
45  public:
46   struct TimerData {
47     TimeMillis deadline{};  // Absolute deadline, CLOCK_MONOTONIC.
48     int thread_id = 0;      // The tid we'll send a SIGABRT to on expiry.
49     WatchdogCrashReason crash_reason{};  // Becomes a crash key.
50 
51     TimerData() = default;
TimerDataTimerData52     TimerData(TimeMillis d, int t) : deadline(d), thread_id(t) {}
53     bool operator<(const TimerData& x) const {
54       return std::tie(deadline, thread_id) < std::tie(x.deadline, x.thread_id);
55     }
56     bool operator==(const TimerData& x) const {
57       return std::tie(deadline, thread_id) == std::tie(x.deadline, x.thread_id);
58     }
59   };
60 
61   // Handle to the timer set to crash the program. If the handle is dropped,
62   // the timer is removed so the program does not crash.
63   class Timer {
64    public:
65     ~Timer();
66     Timer(Timer&&) noexcept;
67 
68    private:
69     friend class Watchdog;
70 
71     explicit Timer(Watchdog*, uint32_t ms, WatchdogCrashReason);
72     Timer(const Timer&) = delete;
73     Timer& operator=(const Timer&) = delete;
74 
75     // In production this is always Watchdog::GetInstance(), which is long
76     // lived. However unittests use a non-global instance.
77     Watchdog* watchdog_ = nullptr;
78     TimerData timer_data_;
79   };
80   virtual ~Watchdog();
81 
82   static Watchdog* GetInstance();
83 
84   // Sets a timer which will crash the program in |ms| milliseconds if the
85   // returned handle is not destroyed before this point.
86   // WatchdogCrashReason is used only to set a crash key in the case of a crash,
87   // to disambiguate different timer types.
88   Timer CreateFatalTimer(uint32_t ms, WatchdogCrashReason);
89 
90   // Starts the watchdog thread which monitors the memory and CPU usage
91   // of the program.
92   void Start();
93 
94   // Sets a limit on the memory (defined as the RSS) used by the program
95   // averaged over the last |window_ms| milliseconds. If |kb| is 0, any
96   // existing limit is removed.
97   // Note: |window_ms| has to be a multiple of |polling_interval_ms_|.
98   void SetMemoryLimit(uint64_t bytes, uint32_t window_ms);
99 
100   // Sets a limit on the CPU usage used by the program averaged over the last
101   // |window_ms| milliseconds. If |percentage| is 0, any existing limit is
102   // removed.
103   // Note: |window_ms| has to be a multiple of |polling_interval_ms_|.
104   void SetCpuLimit(uint32_t percentage, uint32_t window_ms);
105 
106  private:
107   // Represents a ring buffer in which integer values can be stored.
108   class WindowedInterval {
109    public:
110     // Pushes a new value into a ring buffer wrapping if necessary and returns
111     // whether the ring buffer is full.
112     bool Push(uint64_t sample);
113 
114     // Returns the mean of the values in the buffer.
115     double Mean() const;
116 
117     // Clears the ring buffer while keeping the existing size.
118     void Clear();
119 
120     // Resets the size of the buffer as well as clearing it.
121     void Reset(size_t new_size);
122 
123     // Gets the oldest value inserted in the buffer. The buffer must be full
124     // (i.e. Push returned true) before this method can be called.
OldestWhenFull()125     uint64_t OldestWhenFull() const {
126       PERFETTO_CHECK(filled_);
127       return buffer_[position_];
128     }
129 
130     // Gets the newest value inserted in the buffer. The buffer must be full
131     // (i.e. Push returned true) before this method can be called.
NewestWhenFull()132     uint64_t NewestWhenFull() const {
133       PERFETTO_CHECK(filled_);
134       return buffer_[(position_ + size_ - 1) % size_];
135     }
136 
137     // Returns the size of the ring buffer.
size()138     size_t size() const { return size_; }
139 
140    private:
141     bool filled_ = false;
142     size_t position_ = 0;
143     size_t size_ = 0;
144     std::unique_ptr<uint64_t[]> buffer_;
145   };
146 
147   Watchdog(const Watchdog&) = delete;
148   Watchdog& operator=(const Watchdog&) = delete;
149   Watchdog(Watchdog&&) = delete;
150   Watchdog& operator=(Watchdog&&) = delete;
151 
152   // Main method for the watchdog thread.
153   void ThreadMain();
154 
155   // Check each type of resource every |polling_interval_ms_| miillis.
156   // Returns true if the threshold is exceeded and the process should be killed.
157   bool CheckMemory_Locked(uint64_t rss_bytes);
158   bool CheckCpu_Locked(uint64_t cpu_time);
159 
160   void AddFatalTimer(TimerData);
161   void RemoveFatalTimer(TimerData);
162   void RearmTimerFd_Locked();
163   void SerializeLogsAndKillThread(int tid, WatchdogCrashReason);
164 
165   // Computes the time interval spanned by a given ring buffer with respect
166   // to |polling_interval_ms_|.
167   uint32_t WindowTimeForRingBuffer(const WindowedInterval& window);
168 
169   const uint32_t polling_interval_ms_;
170   std::atomic<bool> enabled_{false};
171   std::thread thread_;
172   ScopedPlatformHandle timer_fd_;
173 
174   // --- Begin lock-protected members ---
175 
176   std::mutex mutex_;
177 
178   uint64_t memory_limit_bytes_ = 0;
179   WindowedInterval memory_window_bytes_;
180 
181   uint32_t cpu_limit_percentage_ = 0;
182   WindowedInterval cpu_window_time_ticks_;
183 
184   // Outstanding timers created via CreateFatalTimer() and not yet destroyed.
185   // The vector is not sorted. In most cases there are only 1-2 timers, we can
186   // afford O(N) operations.
187   // All the timers in the list share the same |timer_fd_|, which is keeped
188   // armed on the min(timers_) through RearmTimerFd_Locked().
189   std::vector<TimerData> timers_;
190 
191   // --- End lock-protected members ---
192 
193  protected:
194   // Protected for testing.
195   explicit Watchdog(uint32_t polling_interval_ms);
196 
197   bool disable_kill_failsafe_for_testing_ = false;
198 };
199 
200 }  // namespace base
201 }  // namespace perfetto
202 #endif  // INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_
203