1 // Copyright 2022 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef BASE_MESSAGE_LOOP_MESSAGE_PUMP_EPOLL_H_ 6 #define BASE_MESSAGE_LOOP_MESSAGE_PUMP_EPOLL_H_ 7 8 #include <poll.h> 9 #include <sys/epoll.h> 10 11 #include <cstdint> 12 #include <map> 13 14 #include "base/base_export.h" 15 #include "base/dcheck_is_on.h" 16 #include "base/feature_list.h" 17 #include "base/files/scoped_file.h" 18 #include "base/memory/raw_ptr.h" 19 #include "base/memory/raw_ptr_exclusion.h" 20 #include "base/memory/ref_counted.h" 21 #include "base/memory/weak_ptr.h" 22 #include "base/message_loop/message_pump.h" 23 #include "base/message_loop/watchable_io_message_pump_posix.h" 24 #include "base/threading/thread_checker.h" 25 #include "base/time/time.h" 26 #include "third_party/abseil-cpp/absl/container/inlined_vector.h" 27 28 #if DCHECK_IS_ON() 29 #include <deque> 30 #include <optional> 31 32 #include "base/debug/stack_trace.h" 33 #endif 34 35 namespace base { 36 37 // Use poll() rather than epoll(). 38 // 39 // Why? epoll() is supposed to be strictly better. But it has one consequence 40 // we don't necessarily want: when writing to a AF_UNIX socket, the kernel 41 // will wake up the waiter with a "sync" wakeup. The concept of a "sync" 42 // wakeup has various consequences, but on Android it tends to bias the 43 // scheduler towards a "baton passing" mode, where the current thread yields 44 // its CPU to the target. This is desirable to lower latency. 45 // 46 // However, when using epoll_wait(), the "sync" flag is dropped from the 47 // wakeup path. This is not the case with poll(). So let's use it to preserve 48 // this behavior. 49 // 50 // Caveat: Since both we and the kernel need to walk the list of all fds at 51 // every call, don't do it when we have too many FDs. 52 BASE_FEATURE(kUsePollForMessagePumpEpoll, 53 "UsePollForMessagePumpEpoll", 54 base::FEATURE_DISABLED_BY_DEFAULT); 55 56 // A MessagePump implementation suitable for I/O message loops on Linux-based 57 // systems with epoll API support. 58 class BASE_EXPORT MessagePumpEpoll : public MessagePump, 59 public WatchableIOMessagePumpPosix { 60 class Interest; 61 struct InterestParams; 62 63 public: 64 // Object which FD-watching clients must keep alive to continue watching 65 // their FD. See WatchFileDescriptor() below. 66 class FdWatchController : public FdWatchControllerInterface { 67 public: 68 explicit FdWatchController(const Location& from_here); 69 70 FdWatchController(const FdWatchController&) = delete; 71 FdWatchController& operator=(const FdWatchController&) = delete; 72 73 // Implicitly calls StopWatchingFileDescriptor. 74 ~FdWatchController() override; 75 76 // FdWatchControllerInterface: 77 bool StopWatchingFileDescriptor() override; 78 79 private: 80 friend class MessagePumpEpoll; 81 friend class MessagePumpEpollTest; 82 set_watcher(FdWatcher * watcher)83 void set_watcher(FdWatcher* watcher) { watcher_ = watcher; } set_pump(WeakPtr<MessagePumpEpoll> pump)84 void set_pump(WeakPtr<MessagePumpEpoll> pump) { pump_ = std::move(pump); } interest()85 const scoped_refptr<Interest>& interest() const { return interest_; } 86 87 // Creates a new Interest described by `params` and adopts it as this 88 // controller's exclusive interest. Any prior interest is dropped by the 89 // controller and should be unregistered on the MessagePumpEpoll. 90 const scoped_refptr<Interest>& AssignInterest(const InterestParams& params); 91 void ClearInterest(); 92 93 void OnFdReadable(); 94 void OnFdWritable(); 95 96 raw_ptr<FdWatcher> watcher_ = nullptr; 97 98 // If this pointer is non-null when the FdWatchController is destroyed, the 99 // pointee is set to true. 100 raw_ptr<bool> was_destroyed_ = nullptr; 101 102 WeakPtr<MessagePumpEpoll> pump_; 103 scoped_refptr<Interest> interest_; 104 }; 105 106 MessagePumpEpoll(); 107 MessagePumpEpoll(const MessagePumpEpoll&) = delete; 108 MessagePumpEpoll& operator=(const MessagePumpEpoll&) = delete; 109 ~MessagePumpEpoll() override; 110 111 // Initializes features for this class. See `base::features::Init()`. 112 static void InitializeFeatures(); 113 114 // Starts watching `fd` for events as prescribed by `mode` (see 115 // WatchableIOMessagePumpPosix). When an event occurs, `watcher` is notified. 116 // 117 // If `persistent` is false, the watch only persists until a matching event 118 // is observed, and `watcher` will only see at most one event; otherwise it 119 // remains active until explicitly cancelled and `watcher` may see multiple 120 // events over time. 121 // 122 // The watch can be cancelled at any time by destroying the `controller` or 123 // explicitly calling StopWatchingFileDescriptor() on it. 124 // 125 // IMPORTANT: `fd` MUST remain open as long as controller is alive and not 126 // stopped. If `fd` is closed while the watch is still active, this will 127 // result in memory bugs. 128 bool WatchFileDescriptor(int fd, 129 bool persistent, 130 int mode, 131 FdWatchController* controller, 132 FdWatcher* watcher); 133 134 // MessagePump methods: 135 void Run(Delegate* delegate) override; 136 void Quit() override; 137 void ScheduleWork() override; 138 void ScheduleDelayedWork( 139 const Delegate::NextWorkInfo& next_work_info) override; 140 141 private: 142 friend class MessagePumpEpollTest; 143 144 // The WatchFileDescriptor API supports multiple FdWatchControllers watching 145 // the same file descriptor, potentially for different events; but the epoll 146 // API only supports a single interest list entry per unique file descriptor. 147 // 148 // EpollEventEntry tracks all epoll state relevant to a single file 149 // descriptor, including references to all active and inactive Interests 150 // concerned with that descriptor. This is used to derive a single aggregate 151 // interest entry for the descriptor when manipulating epoll. 152 struct EpollEventEntry { 153 explicit EpollEventEntry(int fd); 154 EpollEventEntry(const EpollEventEntry&) = delete; 155 EpollEventEntry& operator=(const EpollEventEntry&) = delete; 156 ~EpollEventEntry(); 157 FromEpollEventEpollEventEntry158 static EpollEventEntry& FromEpollEvent(epoll_event& e) { 159 return *static_cast<EpollEventEntry*>(e.data.ptr); 160 } 161 162 // Returns the combined set of epoll event flags which should be monitored 163 // by the epoll instance for `fd`. This is based on a combination of the 164 // parameters of all currently active elements in `interests`. Namely: 165 // - EPOLLIN is set if any active Interest wants to `read`. 166 // - EPOLLOUT is set if any active Interest wants to `write`. 167 // - EPOLLONESHOT is set if all active Interests are one-shot. 168 uint32_t ComputeActiveEvents() const; 169 170 // The file descriptor to which this entry pertains. 171 const int fd; 172 173 // A cached copy of the last known epoll event bits registered for this 174 // descriptor on the epoll instance. 175 uint32_t registered_events = 0; 176 177 // A collection of all the interests regarding `fd` on this message pump. 178 // The small amount of inline storage avoids heap allocation in virtually 179 // all real scenarios, since there's little practical value in having more 180 // than two controllers (e.g. one reader and one writer) watch the same 181 // descriptor on the same thread. 182 absl::InlinedVector<scoped_refptr<Interest>, 2> interests; 183 184 // Temporary pointer to an active epoll_event structure which refers to 185 // this entry. This is set immediately upon returning from epoll_wait() and 186 // cleared again immediately before dispatching to any registered interests, 187 // so long as this entry isn't destroyed in the interim. 188 raw_ptr<epoll_event> active_event = nullptr; 189 190 // If the file descriptor is disconnected and no active `interests`, remove 191 // it from the epoll interest list to avoid unconditionally epoll_wait 192 // return, and prevent any future update on this `EpollEventEntry`. 193 bool stopped = false; 194 195 #if DCHECK_IS_ON() 196 struct EpollHistory { 197 base::debug::StackTrace stack_trace; 198 std::optional<epoll_event> event; 199 }; 200 static constexpr ssize_t kEpollHistoryWindowSize = 5; 201 std::deque<EpollHistory> epoll_history_; 202 PushEpollHistoryEpollEventEntry203 void PushEpollHistory(std::optional<epoll_event> event) { 204 EpollHistory info = {.stack_trace = base::debug::StackTrace(), 205 .event = event}; 206 epoll_history_.push_back(info); 207 if (epoll_history_.size() > kEpollHistoryWindowSize) { 208 epoll_history_.pop_front(); 209 } 210 } 211 #endif 212 }; 213 214 // State which lives on the stack within Run(), to support nested run loops. 215 struct RunState { RunStateRunState216 explicit RunState(Delegate* delegate) : delegate(delegate) {} 217 218 // RAW_PTR_EXCLUSION: Performance reasons (based on analysis of sampling 219 // profiler data and tab_search:top100:2020). 220 RAW_PTR_EXCLUSION Delegate* const delegate; 221 222 // Used to flag that the current Run() invocation should return ASAP. 223 bool should_quit = false; 224 }; 225 226 void AddEpollEvent(EpollEventEntry& entry); 227 void UpdateEpollEvent(EpollEventEntry& entry); 228 void StopEpollEvent(EpollEventEntry& entry); 229 void UnregisterInterest(const scoped_refptr<Interest>& interest); 230 bool WaitForEpollEvents(TimeDelta timeout); 231 bool GetEventsPoll(int epoll_timeout, std::vector<epoll_event>* epoll_events); 232 void OnEpollEvent(EpollEventEntry& entry, uint32_t events); 233 void HandleEvent(int fd, 234 bool can_read, 235 bool can_write, 236 FdWatchController* controller); 237 void HandleWakeUp(); 238 239 void BeginNativeWorkBatch(); 240 void RecordPeriodicMetrics(); 241 242 std::vector<struct pollfd>::iterator FindPollEntry(int fd); 243 void RemovePollEntry(int fd); 244 245 // Null if Run() is not currently executing. Otherwise it's a pointer into the 246 // stack of the innermost nested Run() invocation. 247 raw_ptr<RunState> run_state_ = nullptr; 248 249 // This flag is set when starting to process native work; reset after every 250 // `DoWork()` call. See crbug.com/1500295. 251 bool native_work_started_ = false; 252 253 // Mapping of all file descriptors currently watched by this message pump. 254 // std::map was chosen because (1) the number of elements can vary widely, 255 // (2) we don't do frequent lookups, and (3) values need stable addresses 256 // across insertion or removal of other elements. 257 std::map<int, EpollEventEntry> entries_; 258 259 // pollfd array passed to poll() when not using epoll. 260 std::vector<struct pollfd> pollfds_; 261 262 // The epoll instance used by this message pump to monitor file descriptors. 263 ScopedFD epoll_; 264 265 // An eventfd object used to wake the pump's thread when scheduling new work. 266 ScopedFD wake_event_; 267 268 // Tracks when we should next record periodic metrics. 269 base::TimeTicks next_metrics_time_; 270 271 // WatchFileDescriptor() must be called from this thread, and so must 272 // FdWatchController::StopWatchingFileDescriptor(). 273 THREAD_CHECKER(thread_checker_); 274 275 WeakPtrFactory<MessagePumpEpoll> weak_ptr_factory_{this}; 276 }; 277 278 } // namespace base 279 280 #endif // BASE_MESSAGE_LOOP_MESSAGE_PUMP_EPOLL_H_ 281