// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: BSD-3-Clause //! Safe wrappers over the //! [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html) API. use std::io; use std::ops::{Deref, Drop}; use std::os::unix::io::{AsRawFd, RawFd}; #[cfg(any(target_os = "linux", target_os = "android"))] use bitflags::bitflags; use libc::{ epoll_create1, epoll_ctl, epoll_event, epoll_wait, EPOLLERR, EPOLLET, EPOLLEXCLUSIVE, EPOLLHUP, EPOLLIN, EPOLLONESHOT, EPOLLOUT, EPOLLPRI, EPOLLRDHUP, EPOLLWAKEUP, EPOLL_CLOEXEC, EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD, }; use crate::syscall::SyscallReturnCode; /// Wrapper over `EPOLL_CTL_*` operations that can be performed on a file descriptor. #[derive(Debug)] #[repr(i32)] pub enum ControlOperation { /// Add a file descriptor to the interest list. Add = EPOLL_CTL_ADD, /// Change the settings associated with a file descriptor that is /// already in the interest list. Modify = EPOLL_CTL_MOD, /// Remove a file descriptor from the interest list. Delete = EPOLL_CTL_DEL, } bitflags! { /// The type of events we can monitor a file descriptor for. pub struct EventSet: u32 { /// The associated file descriptor is available for read operations. const IN = EPOLLIN as u32; /// The associated file descriptor is available for write operations. const OUT = EPOLLOUT as u32; /// Error condition happened on the associated file descriptor. const ERROR = EPOLLERR as u32; /// This can be used to detect peer shutdown when using Edge Triggered monitoring. const READ_HANG_UP = EPOLLRDHUP as u32; /// Sets the Edge Triggered behavior for the associated file descriptor. /// The default behavior is Level Triggered. const EDGE_TRIGGERED = EPOLLET as u32; /// Hang up happened on the associated file descriptor. Note that `epoll_wait` /// will always wait for this event and it is not necessary to set it in events. const HANG_UP = EPOLLHUP as u32; /// There is an exceptional condition on that file descriptor. It is mostly used to /// set high priority for some data. const PRIORITY = EPOLLPRI as u32; /// The event is considered as being "processed" from the time when it is returned /// by a call to `epoll_wait` until the next call to `epoll_wait` on the same /// epoll file descriptor, the closure of that file descriptor, the removal of the /// event file descriptor via EPOLL_CTL_DEL, or the clearing of EPOLLWAKEUP /// for the event file descriptor via EPOLL_CTL_MOD. const WAKE_UP = EPOLLWAKEUP as u32; /// Sets the one-shot behavior for the associated file descriptor. const ONE_SHOT = EPOLLONESHOT as u32; /// Sets an exclusive wake up mode for the epoll file descriptor that is being /// attached to the associated file descriptor. /// When a wake up event occurs and multiple epoll file descriptors are attached to /// the same target file using this mode, one or more of the epoll file descriptors /// will receive an event with `epoll_wait`. The default here is for all those file /// descriptors to receive an event. const EXCLUSIVE = EPOLLEXCLUSIVE as u32; } } /// Wrapper over /// ['libc::epoll_event'](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html). // We are using `transparent` here to be super sure that this struct and its fields // have the same alignment as those from the `epoll_event` struct from C. #[repr(transparent)] #[derive(Clone, Copy)] pub struct EpollEvent(epoll_event); impl std::fmt::Debug for EpollEvent { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{{ events: {}, data: {} }}", self.events(), self.data()) } } impl Deref for EpollEvent { type Target = epoll_event; fn deref(&self) -> &Self::Target { &self.0 } } impl Default for EpollEvent { fn default() -> Self { EpollEvent(epoll_event { events: 0u32, u64: 0u64, }) } } impl EpollEvent { /// Create a new epoll_event instance. /// /// # Arguments /// /// `events` - contains an event mask. /// `data` - a user data variable. `data` field can be a fd on which /// we want to monitor the events specified by `events`. /// /// # Examples /// /// ``` /// extern crate vmm_sys_util; /// use vmm_sys_util::epoll::{EpollEvent, EventSet}; /// /// let event = EpollEvent::new(EventSet::IN, 2); /// ``` pub fn new(events: EventSet, data: u64) -> Self { EpollEvent(epoll_event { events: events.bits(), u64: data, }) } /// Returns the `events` from /// ['libc::epoll_event'](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html). /// /// # Examples /// /// ``` /// extern crate vmm_sys_util; /// use vmm_sys_util::epoll::{EpollEvent, EventSet}; /// /// let event = EpollEvent::new(EventSet::IN, 2); /// assert_eq!(event.events(), 1); /// ``` pub fn events(&self) -> u32 { self.events } /// Returns the `EventSet` corresponding to `epoll_event.events`. /// /// # Panics /// /// Panics if `libc::epoll_event` contains invalid events. /// /// /// # Examples /// /// ``` /// extern crate vmm_sys_util; /// use vmm_sys_util::epoll::{EpollEvent, EventSet}; /// /// let event = EpollEvent::new(EventSet::IN, 2); /// assert_eq!(event.event_set(), EventSet::IN); /// ``` pub fn event_set(&self) -> EventSet { // This unwrap is safe because `epoll_events` can only be user created or // initialized by the kernel. We trust the kernel to only send us valid // events. The user can only initialize `epoll_events` using valid events. EventSet::from_bits(self.events()).unwrap() } /// Returns the `data` from the `libc::epoll_event`. /// /// # Examples /// /// ``` /// extern crate vmm_sys_util; /// use vmm_sys_util::epoll::{EpollEvent, EventSet}; /// /// let event = EpollEvent::new(EventSet::IN, 2); /// assert_eq!(event.data(), 2); /// ``` pub fn data(&self) -> u64 { self.u64 } /// Converts the `libc::epoll_event` data to a RawFd. /// /// This conversion is lossy when the data does not correspond to a RawFd /// (data does not fit in a i32). /// /// # Examples /// /// ``` /// extern crate vmm_sys_util; /// use vmm_sys_util::epoll::{EpollEvent, EventSet}; /// /// let event = EpollEvent::new(EventSet::IN, 2); /// assert_eq!(event.fd(), 2); /// ``` pub fn fd(&self) -> RawFd { self.u64 as i32 } } /// Wrapper over epoll functionality. #[derive(Debug)] pub struct Epoll { epoll_fd: RawFd, } impl Epoll { /// Create a new epoll file descriptor. pub fn new() -> io::Result { let epoll_fd = SyscallReturnCode( // SAFETY: Safe because the return code is transformed by `into_result` in a `Result`. unsafe { epoll_create1(EPOLL_CLOEXEC) }, ) .into_result()?; Ok(Epoll { epoll_fd }) } /// Wrapper for `libc::epoll_ctl`. /// /// This can be used for adding, modifying or removing a file descriptor in the /// interest list of the epoll instance. /// /// # Arguments /// /// * `operation` - refers to the action to be performed on the file descriptor. /// * `fd` - the file descriptor on which we want to perform `operation`. /// * `event` - refers to the `epoll_event` instance that is linked to `fd`. /// /// # Examples /// /// ``` /// extern crate vmm_sys_util; /// /// use std::os::unix::io::AsRawFd; /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet}; /// use vmm_sys_util::eventfd::EventFd; /// /// let epoll = Epoll::new().unwrap(); /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); /// epoll /// .ctl( /// ControlOperation::Add, /// event_fd.as_raw_fd() as i32, /// EpollEvent::new(EventSet::OUT, event_fd.as_raw_fd() as u64), /// ) /// .unwrap(); /// epoll /// .ctl( /// ControlOperation::Modify, /// event_fd.as_raw_fd() as i32, /// EpollEvent::new(EventSet::IN, 4), /// ) /// .unwrap(); /// ``` pub fn ctl(&self, operation: ControlOperation, fd: RawFd, event: EpollEvent) -> io::Result<()> { SyscallReturnCode( // SAFETY: Safe because we give a valid epoll file descriptor, a valid file descriptor // to watch, as well as a valid epoll_event structure. We also check the return value. unsafe { epoll_ctl( self.epoll_fd, operation as i32, fd, &event as *const EpollEvent as *mut epoll_event, ) }, ) .into_empty_result() } /// Wrapper for `libc::epoll_wait`. /// Returns the number of file descriptors in the interest list that became ready /// for I/O or `errno` if an error occurred. /// /// # Arguments /// /// * `timeout` - specifies for how long the `epoll_wait` system call will block /// (measured in milliseconds). /// * `events` - points to a memory area that will be used for storing the events /// returned by `epoll_wait()` call. /// /// # Examples /// /// ``` /// extern crate vmm_sys_util; /// /// use std::os::unix::io::AsRawFd; /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet}; /// use vmm_sys_util::eventfd::EventFd; /// /// let epoll = Epoll::new().unwrap(); /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap(); /// /// let mut ready_events = vec![EpollEvent::default(); 10]; /// epoll /// .ctl( /// ControlOperation::Add, /// event_fd.as_raw_fd() as i32, /// EpollEvent::new(EventSet::OUT, 4), /// ) /// .unwrap(); /// let ev_count = epoll.wait(-1, &mut ready_events[..]).unwrap(); /// assert_eq!(ev_count, 1); /// ``` pub fn wait(&self, timeout: i32, events: &mut [EpollEvent]) -> io::Result { let events_count = SyscallReturnCode( // SAFETY: Safe because we give a valid epoll file descriptor and an array of // epoll_event structures that will be modified by the kernel to indicate information // about the subset of file descriptors in the interest list. // We also check the return value. unsafe { epoll_wait( self.epoll_fd, events.as_mut_ptr() as *mut epoll_event, events.len() as i32, timeout, ) }, ) .into_result()? as usize; Ok(events_count) } } impl AsRawFd for Epoll { fn as_raw_fd(&self) -> RawFd { self.epoll_fd } } impl Drop for Epoll { fn drop(&mut self) { // SAFETY: Safe because this fd is opened with `epoll_create` and we trust // the kernel to give us a valid fd. unsafe { libc::close(self.epoll_fd); } } } #[cfg(test)] mod tests { use super::*; use crate::eventfd::EventFd; #[test] fn test_event_ops() { let mut event = EpollEvent::default(); assert_eq!(event.events(), 0); assert_eq!(event.data(), 0); event = EpollEvent::new(EventSet::IN, 2); assert_eq!(event.events(), 1); assert_eq!(event.event_set(), EventSet::IN); assert_eq!(event.data(), 2); assert_eq!(event.fd(), 2); } #[test] fn test_events_debug() { let events = EpollEvent::new(EventSet::IN, 42); assert_eq!(format!("{:?}", events), "{ events: 1, data: 42 }") } #[test] fn test_epoll() { const DEFAULT__TIMEOUT: i32 = 250; const EVENT_BUFFER_SIZE: usize = 128; let epoll = Epoll::new().unwrap(); assert_eq!(epoll.epoll_fd, epoll.as_raw_fd()); // Let's test different scenarios for `epoll_ctl()` and `epoll_wait()` functionality. let event_fd_1 = EventFd::new(libc::EFD_NONBLOCK).unwrap(); // For EPOLLOUT to be available it is enough only to be possible to write a value of // at least 1 to the eventfd counter without blocking. // If we write a value greater than 0 to this counter, the fd will be available for // EPOLLIN events too. event_fd_1.write(1).unwrap(); let mut event_1 = EpollEvent::new(EventSet::IN | EventSet::OUT, event_fd_1.as_raw_fd() as u64); // For EPOLL_CTL_ADD behavior we will try to add some fds with different event masks into // the interest list of epoll instance. assert!(epoll .ctl(ControlOperation::Add, event_fd_1.as_raw_fd(), event_1) .is_ok()); // We can't add twice the same fd to epoll interest list. assert!(epoll .ctl(ControlOperation::Add, event_fd_1.as_raw_fd(), event_1) .is_err()); let event_fd_2 = EventFd::new(libc::EFD_NONBLOCK).unwrap(); event_fd_2.write(1).unwrap(); assert!(epoll .ctl( ControlOperation::Add, event_fd_2.as_raw_fd(), // For this fd, we want an Event instance that has `data` field set to other // value than the value of the fd and `events` without EPOLLIN type set. EpollEvent::new(EventSet::OUT, 10) ) .is_ok()); // For the following eventfd we won't write anything to its counter, so we expect EPOLLIN // event to not be available for this fd, even if we say that we want to monitor this type // of event via EPOLL_CTL_ADD operation. let event_fd_3 = EventFd::new(libc::EFD_NONBLOCK).unwrap(); let event_3 = EpollEvent::new(EventSet::OUT | EventSet::IN, event_fd_3.as_raw_fd() as u64); assert!(epoll .ctl(ControlOperation::Add, event_fd_3.as_raw_fd(), event_3) .is_ok()); // Let's check `epoll_wait()` behavior for our epoll instance. let mut ready_events = vec![EpollEvent::default(); EVENT_BUFFER_SIZE]; let mut ev_count = epoll.wait(DEFAULT__TIMEOUT, &mut ready_events[..]).unwrap(); // We expect to have 3 fds in the ready list of epoll instance. assert_eq!(ev_count, 3); // Let's check also the Event values that are now returned in the ready list. assert_eq!(ready_events[0].data(), event_fd_1.as_raw_fd() as u64); // For this fd, `data` field was populated with random data instead of the // corresponding fd value. assert_eq!(ready_events[1].data(), 10); assert_eq!(ready_events[2].data(), event_fd_3.as_raw_fd() as u64); // EPOLLIN and EPOLLOUT should be available for this fd. assert_eq!( ready_events[0].events(), (EventSet::IN | EventSet::OUT).bits() ); // Only EPOLLOUT is expected because we didn't want to monitor EPOLLIN on this fd. assert_eq!(ready_events[1].events(), EventSet::OUT.bits()); // Only EPOLLOUT too because eventfd counter value is 0 (we didn't write a value // greater than 0 to it). assert_eq!(ready_events[2].events(), EventSet::OUT.bits()); // Now we're gonna modify the Event instance for a fd to test EPOLL_CTL_MOD // behavior. // We create here a new Event with some events, other than those previously set, // that we want to monitor this time on event_fd_1. event_1 = EpollEvent::new(EventSet::OUT, 20); assert!(epoll .ctl(ControlOperation::Modify, event_fd_1.as_raw_fd(), event_1) .is_ok()); let event_fd_4 = EventFd::new(libc::EFD_NONBLOCK).unwrap(); // Can't modify a fd that wasn't added to epoll interest list. assert!(epoll .ctl( ControlOperation::Modify, event_fd_4.as_raw_fd(), EpollEvent::default() ) .is_err()); let _ = epoll.wait(DEFAULT__TIMEOUT, &mut ready_events[..]).unwrap(); // Let's check that Event fields were indeed changed for the `event_fd_1` fd. assert_eq!(ready_events[0].data(), 20); // EPOLLOUT is now available for this fd as we've intended with EPOLL_CTL_MOD operation. assert_eq!(ready_events[0].events(), EventSet::OUT.bits()); // Now let's set for a fd to not have any events monitored. assert!(epoll .ctl( ControlOperation::Modify, event_fd_1.as_raw_fd(), EpollEvent::default() ) .is_ok()); // In this particular case we expect to remain only with 2 fds in the ready list. ev_count = epoll.wait(DEFAULT__TIMEOUT, &mut ready_events[..]).unwrap(); assert_eq!(ev_count, 2); // Let's also delete a fd from the interest list. assert!(epoll .ctl( ControlOperation::Delete, event_fd_2.as_raw_fd(), EpollEvent::default() ) .is_ok()); // We expect to have only one fd remained in the ready list (event_fd_3). ev_count = epoll.wait(DEFAULT__TIMEOUT, &mut ready_events[..]).unwrap(); assert_eq!(ev_count, 1); assert_eq!(ready_events[0].data(), event_fd_3.as_raw_fd() as u64); assert_eq!(ready_events[0].events(), EventSet::OUT.bits()); // If we try to remove a fd from epoll interest list that wasn't added before it will fail. assert!(epoll .ctl( ControlOperation::Delete, event_fd_4.as_raw_fd(), EpollEvent::default() ) .is_err()); } }