vmm_sys_util/linux/
epoll.rs

1// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: BSD-3-Clause
3
4//! Safe wrappers over the
5//! [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html) API.
6
7use std::io;
8use std::ops::{Deref, Drop};
9use std::os::unix::io::{AsRawFd, RawFd};
10
11#[cfg(any(target_os = "linux", target_os = "android"))]
12use bitflags::bitflags;
13use libc::{
14    epoll_create1, epoll_ctl, epoll_event, epoll_wait, EPOLLERR, EPOLLET, EPOLLEXCLUSIVE, EPOLLHUP,
15    EPOLLIN, EPOLLONESHOT, EPOLLOUT, EPOLLPRI, EPOLLRDHUP, EPOLLWAKEUP, EPOLL_CLOEXEC,
16    EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD,
17};
18
19use crate::syscall::SyscallReturnCode;
20
/// Wrapper over `EPOLL_CTL_*` operations that can be performed on a file descriptor.
///
/// The enum is `#[repr(i32)]` and every variant carries the value of the matching
/// `libc` constant, so a variant can be cast with `as i32` and handed to `epoll_ctl`
/// as its operation argument.
#[derive(Debug)]
#[repr(i32)]
pub enum ControlOperation {
    /// Add a file descriptor to the interest list.
    Add = EPOLL_CTL_ADD,
    /// Change the settings associated with a file descriptor that is
    /// already in the interest list.
    Modify = EPOLL_CTL_MOD,
    /// Remove a file descriptor from the interest list.
    Delete = EPOLL_CTL_DEL,
}
33
bitflags! {
    /// The type of events we can monitor a file descriptor for.
    ///
    /// Every flag is the `libc` `EPOLL*` constant of the same meaning, cast to `u32`.
    pub struct EventSet: u32 {
        /// The associated file descriptor is available for read operations.
        const IN = EPOLLIN as u32;
        /// The associated file descriptor is available for write operations.
        const OUT = EPOLLOUT as u32;
        /// Error condition happened on the associated file descriptor. According to
        /// `epoll_ctl(2)`, `epoll_wait` always reports this event, so it is not
        /// necessary to set it in events.
        const ERROR = EPOLLERR as u32;
        /// The peer of a stream socket closed the connection, or shut down the writing
        /// half of it (see `epoll_ctl(2)`). This can be used to detect peer shutdown
        /// when using Edge Triggered monitoring.
        const READ_HANG_UP = EPOLLRDHUP as u32;
        /// Sets the Edge Triggered behavior for the associated file descriptor.
        /// The default behavior is Level Triggered.
        const EDGE_TRIGGERED = EPOLLET as u32;
        /// Hang up happened on the associated file descriptor. Note that `epoll_wait`
        /// will always wait for this event and it is not necessary to set it in events.
        const HANG_UP = EPOLLHUP as u32;
        /// There is an exceptional condition on that file descriptor. It is mostly used to
        /// set high priority for some data.
        const PRIORITY = EPOLLPRI as u32;
        /// If `ONE_SHOT` and `EDGE_TRIGGERED` are clear and the process has the
        /// `CAP_BLOCK_SUSPEND` capability, ensures that the system does not enter
        /// "suspend" or "hibernate" while this event is pending or being processed
        /// (see `epoll_ctl(2)`).
        ///
        /// The event is considered as being "processed" from the time when it is returned
        /// by a call to `epoll_wait` until the next call to `epoll_wait` on the same
        /// epoll file descriptor, the closure of that file descriptor, the removal of the
        /// event file descriptor via EPOLL_CTL_DEL, or the clearing of EPOLLWAKEUP
        /// for the event file descriptor via EPOLL_CTL_MOD.
        const WAKE_UP = EPOLLWAKEUP as u32;
        /// Sets the one-shot behavior for the associated file descriptor: per
        /// `epoll_ctl(2)`, after one event has been reported for it the file descriptor
        /// is disabled in the interest list until it is re-armed with EPOLL_CTL_MOD.
        const ONE_SHOT = EPOLLONESHOT as u32;
        /// Sets an exclusive wake up mode for the epoll file descriptor that is being
        /// attached to the associated file descriptor.
        /// When a wake up event occurs and multiple epoll file descriptors are attached to
        /// the same target file using this mode, one or more of the epoll file descriptors
        /// will receive an event with `epoll_wait`. The default here is for all those file
        /// descriptors to receive an event.
        const EXCLUSIVE = EPOLLEXCLUSIVE as u32;
    }
}
71
/// Wrapper over
/// [`libc::epoll_event`](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html).
// `repr(transparent)` gives this struct exactly the layout of the wrapped `epoll_event`.
// The pointer casts between `EpollEvent` and `epoll_event` made when calling into
// `epoll_ctl` and `epoll_wait` depend on that.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct EpollEvent(epoll_event);
79
80impl std::fmt::Debug for EpollEvent {
81    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
82        write!(f, "{{ events: {}, data: {} }}", self.events(), self.data())
83    }
84}
85
86impl Deref for EpollEvent {
87    type Target = epoll_event;
88    fn deref(&self) -> &Self::Target {
89        &self.0
90    }
91}
92
93impl Default for EpollEvent {
94    fn default() -> Self {
95        EpollEvent(epoll_event {
96            events: 0u32,
97            u64: 0u64,
98        })
99    }
100}
101
102impl EpollEvent {
103    /// Create a new epoll_event instance.
104    ///
105    /// # Arguments
106    ///
107    /// `events` - contains an event mask.
108    /// `data` - a user data variable. `data` field can be a fd on which
109    ///          we want to monitor the events specified by `events`.
110    ///
111    /// # Examples
112    ///
113    /// ```
114    /// extern crate vmm_sys_util;
115    /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
116    ///
117    /// let event = EpollEvent::new(EventSet::IN, 2);
118    /// ```
119    pub fn new(events: EventSet, data: u64) -> Self {
120        EpollEvent(epoll_event {
121            events: events.bits(),
122            u64: data,
123        })
124    }
125
126    /// Returns the `events` from
127    /// ['libc::epoll_event'](https://doc.rust-lang.org/1.8.0/libc/struct.epoll_event.html).
128    ///
129    /// # Examples
130    ///
131    /// ```
132    /// extern crate vmm_sys_util;
133    /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
134    ///
135    /// let event = EpollEvent::new(EventSet::IN, 2);
136    /// assert_eq!(event.events(), 1);
137    /// ```
138    pub fn events(&self) -> u32 {
139        self.events
140    }
141
142    /// Returns the `EventSet` corresponding to `epoll_event.events`.
143    ///
144    /// # Panics
145    ///
146    /// Panics if `libc::epoll_event` contains invalid events.
147    ///
148    ///
149    /// # Examples
150    ///
151    /// ```
152    /// extern crate vmm_sys_util;
153    /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
154    ///
155    /// let event = EpollEvent::new(EventSet::IN, 2);
156    /// assert_eq!(event.event_set(), EventSet::IN);
157    /// ```
158    pub fn event_set(&self) -> EventSet {
159        // This unwrap is safe because `epoll_events` can only be user created or
160        // initialized by the kernel. We trust the kernel to only send us valid
161        // events. The user can only initialize `epoll_events` using valid events.
162        EventSet::from_bits(self.events()).unwrap()
163    }
164
165    /// Returns the `data` from the `libc::epoll_event`.
166    ///
167    /// # Examples
168    ///
169    /// ```
170    /// extern crate vmm_sys_util;
171    /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
172    ///
173    /// let event = EpollEvent::new(EventSet::IN, 2);
174    /// assert_eq!(event.data(), 2);
175    /// ```
176    pub fn data(&self) -> u64 {
177        self.u64
178    }
179
180    /// Converts the `libc::epoll_event` data to a RawFd.
181    ///
182    /// This conversion is lossy when the data does not correspond to a RawFd
183    /// (data does not fit in a i32).
184    ///
185    /// # Examples
186    ///
187    /// ```
188    /// extern crate vmm_sys_util;
189    /// use vmm_sys_util::epoll::{EpollEvent, EventSet};
190    ///
191    /// let event = EpollEvent::new(EventSet::IN, 2);
192    /// assert_eq!(event.fd(), 2);
193    /// ```
194    pub fn fd(&self) -> RawFd {
195        self.u64 as i32
196    }
197}
198
/// Wrapper over epoll functionality.
///
/// Holds the epoll file descriptor created by `Epoll::new`; the descriptor is closed
/// when the value is dropped.
#[derive(Debug)]
pub struct Epoll {
    // Raw descriptor of the epoll instance, as returned by `epoll_create1`.
    epoll_fd: RawFd,
}
204
205impl Epoll {
206    /// Create a new epoll file descriptor.
207    pub fn new() -> io::Result<Self> {
208        let epoll_fd = SyscallReturnCode(
209            // SAFETY: Safe because the return code is transformed by `into_result` in a `Result`.
210            unsafe { epoll_create1(EPOLL_CLOEXEC) },
211        )
212        .into_result()?;
213        Ok(Epoll { epoll_fd })
214    }
215
216    /// Wrapper for `libc::epoll_ctl`.
217    ///
218    /// This can be used for adding, modifying or removing a file descriptor in the
219    /// interest list of the epoll instance.
220    ///
221    /// # Arguments
222    ///
223    /// * `operation` - refers to the action to be performed on the file descriptor.
224    /// * `fd` - the file descriptor on which we want to perform `operation`.
225    /// * `event` - refers to the `epoll_event` instance that is linked to `fd`.
226    ///
227    /// # Examples
228    ///
229    /// ```
230    /// extern crate vmm_sys_util;
231    ///
232    /// use std::os::unix::io::AsRawFd;
233    /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
234    /// use vmm_sys_util::eventfd::EventFd;
235    ///
236    /// let epoll = Epoll::new().unwrap();
237    /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
238    /// epoll
239    ///     .ctl(
240    ///         ControlOperation::Add,
241    ///         event_fd.as_raw_fd() as i32,
242    ///         EpollEvent::new(EventSet::OUT, event_fd.as_raw_fd() as u64),
243    ///     )
244    ///     .unwrap();
245    /// epoll
246    ///     .ctl(
247    ///         ControlOperation::Modify,
248    ///         event_fd.as_raw_fd() as i32,
249    ///         EpollEvent::new(EventSet::IN, 4),
250    ///     )
251    ///     .unwrap();
252    /// ```
253    pub fn ctl(&self, operation: ControlOperation, fd: RawFd, event: EpollEvent) -> io::Result<()> {
254        SyscallReturnCode(
255            // SAFETY: Safe because we give a valid epoll file descriptor, a valid file descriptor
256            // to watch, as well as a valid epoll_event structure. We also check the return value.
257            unsafe {
258                epoll_ctl(
259                    self.epoll_fd,
260                    operation as i32,
261                    fd,
262                    &event as *const EpollEvent as *mut epoll_event,
263                )
264            },
265        )
266        .into_empty_result()
267    }
268
269    /// Wrapper for `libc::epoll_wait`.
270    /// Returns the number of file descriptors in the interest list that became ready
271    /// for I/O or `errno` if an error occurred.
272    ///
273    /// # Arguments
274    ///
275    /// * `timeout` - specifies for how long the `epoll_wait` system call will block
276    ///               (measured in milliseconds).
277    /// * `events` - points to a memory area that will be used for storing the events
278    ///              returned by `epoll_wait()` call.
279    ///
280    /// # Examples
281    ///
282    /// ```
283    /// extern crate vmm_sys_util;
284    ///
285    /// use std::os::unix::io::AsRawFd;
286    /// use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
287    /// use vmm_sys_util::eventfd::EventFd;
288    ///
289    /// let epoll = Epoll::new().unwrap();
290    /// let event_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
291    ///
292    /// let mut ready_events = vec![EpollEvent::default(); 10];
293    /// epoll
294    ///     .ctl(
295    ///         ControlOperation::Add,
296    ///         event_fd.as_raw_fd() as i32,
297    ///         EpollEvent::new(EventSet::OUT, 4),
298    ///     )
299    ///     .unwrap();
300    /// let ev_count = epoll.wait(-1, &mut ready_events[..]).unwrap();
301    /// assert_eq!(ev_count, 1);
302    /// ```
303    pub fn wait(&self, timeout: i32, events: &mut [EpollEvent]) -> io::Result<usize> {
304        let events_count = SyscallReturnCode(
305            // SAFETY: Safe because we give a valid epoll file descriptor and an array of
306            // epoll_event structures that will be modified by the kernel to indicate information
307            // about the subset of file descriptors in the interest list.
308            // We also check the return value.
309            unsafe {
310                epoll_wait(
311                    self.epoll_fd,
312                    events.as_mut_ptr() as *mut epoll_event,
313                    events.len() as i32,
314                    timeout,
315                )
316            },
317        )
318        .into_result()? as usize;
319
320        Ok(events_count)
321    }
322}
323
324impl AsRawFd for Epoll {
325    fn as_raw_fd(&self) -> RawFd {
326        self.epoll_fd
327    }
328}
329
330impl Drop for Epoll {
331    fn drop(&mut self) {
332        // SAFETY: Safe because this fd is opened with `epoll_create` and we trust
333        // the kernel to give us a valid fd.
334        unsafe {
335            libc::close(self.epoll_fd);
336        }
337    }
338}
339
#[cfg(test)]
mod tests {
    use super::*;

    use crate::eventfd::EventFd;

    /// Checks the accessors of `EpollEvent` on a default and on an explicitly built event.
    #[test]
    fn test_event_ops() {
        let blank = EpollEvent::default();
        assert_eq!(blank.events(), 0);
        assert_eq!(blank.data(), 0);

        let readable = EpollEvent::new(EventSet::IN, 2);
        assert_eq!(readable.events(), 1);
        assert_eq!(readable.event_set(), EventSet::IN);

        assert_eq!(readable.data(), 2);
        assert_eq!(readable.fd(), 2);
    }

    /// Checks the hand-written `Debug` rendering of `EpollEvent`.
    #[test]
    fn test_events_debug() {
        let rendered = format!("{:?}", EpollEvent::new(EventSet::IN, 42));
        assert_eq!(rendered, "{ events: 1, data: 42 }")
    }

    /// Walks through add / modify / delete on the interest list and verifies what
    /// `wait` reports after each step.
    #[test]
    fn test_epoll() {
        // Timeout handed to every `wait` call, in milliseconds.
        const WAIT_TIMEOUT_MS: i32 = 250;
        // Number of slots in the buffer that receives the ready events.
        const READY_CAPACITY: usize = 128;

        let epoll = Epoll::new().unwrap();
        assert_eq!(epoll.epoll_fd, epoll.as_raw_fd());

        // ---- EPOLL_CTL_ADD ----

        // An eventfd is ready for EPOLLOUT as long as a value of at least 1 can be written
        // to its counter without blocking, and ready for EPOLLIN once the counter holds a
        // value greater than 0. Writing 1 therefore makes this fd ready for both.
        let event_fd_1 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        event_fd_1.write(1).unwrap();
        let raw_fd_1 = event_fd_1.as_raw_fd();
        let event_1 = EpollEvent::new(EventSet::IN | EventSet::OUT, raw_fd_1 as u64);

        let added = epoll.ctl(ControlOperation::Add, raw_fd_1, event_1);
        assert!(added.is_ok());

        // The same fd cannot be put on the interest list a second time.
        let added_again = epoll.ctl(ControlOperation::Add, raw_fd_1, event_1);
        assert!(added_again.is_err());

        // Second fd: its counter is non-zero as well, but we monitor it without EPOLLIN
        // and store a `data` value (10) that is unrelated to the fd number.
        let event_fd_2 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        event_fd_2.write(1).unwrap();
        let raw_fd_2 = event_fd_2.as_raw_fd();
        let event_2 = EpollEvent::new(EventSet::OUT, 10);
        assert!(epoll
            .ctl(ControlOperation::Add, raw_fd_2, event_2)
            .is_ok());

        // Third fd: nothing is written to its counter, so EPOLLIN must not be reported
        // for it even though we ask to monitor that event.
        let event_fd_3 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let raw_fd_3 = event_fd_3.as_raw_fd();
        let event_3 = EpollEvent::new(EventSet::OUT | EventSet::IN, raw_fd_3 as u64);
        assert!(epoll
            .ctl(ControlOperation::Add, raw_fd_3, event_3)
            .is_ok());

        // ---- epoll_wait ----

        let mut ready_events = vec![EpollEvent::default(); READY_CAPACITY];
        let ready = epoll
            .wait(WAIT_TIMEOUT_MS, ready_events.as_mut_slice())
            .unwrap();

        // All three fds are expected on the ready list.
        assert_eq!(ready, 3);

        // The `data` values come back exactly as registered; for the second fd that is
        // the arbitrary value 10 rather than the fd number.
        assert_eq!(ready_events[0].data(), raw_fd_1 as u64);
        assert_eq!(ready_events[1].data(), 10);
        assert_eq!(ready_events[2].data(), raw_fd_3 as u64);

        let out_only = EventSet::OUT.bits();
        // First fd: both EPOLLIN and EPOLLOUT.
        assert_eq!(
            ready_events[0].events(),
            (EventSet::IN | EventSet::OUT).bits()
        );
        // Second fd: EPOLLOUT only, because EPOLLIN is not monitored on it.
        assert_eq!(ready_events[1].events(), out_only);
        // Third fd: EPOLLOUT only, because its counter is still 0.
        assert_eq!(ready_events[2].events(), out_only);

        // ---- EPOLL_CTL_MOD ----

        // Replace the registration of the first fd with a different mask and `data`.
        let event_1_modified = EpollEvent::new(EventSet::OUT, 20);
        assert!(epoll
            .ctl(ControlOperation::Modify, raw_fd_1, event_1_modified)
            .is_ok());

        // Modifying a fd that was never added must fail.
        let event_fd_4 = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let raw_fd_4 = event_fd_4.as_raw_fd();
        assert!(epoll
            .ctl(ControlOperation::Modify, raw_fd_4, EpollEvent::default())
            .is_err());

        let _ = epoll
            .wait(WAIT_TIMEOUT_MS, ready_events.as_mut_slice())
            .unwrap();

        // The first fd now reports the new `data` and only EPOLLOUT.
        assert_eq!(ready_events[0].data(), 20);
        assert_eq!(ready_events[0].events(), out_only);

        // Register the first fd with an empty mask, i.e. monitor no events on it.
        assert!(epoll
            .ctl(ControlOperation::Modify, raw_fd_1, EpollEvent::default())
            .is_ok());

        // Only the other two fds remain on the ready list.
        let ready = epoll
            .wait(WAIT_TIMEOUT_MS, ready_events.as_mut_slice())
            .unwrap();
        assert_eq!(ready, 2);

        // ---- EPOLL_CTL_DEL ----

        assert!(epoll
            .ctl(ControlOperation::Delete, raw_fd_2, EpollEvent::default())
            .is_ok());

        // Only the third fd is left on the ready list.
        let ready = epoll
            .wait(WAIT_TIMEOUT_MS, ready_events.as_mut_slice())
            .unwrap();

        assert_eq!(ready, 1);
        assert_eq!(ready_events[0].data(), raw_fd_3 as u64);
        assert_eq!(ready_events[0].events(), out_only);

        // Deleting a fd that was never added must fail.
        assert!(epoll
            .ctl(ControlOperation::Delete, raw_fd_4, EpollEvent::default())
            .is_err());
    }
}