vmm_sys_util/linux/
poll.rs

1// Copyright 2019 Intel Corporation. All Rights Reserved.
2//
3// Copyright 2017 The Chromium OS Authors. All rights reserved.
4//
5// SPDX-License-Identifier: BSD-3-Clause
6
7//! Traits and structures for working with
8//! [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html)
9
10use std::cell::{Cell, Ref, RefCell};
11use std::cmp::min;
12use std::fs::File;
13use std::i32;
14use std::i64;
15use std::io::{stderr, Cursor, Write};
16use std::marker::PhantomData;
17use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
18use std::ptr::null_mut;
19use std::slice;
20use std::thread;
21use std::time::Duration;
22
23use libc::{
24    c_int, epoll_create1, epoll_ctl, epoll_event, epoll_wait, EINTR, EPOLLERR, EPOLLHUP, EPOLLIN,
25    EPOLLOUT, EPOLL_CLOEXEC, EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD,
26};
27
28use crate::errno::{errno_result, Error, Result};
29
// Evaluates `$x` repeatedly until it no longer looks like an interrupted call.
//
// `$x` is re-evaluated on every pass. The loop stops, yielding that result, as soon as the
// result is not `-1` or `Error::last()` differs from `Error::new(EINTR)`.
macro_rules! handle_eintr_errno {
    ($x:expr) => {{
        loop {
            let res = $x;
            if res != -1 || Error::last() != Error::new(EINTR) {
                break res;
            }
        }
    }};
}
42
// Length of the fixed-size `epoll_event` buffers declared in this file. `wait_timeout` passes
// the buffer length to `epoll_wait` as the maximum number of events returned per call.
const POLL_CONTEXT_MAX_EVENTS: usize = 16;
44
/// A wrapper of raw `libc::epoll_event`.
///
/// Holds a fixed-size array of `POLL_CONTEXT_MAX_EVENTS` event records inside a `RefCell`.
///
/// This should only be used with [`EpollContext`](struct.EpollContext.html).
pub struct EpollEvents(RefCell<[epoll_event; POLL_CONTEXT_MAX_EVENTS]>);
49
50impl std::fmt::Debug for EpollEvents {
51    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
52        write!(f, "EpollEvents {{ ... }}")
53    }
54}
55
56impl EpollEvents {
57    /// Creates a new EpollEvents.
58    pub fn new() -> EpollEvents {
59        EpollEvents(RefCell::new(
60            [epoll_event { events: 0, u64: 0 }; POLL_CONTEXT_MAX_EVENTS],
61        ))
62    }
63}
64
65impl Default for EpollEvents {
66    fn default() -> Self {
67        Self::new()
68    }
69}
70
/// Trait for a token that can be associated with an `fd` in a [`PollContext`](struct.PollContext.html).
///
/// The raw form of a token is stored in the `u64` field of the `epoll_event` registered for the
/// `fd`, and is turned back into a token with `from_raw_token` when a returned event is
/// inspected.
///
/// Simple enums that have no or primitive variant data can use the `#[derive(PollToken)]`
/// custom derive to implement this trait.
pub trait PollToken {
    /// Converts this token into a u64 that can be turned back into a token via `from_raw_token`.
    fn as_raw_token(&self) -> u64;

    /// Converts a raw token as returned from `as_raw_token` back into a token.
    ///
    /// It is invalid to give a raw token that was not returned via `as_raw_token` from the same
    /// `Self`. The implementation can expect that this will never happen as a result of its usage
    /// in `PollContext`.
    fn from_raw_token(data: u64) -> Self;
}
86
87impl PollToken for usize {
88    fn as_raw_token(&self) -> u64 {
89        *self as u64
90    }
91
92    fn from_raw_token(data: u64) -> Self {
93        data as Self
94    }
95}
96
97impl PollToken for u64 {
98    fn as_raw_token(&self) -> u64 {
99        *self
100    }
101
102    fn from_raw_token(data: u64) -> Self {
103        data as Self
104    }
105}
106
107impl PollToken for u32 {
108    fn as_raw_token(&self) -> u64 {
109        u64::from(*self)
110    }
111
112    fn from_raw_token(data: u64) -> Self {
113        data as Self
114    }
115}
116
117impl PollToken for u16 {
118    fn as_raw_token(&self) -> u64 {
119        u64::from(*self)
120    }
121
122    fn from_raw_token(data: u64) -> Self {
123        data as Self
124    }
125}
126
127impl PollToken for u8 {
128    fn as_raw_token(&self) -> u64 {
129        u64::from(*self)
130    }
131
132    fn from_raw_token(data: u64) -> Self {
133        data as Self
134    }
135}
136
137impl PollToken for () {
138    fn as_raw_token(&self) -> u64 {
139        0
140    }
141
142    fn from_raw_token(_data: u64) -> Self {}
143}
144
/// An event returned by [`PollContext::wait`](struct.PollContext.html#method.wait).
///
/// Borrows a single raw `epoll_event`; `T` is the token type produced by `token()`.
pub struct PollEvent<'a, T> {
    // The raw event record this value refers to.
    event: &'a epoll_event,
    token: PhantomData<T>, // Needed to satisfy usage of T
}
150
151impl<T> std::fmt::Debug for PollEvent<'_, T> {
152    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
153        f.debug_struct("PollEvent")
154            .field("event", &"?")
155            .field("token", &self.token)
156            .finish()
157    }
158}
159
160impl<'a, T: PollToken> PollEvent<'a, T> {
161    /// Gets the token associated in
162    /// [`PollContext::add`](struct.PollContext.html#method.add) with this event.
163    pub fn token(&self) -> T {
164        T::from_raw_token(self.event.u64)
165    }
166
167    /// Get the raw events returned by the kernel.
168    pub fn raw_events(&self) -> u32 {
169        self.event.events
170    }
171
172    /// Checks if the event is readable.
173    ///
174    /// True if the `fd` associated with this token in
175    /// [`PollContext::add`](struct.PollContext.html#method.add) is readable.
176    pub fn readable(&self) -> bool {
177        self.event.events & (EPOLLIN as u32) != 0
178    }
179
180    /// Checks if the event is writable.
181    ///
182    /// True if the `fd` associated with this token in
183    /// [`PollContext::add`](struct.PollContext.html#method.add) is writable.
184    pub fn writable(&self) -> bool {
185        self.event.events & (EPOLLOUT as u32) != 0
186    }
187
188    /// Checks if the event has been hangup on.
189    ///
190    /// True if the `fd` associated with this token in
191    /// [`PollContext::add`](struct.PollContext.html#method.add) has been hungup on.
192    pub fn hungup(&self) -> bool {
193        self.event.events & (EPOLLHUP as u32) != 0
194    }
195
196    /// Checks if the event has associated error conditions.
197    ///
198    /// True if the `fd` associated with this token in
199    /// [`PollContext::add`](struct.PollContext.html#method.add) has associated error conditions.
200    pub fn has_error(&self) -> bool {
201        self.event.events & (EPOLLERR as u32) != 0
202    }
203}
204
/// An iterator over a subset of events returned by
/// [`PollContext::wait`](struct.PollContext.html#method.wait).
///
/// Only events whose raw flags share at least one bit with `mask` are yielded.
#[derive(Debug)]
pub struct PollEventIter<'a, I, T>
where
    I: Iterator<Item = &'a epoll_event>,
{
    // Bit mask ANDed with each event's raw flags to decide whether the event is yielded.
    mask: u32,
    // Underlying iterator over the raw `epoll_event` records.
    iter: I,
    tokens: PhantomData<[T]>, // Needed to satisfy usage of T
}
216
217impl<'a, I, T> Iterator for PollEventIter<'a, I, T>
218where
219    I: Iterator<Item = &'a epoll_event>,
220    T: PollToken,
221{
222    type Item = PollEvent<'a, T>;
223    fn next(&mut self) -> Option<Self::Item> {
224        let mask = self.mask;
225        self.iter
226            .find(|event| (event.events & mask) != 0)
227            .map(|event| PollEvent {
228                event,
229                token: PhantomData,
230            })
231    }
232}
233
/// The list of events returned by [`PollContext::wait`](struct.PollContext.html#method.wait).
pub struct PollEvents<'a, T> {
    // Number of leading entries of `events` that are exposed by the iterators.
    count: usize,
    // Shared `RefCell` borrow of the fixed-size event buffer.
    events: Ref<'a, [epoll_event; POLL_CONTEXT_MAX_EVENTS]>,
    tokens: PhantomData<[T]>, // Needed to satisfy usage of T
}
240
241impl<T> std::fmt::Debug for PollEvents<'_, T> {
242    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
243        f.debug_struct("PollEventsOwned")
244            .field("count", &self.count)
245            .field("events", &"?")
246            .field("tokens", &self.tokens)
247            .finish()
248    }
249}
250
251impl<'a, T: PollToken> PollEvents<'a, T> {
252    /// Creates owned structure from borrowed [`PollEvents`](struct.PollEvents.html).
253    ///
254    /// Copies the events to an owned structure so the reference to this (and by extension
255    /// [`PollContext`](struct.PollContext.html)) can be dropped.
256    pub fn to_owned(&self) -> PollEventsOwned<T> {
257        PollEventsOwned {
258            count: self.count,
259            events: RefCell::new(*self.events),
260            tokens: PhantomData,
261        }
262    }
263
264    /// Iterates over each event.
265    pub fn iter(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> {
266        PollEventIter {
267            mask: 0xffff_ffff,
268            iter: self.events[..self.count].iter(),
269            tokens: PhantomData,
270        }
271    }
272
273    /// Iterates over each readable event.
274    pub fn iter_readable(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> {
275        PollEventIter {
276            mask: EPOLLIN as u32,
277            iter: self.events[..self.count].iter(),
278            tokens: PhantomData,
279        }
280    }
281
282    /// Iterates over each hungup event.
283    pub fn iter_hungup(&self) -> PollEventIter<'_, slice::Iter<'_, epoll_event>, T> {
284        PollEventIter {
285            mask: EPOLLHUP as u32,
286            iter: self.events[..self.count].iter(),
287            tokens: PhantomData,
288        }
289    }
290}
291
/// A deep copy of the event records from [`PollEvents`](struct.PollEvents.html).
pub struct PollEventsOwned<T> {
    // Number of leading entries of `events` that are exposed through `as_ref`.
    count: usize,
    // Owned copy of the event buffer; kept in a `RefCell` so `as_ref` can hand out a `Ref`.
    events: RefCell<[epoll_event; POLL_CONTEXT_MAX_EVENTS]>,
    tokens: PhantomData<T>, // Needed to satisfy usage of T
}
298
299impl<T> std::fmt::Debug for PollEventsOwned<T> {
300    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
301        f.debug_struct("PollEventsOwned")
302            .field("count", &self.count)
303            .field("events", &"?")
304            .field("tokens", &self.tokens)
305            .finish()
306    }
307}
308
309impl<T: PollToken> PollEventsOwned<T> {
310    /// Creates borrowed structure from owned structure
311    /// [`PollEventsOwned`](struct.PollEventsOwned.html).
312    ///
313    /// Takes a reference to the events so it can be iterated via methods in
314    /// [`PollEvents`](struct.PollEvents.html).
315    pub fn as_ref(&self) -> PollEvents<'_, T> {
316        PollEvents {
317            count: self.count,
318            events: self.events.borrow(),
319            tokens: PhantomData,
320        }
321    }
322}
323
/// Watching events taken by [`PollContext`](struct.PollContext.html).
///
/// Wraps the raw `u32` event mask that is placed in the `events` field of an `epoll_event`
/// when an `fd` is added or modified.
#[derive(Debug, Copy, Clone)]
pub struct WatchingEvents(u32);
327
328impl WatchingEvents {
329    /// Returns empty `WatchingEvents`.
330    #[inline(always)]
331    pub fn empty() -> WatchingEvents {
332        WatchingEvents(0)
333    }
334
335    /// Creates a new `WatchingEvents` with a specified value.
336    ///
337    /// Builds `WatchingEvents` from raw `epoll_event`.
338    ///
339    /// # Arguments
340    ///
341    /// * `raw`: the events to be created for watching.
342    #[inline(always)]
343    pub fn new(raw: u32) -> WatchingEvents {
344        WatchingEvents(raw)
345    }
346
347    /// Sets read events.
348    ///
349    /// Sets the events to be readable.
350    #[inline(always)]
351    pub fn set_read(self) -> WatchingEvents {
352        WatchingEvents(self.0 | EPOLLIN as u32)
353    }
354
355    /// Sets write events.
356    ///
357    /// Sets the events to be writable.
358    #[inline(always)]
359    pub fn set_write(self) -> WatchingEvents {
360        WatchingEvents(self.0 | EPOLLOUT as u32)
361    }
362
363    /// Gets the underlying epoll events.
364    pub fn get_raw(&self) -> u32 {
365        self.0
366    }
367}
368
/// A wrapper of linux [`epoll`](http://man7.org/linux/man-pages/man7/epoll.7.html).
///
/// It provides a similar interface to [`PollContext`](struct.PollContext.html).
/// It is thread safe while PollContext is not. It requires the user to pass in a reference to
/// an EpollEvents while PollContext does not. Always use PollContext if you don't need to access
/// the same epoll from different threads.
///
/// # Examples
///
/// ```
/// extern crate vmm_sys_util;
/// use vmm_sys_util::eventfd::EventFd;
/// use vmm_sys_util::poll::{EpollContext, EpollEvents};
///
/// let evt = EventFd::new(0).unwrap();
/// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
/// let events = EpollEvents::new();
///
/// evt.write(1).unwrap();
/// ctx.add(&evt, 1).unwrap();
///
/// for event in ctx.wait(&events).unwrap().iter_readable() {
///     assert_eq!(event.token(), 1);
/// }
/// ```
#[derive(Debug)]
pub struct EpollContext<T> {
    // `File` wrapping the epoll fd obtained from `epoll_create1`.
    epoll_ctx: File,
    // Needed to satisfy usage of T
    tokens: PhantomData<[T]>,
}
400
401impl<T: PollToken> EpollContext<T> {
402    /// Creates a new `EpollContext`.
403    ///
404    /// Uses [`epoll_create1`](http://man7.org/linux/man-pages/man2/epoll_create.2.html)
405    /// to create a new epoll fd.
406    ///
407    /// # Examples
408    ///
409    /// ```
410    /// extern crate vmm_sys_util;
411    /// use vmm_sys_util::poll::EpollContext;
412    ///
413    /// let ctx: EpollContext<usize> = EpollContext::new().unwrap();
414    /// ```
415    pub fn new() -> Result<EpollContext<T>> {
416        // SAFETY: Safe because we check the return value.
417        let epoll_fd = unsafe { epoll_create1(EPOLL_CLOEXEC) };
418        if epoll_fd < 0 {
419            return errno_result();
420        }
421        Ok(EpollContext {
422            // SAFETY: Safe because we verified that the FD is valid and we trust `epoll_create1`.
423            epoll_ctx: unsafe { File::from_raw_fd(epoll_fd) },
424            tokens: PhantomData,
425        })
426    }
427
428    /// Adds the given `fd` to this context and associates the given
429    /// `token` with the `fd`'s readable events.
430    ///
431    /// A `fd` can only be added once and does not need to be kept open.
432    /// If the `fd` is dropped and there were no duplicated file descriptors
433    /// (i.e. adding the same descriptor with a different FD number) added
434    /// to this context, events will not be reported by `wait` anymore.
435    ///
436    /// # Arguments
437    ///
438    /// * `fd`: the target file descriptor to be added.
439    /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
440    ///
441    /// # Examples
442    ///
443    /// ```
444    /// extern crate vmm_sys_util;
445    /// use vmm_sys_util::eventfd::EventFd;
446    /// use vmm_sys_util::poll::EpollContext;
447    ///
448    /// let evt = EventFd::new(0).unwrap();
449    /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
450    /// ctx.add(&evt, 1).unwrap();
451    /// ```
452    pub fn add(&self, fd: &dyn AsRawFd, token: T) -> Result<()> {
453        self.add_fd_with_events(fd, WatchingEvents::empty().set_read(), token)
454    }
455
456    /// Adds the given `fd` to this context, watching for the specified `events`
457    /// and associates the given 'token' with those events.
458    ///
459    /// A `fd` can only be added once and does not need to be kept open. If the `fd`
460    /// is dropped and there were no duplicated file descriptors (i.e. adding the same
461    /// descriptor with a different FD number) added to this context, events will
462    /// not be reported by `wait` anymore.
463    ///
464    /// # Arguments
465    ///
466    /// * `fd`: the target file descriptor to be added.
467    /// * `events`: specifies the events to be watched.
468    /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
469    ///
470    /// # Examples
471    ///
472    /// ```
473    /// extern crate vmm_sys_util;
474    /// use vmm_sys_util::eventfd::EventFd;
475    /// use vmm_sys_util::poll::{EpollContext, WatchingEvents};
476    ///
477    /// let evt = EventFd::new(0).unwrap();
478    /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
479    /// ctx.add_fd_with_events(&evt, WatchingEvents::empty().set_read(), 1)
480    ///     .unwrap();
481    /// ```
482    pub fn add_fd_with_events(
483        &self,
484        fd: &dyn AsRawFd,
485        events: WatchingEvents,
486        token: T,
487    ) -> Result<()> {
488        let mut evt = epoll_event {
489            events: events.get_raw(),
490            u64: token.as_raw_token(),
491        };
492        // SAFETY: Safe because we give a valid epoll FD and FD to watch, as well as a
493        // valid epoll_event structure. Then we check the return value.
494        let ret = unsafe {
495            epoll_ctl(
496                self.epoll_ctx.as_raw_fd(),
497                EPOLL_CTL_ADD,
498                fd.as_raw_fd(),
499                &mut evt,
500            )
501        };
502        if ret < 0 {
503            return errno_result();
504        };
505        Ok(())
506    }
507
508    /// Changes the setting associated with the given `fd` in this context.
509    ///
510    /// If `fd` was previously added to this context, the watched events will be replaced with
511    /// `events` and the token associated with it will be replaced with the given `token`.
512    ///
513    /// # Arguments
514    ///
515    /// * `fd`: the target file descriptor to be performed.
516    /// * `events`: specifies the events to be watched.
517    /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
518    ///
519    /// # Examples
520    ///
521    /// ```
522    /// extern crate vmm_sys_util;
523    /// use vmm_sys_util::eventfd::EventFd;
524    /// use vmm_sys_util::poll::{EpollContext, WatchingEvents};
525    ///
526    /// let evt = EventFd::new(0).unwrap();
527    /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
528    /// ctx.add_fd_with_events(&evt, WatchingEvents::empty().set_read(), 1)
529    ///     .unwrap();
530    /// ctx.modify(&evt, WatchingEvents::empty().set_write(), 2)
531    ///     .unwrap();
532    /// ```
533    pub fn modify(&self, fd: &dyn AsRawFd, events: WatchingEvents, token: T) -> Result<()> {
534        let mut evt = epoll_event {
535            events: events.0,
536            u64: token.as_raw_token(),
537        };
538        // SAFETY: Safe because we give a valid epoll FD and FD to modify, as well as a valid
539        // epoll_event structure. Then we check the return value.
540        let ret = unsafe {
541            epoll_ctl(
542                self.epoll_ctx.as_raw_fd(),
543                EPOLL_CTL_MOD,
544                fd.as_raw_fd(),
545                &mut evt,
546            )
547        };
548        if ret < 0 {
549            return errno_result();
550        };
551        Ok(())
552    }
553
554    /// Deletes the given `fd` from this context.
555    ///
556    /// If an `fd`'s token shows up in the list of hangup events, it should be removed using this
557    /// method or by closing/dropping (if and only if the fd was never dup()'d/fork()'d) the `fd`.
558    /// Failure to do so will cause the `wait` method to always return immediately, causing ~100%
559    /// CPU load.
560    ///
561    /// # Arguments
562    ///
563    /// * `fd`: the target file descriptor to be removed.
564    ///
565    /// # Examples
566    ///
567    /// ```
568    /// extern crate vmm_sys_util;
569    /// use vmm_sys_util::eventfd::EventFd;
570    /// use vmm_sys_util::poll::EpollContext;
571    ///
572    /// let evt = EventFd::new(0).unwrap();
573    /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
574    /// ctx.add(&evt, 1).unwrap();
575    /// ctx.delete(&evt).unwrap();
576    /// ```
577    pub fn delete(&self, fd: &dyn AsRawFd) -> Result<()> {
578        // SAFETY: Safe because we give a valid epoll FD and FD to stop watching. Then we check
579        // the return value.
580        let ret = unsafe {
581            epoll_ctl(
582                self.epoll_ctx.as_raw_fd(),
583                EPOLL_CTL_DEL,
584                fd.as_raw_fd(),
585                null_mut(),
586            )
587        };
588        if ret < 0 {
589            return errno_result();
590        };
591        Ok(())
592    }
593
594    /// Waits for any events to occur in FDs that were previously added to this context.
595    ///
596    /// The events are level-triggered, meaning that if any events are unhandled (i.e. not reading
597    /// for readable events and not closing for hungup events), subsequent calls to `wait` will
598    /// return immediately. The consequence of not handling an event perpetually while calling
599    /// `wait` is that the callers loop will degenerated to busy loop polling, pinning a CPU to
600    /// ~100% usage.
601    ///
602    /// # Arguments
603    ///
604    /// * `events`: the events to wait for.
605    ///
606    /// # Examples
607    ///
608    /// ```
609    /// extern crate vmm_sys_util;
610    /// use vmm_sys_util::eventfd::EventFd;
611    /// use vmm_sys_util::poll::{EpollContext, EpollEvents};
612    ///
613    /// let evt = EventFd::new(0).unwrap();
614    /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
615    /// let events = EpollEvents::new();
616    ///
617    /// evt.write(1).unwrap();
618    /// ctx.add(&evt, 1).unwrap();
619    ///
620    /// for event in ctx.wait(&events).unwrap().iter_readable() {
621    ///     assert_eq!(event.token(), 1);
622    /// }
623    /// ```
624    pub fn wait<'a>(&self, events: &'a EpollEvents) -> Result<PollEvents<'a, T>> {
625        self.wait_timeout(events, Duration::new(i64::MAX as u64, 0))
626    }
627
628    /// Like [`wait`](struct.EpollContext.html#method.wait) except will only block for a
629    /// maximum of the given `timeout`.
630    ///
631    /// This may return earlier than `timeout` with zero events if the duration indicated exceeds
632    /// system limits.
633    ///
634    /// # Arguments
635    ///
636    /// * `events`: the events to wait for.
637    /// * `timeout`: specifies the timeout that will block.
638    ///
639    /// # Examples
640    ///
641    /// ```
642    /// extern crate vmm_sys_util;
643    /// # use std::time::Duration;
644    /// use vmm_sys_util::eventfd::EventFd;
645    /// use vmm_sys_util::poll::{EpollContext, EpollEvents};
646    ///
647    /// let evt = EventFd::new(0).unwrap();
648    /// let ctx: EpollContext<u32> = EpollContext::new().unwrap();
649    /// let events = EpollEvents::new();
650    ///
651    /// evt.write(1).unwrap();
652    /// ctx.add(&evt, 1).unwrap();
653    /// for event in ctx
654    ///     .wait_timeout(&events, Duration::new(100, 0))
655    ///     .unwrap()
656    ///     .iter_readable()
657    /// {
658    ///     assert_eq!(event.token(), 1);
659    /// }
660    /// ```
661    pub fn wait_timeout<'a>(
662        &self,
663        events: &'a EpollEvents,
664        timeout: Duration,
665    ) -> Result<PollEvents<'a, T>> {
666        let timeout_millis = if timeout.as_secs() as i64 == i64::max_value() {
667            // We make the convenient assumption that 2^63 seconds is an effectively unbounded time
668            // frame. This is meant to mesh with `wait` calling us with no timeout.
669            -1
670        } else {
671            // In cases where we the number of milliseconds would overflow an i32, we substitute the
672            // maximum timeout which is ~24.8 days.
673            let millis = timeout
674                .as_secs()
675                .checked_mul(1_000)
676                .and_then(|ms| ms.checked_add(u64::from(timeout.subsec_nanos()) / 1_000_000))
677                .unwrap_or(i32::max_value() as u64);
678            min(i32::max_value() as u64, millis) as i32
679        };
680        let ret = {
681            let mut epoll_events = events.0.borrow_mut();
682            let max_events = epoll_events.len() as c_int;
683            // SAFETY: Safe because we give an epoll context and a properly sized epoll_events
684            // array pointer, which we trust the kernel to fill in properly.
685            unsafe {
686                handle_eintr_errno!(epoll_wait(
687                    self.epoll_ctx.as_raw_fd(),
688                    &mut epoll_events[0],
689                    max_events,
690                    timeout_millis
691                ))
692            }
693        };
694        if ret < 0 {
695            return errno_result();
696        }
697        let epoll_events = events.0.borrow();
698        let events = PollEvents {
699            count: ret as usize,
700            events: epoll_events,
701            tokens: PhantomData,
702        };
703        Ok(events)
704    }
705}
706
707impl<T: PollToken> AsRawFd for EpollContext<T> {
708    fn as_raw_fd(&self) -> RawFd {
709        self.epoll_ctx.as_raw_fd()
710    }
711}
712
713impl<T: PollToken> IntoRawFd for EpollContext<T> {
714    fn into_raw_fd(self) -> RawFd {
715        self.epoll_ctx.into_raw_fd()
716    }
717}
718
/// Used to poll multiple objects that have file descriptors.
///
/// # Example
///
/// ```
/// # use vmm_sys_util::errno::Result;
/// # use vmm_sys_util::eventfd::EventFd;
/// # use vmm_sys_util::poll::{PollContext, PollEvents};
/// let evt1 = EventFd::new(0).unwrap();
/// let evt2 = EventFd::new(0).unwrap();
/// evt2.write(1).unwrap();
///
/// let ctx: PollContext<u32> = PollContext::new().unwrap();
/// ctx.add(&evt1, 1).unwrap();
/// ctx.add(&evt2, 2).unwrap();
///
/// let pollevents: PollEvents<u32> = ctx.wait().unwrap();
/// let tokens: Vec<u32> = pollevents.iter_readable().map(|e| e.token()).collect();
/// assert_eq!(&tokens[..], &[2]);
/// ```
#[derive(Debug)]
pub struct PollContext<T> {
    // The underlying epoll wrapper that all add/modify/delete calls are forwarded to.
    epoll_ctx: EpollContext<T>,

    // We use a RefCell here so that the `wait` method only requires an immutable self reference
    // while returning the events (encapsulated by PollEvents). Without the RefCell, `wait` would
    // hold a mutable reference that lives as long as its returned reference (i.e. the PollEvents),
    // even though that reference is immutable. This is terribly inconvenient for the caller because
    // the borrow checking would prevent them from using `delete` and `add` while the events are in
    // scope.
    events: EpollEvents,

    // Hangup busy loop detection variables. See `check_for_hungup_busy_loop`.
    //
    // `check_for_hangup` is the switch toggled by `set_check_for_hangup`. `hangups` accumulates
    // the hangups counted since the last successful add/delete (both reset it to zero).
    // `max_hangups` is incremented by each successful add and decremented by each successful
    // delete, giving an upper bound on the number of FDs in this context.
    check_for_hangup: bool,
    hangups: Cell<usize>,
    max_hangups: Cell<usize>,
}
756
757impl<T: PollToken> PollContext<T> {
758    /// Creates a new `PollContext`.
759    pub fn new() -> Result<PollContext<T>> {
760        Ok(PollContext {
761            epoll_ctx: EpollContext::new()?,
762            events: EpollEvents::new(),
763            check_for_hangup: true,
764            hangups: Cell::new(0),
765            max_hangups: Cell::new(0),
766        })
767    }
768
769    /// Enable/disable of checking for unhandled hangup events.
770    pub fn set_check_for_hangup(&mut self, enable: bool) {
771        self.check_for_hangup = enable;
772    }
773
774    /// Adds the given `fd` to this context and associates the given `token` with the `fd`'s
775    /// readable events.
776    ///
777    /// A `fd` can only be added once and does not need to be kept open. If the `fd` is dropped and
778    /// there were no duplicated file descriptors (i.e. adding the same descriptor with a different
779    /// FD number) added to this context, events will not be reported by `wait` anymore.
780    ///
781    /// # Arguments
782    ///
783    /// * `fd`: the target file descriptor to be added.
784    /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
785    pub fn add(&self, fd: &dyn AsRawFd, token: T) -> Result<()> {
786        self.add_fd_with_events(fd, WatchingEvents::empty().set_read(), token)
787    }
788
789    /// Adds the given `fd` to this context, watching for the specified events and associates the
790    /// given 'token' with those events.
791    ///
792    /// A `fd` can only be added once and does not need to be kept open. If the `fd` is dropped and
793    /// there were no duplicated file descriptors (i.e. adding the same descriptor with a different
794    /// FD number) added to this context, events will not be reported by `wait` anymore.
795    ///
796    /// # Arguments
797    ///
798    /// * `fd`: the target file descriptor to be added.
799    /// * `events`: specifies the events to be watched.
800    /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
801    pub fn add_fd_with_events(
802        &self,
803        fd: &dyn AsRawFd,
804        events: WatchingEvents,
805        token: T,
806    ) -> Result<()> {
807        self.epoll_ctx.add_fd_with_events(fd, events, token)?;
808        self.hangups.set(0);
809        self.max_hangups.set(self.max_hangups.get() + 1);
810        Ok(())
811    }
812
813    /// Changes the setting associated with the given `fd` in this context.
814    ///
815    /// If `fd` was previously added to this context, the watched events will be replaced with
816    /// `events` and the token associated with it will be replaced with the given `token`.
817    ///
818    /// # Arguments
819    ///
820    /// * `fd`: the target file descriptor to be modified.
821    /// * `events`: specifies the events to be watched.
822    /// * `token`: a `PollToken` implementation, used to be as u64 of `libc::epoll_event` structure.
823    pub fn modify(&self, fd: &dyn AsRawFd, events: WatchingEvents, token: T) -> Result<()> {
824        self.epoll_ctx.modify(fd, events, token)
825    }
826
827    /// Deletes the given `fd` from this context.
828    ///
829    /// If an `fd`'s token shows up in the list of hangup events, it should be removed using this
830    /// method or by closing/dropping (if and only if the fd was never dup()'d/fork()'d) the `fd`.
831    /// Failure to do so will cause the `wait` method to always return immediately, causing ~100%
832    /// CPU load.
833    ///
834    /// # Arguments
835    ///
836    /// * `fd`: the target file descriptor to be removed.
837    pub fn delete(&self, fd: &dyn AsRawFd) -> Result<()> {
838        self.epoll_ctx.delete(fd)?;
839        self.hangups.set(0);
840        self.max_hangups.set(self.max_hangups.get() - 1);
841        Ok(())
842    }
843
844    // This method determines if the the user of wait is misusing the `PollContext` by leaving FDs
845    // in this `PollContext` that have been shutdown or hungup on. Such an FD will cause `wait` to
846    // return instantly with a hungup event. If that FD is perpetually left in this context, a busy
847    // loop burning ~100% of one CPU will silently occur with no human visible malfunction.
848    //
849    // How do we know if the client of this context is ignoring hangups? A naive implementation
850    // would trigger if consecutive wait calls yield hangup events, but there are legitimate cases
851    // for this, such as two distinct sockets becoming hungup across two consecutive wait calls. A
852    // smarter implementation would only trigger if `delete` wasn't called between waits that
853    // yielded hangups. Sadly `delete` isn't the only way to remove an FD from this context. The
854    // other way is for the client to close the hungup FD, which automatically removes it from this
855    // context. Assuming that the client always uses close, this implementation would too eagerly
856    // trigger.
857    //
858    // The implementation used here keeps an upper bound of FDs in this context using a counter
859    // hooked into add/delete (which is imprecise because close can also remove FDs without us
860    // knowing). The number of consecutive (no add or delete in between) hangups yielded by wait
861    // calls is counted and compared to the upper bound. If the upper bound is exceeded by the
862    // consecutive hangups, the implementation triggers the check and logs.
863    //
864    // This implementation has false negatives because the upper bound can be completely too high,
865    // in the worst case caused by only using close instead of delete. However, this method has the
866    // advantage of always triggering eventually genuine busy loop cases, requires no dynamic
867    // allocations, is fast and constant time to compute, and has no false positives.
868    fn check_for_hungup_busy_loop(&self, new_hangups: usize) {
869        let old_hangups = self.hangups.get();
870        let max_hangups = self.max_hangups.get();
871        if old_hangups <= max_hangups && old_hangups + new_hangups > max_hangups {
872            let mut buf = [0u8; 512];
873            let (res, len) = {
874                let mut buf_cursor = Cursor::new(&mut buf[..]);
875                // Oops, clippy bug. See https://github.com/rust-lang/rust-clippy/issues/9810
876                #[allow(clippy::write_literal)]
877                (
878                    writeln!(
879                        &mut buf_cursor,
880                        "[{}:{}] busy poll wait loop with hungup FDs detected on thread {}\n",
881                        file!(),
882                        line!(),
883                        thread::current().name().unwrap_or("")
884                    ),
885                    buf_cursor.position() as usize,
886                )
887            };
888
889            if res.is_ok() {
890                let _ = stderr().write_all(&buf[..len]);
891            }
892            // This panic is helpful for tests of this functionality.
893            #[cfg(test)]
894            panic!("hungup busy loop detected");
895        }
896        self.hangups.set(old_hangups + new_hangups);
897    }
898
899    /// Waits for any events to occur in FDs that were previously added to this context.
900    ///
901    /// The events are level-triggered, meaning that if any events are unhandled (i.e. not reading
902    /// for readable events and not closing for hungup events), subsequent calls to `wait` will
903    /// return immediately. The consequence of not handling an event perpetually while calling
904    /// `wait` is that the callers loop will degenerated to busy loop polling, pinning a CPU to
905    /// ~100% usage.
906    ///
907    /// # Panics
908    /// Panics if the returned `PollEvents` structure is not dropped before subsequent `wait` calls.
909    pub fn wait(&self) -> Result<PollEvents<'_, T>> {
910        self.wait_timeout(Duration::new(i64::MAX as u64, 0))
911    }
912
913    /// Like [`wait`](struct.EpollContext.html#method.wait) except will only block for a
914    /// maximum of the given `timeout`.
915    ///
916    /// This may return earlier than `timeout` with zero events if the duration indicated exceeds
917    /// system limits.
918    ///
919    /// # Arguments
920    ///
921    /// * `timeout`: specify the time that will block.
922    pub fn wait_timeout(&self, timeout: Duration) -> Result<PollEvents<'_, T>> {
923        let events = self.epoll_ctx.wait_timeout(&self.events, timeout)?;
924        let hangups = events.iter_hungup().count();
925        if self.check_for_hangup {
926            self.check_for_hungup_busy_loop(hangups);
927        }
928        Ok(events)
929    }
930}
931
932impl<T: PollToken> AsRawFd for PollContext<T> {
933    fn as_raw_fd(&self) -> RawFd {
934        self.epoll_ctx.as_raw_fd()
935    }
936}
937
938impl<T: PollToken> IntoRawFd for PollContext<T> {
939    fn into_raw_fd(self) -> RawFd {
940        self.epoll_ctx.into_raw_fd()
941    }
942}
943
#[cfg(test)]
mod tests {
    use super::*;
    use crate::eventfd::EventFd;
    use std::os::unix::net::UnixStream;
    use std::time::Instant;

    // Two signalled eventfds are added under distinct tokens; each readable event is consumed and
    // its FD removed from the context, so exactly two events must be observed in total.
    #[test]
    fn test_poll_context() {
        let evt1 = EventFd::new(0).unwrap();
        let evt2 = EventFd::new(0).unwrap();
        evt1.write(1).unwrap();
        evt2.write(1).unwrap();
        let ctx: PollContext<u32> = PollContext::new().unwrap();
        ctx.add(&evt1, 1).unwrap();
        ctx.add(&evt2, 2).unwrap();

        let mut evt_count = 0;
        while evt_count < 2 {
            for event in ctx.wait().unwrap().iter_readable() {
                evt_count += 1;
                match event.token() {
                    1 => {
                        evt1.read().unwrap();
                        ctx.delete(&evt1).unwrap();
                    }
                    2 => {
                        evt2.read().unwrap();
                        ctx.delete(&evt2).unwrap();
                    }
                    _ => panic!("unexpected token"),
                };
            }
        }
        assert_eq!(evt_count, 2);
    }

    // Registers more signalled eventfds than `POLL_CONTEXT_MAX_EVENTS` (the capacity of the event
    // buffer) and checks that repeated waits eventually deliver every one of them.
    #[test]
    fn test_poll_context_overflow() {
        const EVT_COUNT: usize = POLL_CONTEXT_MAX_EVENTS * 2 + 1;
        let ctx: PollContext<usize> = PollContext::new().unwrap();
        let mut evts = Vec::with_capacity(EVT_COUNT);
        for i in 0..EVT_COUNT {
            let evt = EventFd::new(0).unwrap();
            evt.write(1).unwrap();
            ctx.add(&evt, i).unwrap();
            evts.push(evt);
        }
        let mut evt_count = 0;
        while evt_count < EVT_COUNT {
            for event in ctx.wait().unwrap().iter_readable() {
                // The token is the index of the eventfd; reading it clears its readable state.
                evts[event.token()].read().unwrap();
                evt_count += 1;
            }
        }
    }

    // The expected message pins the panic to the busy-loop detector, so an unrelated failure
    // (for example a failing `unwrap`) can no longer make this test pass by accident.
    #[test]
    #[should_panic(expected = "hungup busy loop detected")]
    fn test_poll_context_hungup() {
        let (s1, s2) = UnixStream::pair().unwrap();
        let ctx: PollContext<u32> = PollContext::new().unwrap();
        ctx.add(&s1, 1).unwrap();

        // Causes s1 to receive hangup events, which we purposefully ignore to trip the detection
        // logic in `PollContext`.
        drop(s2);

        // Should easily panic within this many iterations.
        for _ in 0..1000 {
            ctx.wait().unwrap();
        }
    }

    // With no FDs registered, `wait_timeout` must block for at least the requested duration.
    #[test]
    fn test_poll_context_timeout() {
        let mut ctx: PollContext<u32> = PollContext::new().unwrap();
        let dur = Duration::from_millis(10);
        let start_inst = Instant::now();

        ctx.set_check_for_hangup(false);
        ctx.wait_timeout(dur).unwrap();
        assert!(start_inst.elapsed() >= dur);
    }

    // Builds a `PollEvent` over a hand-made `epoll_event` with every flag of interest set and
    // checks each accessor against it.
    #[test]
    fn test_poll_event() {
        let all_flags = (EPOLLIN | EPOLLERR | EPOLLOUT | EPOLLHUP) as u32;
        let event = epoll_event {
            events: all_flags,
            u64: 0x10,
        };
        let ev = PollEvent::<u32> {
            event: &event,
            token: PhantomData,
        };

        assert_eq!(ev.token(), 0x10);
        assert!(ev.readable());
        assert!(ev.writable());
        assert!(ev.hungup());
        assert!(ev.has_error());
        assert_eq!(ev.raw_events(), all_flags);
    }
}